Jump to content

6.12.4 becomes unresponsive every 3 days


Recommended Posts

Hi all,

I set up a new server from scratch; everything runs very smoothly, and I am using Docker, VMs and SMB shares.

I attach my diagnostics, which were taken after hard-resetting the server because the first try stopped there:

 

 


mkdir -p /boot/logs
mkdir -p '/rd6-diagnostics-20230924-2253/system' '/rd6-diagnostics-20230924-2253/config' '/rd6-diagnostics-20230924-2253/logs' '/rd6-diagnostics-20230924-2253/shares' '/rd6-diagnostics-20230924-2253/smart' '/rd6-diagnostics-20230924-2253/qemu' '/rd6-diagnostics-20230924-2253/xml'
top -bn1 -o%CPU 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/top.txt'
tail /boot/bz*.sha256 >> '/rd6-diagnostics-20230924-2253/unraid-6.12.4.txt'
uptime
nproc
lscpu 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lscpu.txt'
lsscsi -vgl 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsscsi.txt'
lspci -knn 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lspci.txt'
lsusb 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsusb.txt'
free -mth 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/memory.txt'
ps -auxf --sort=-pcpu 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/ps.txt'
lsof -Pni 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsof.txt'
lsmod|sort 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsmod.txt'
df -h 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/df.txt'
ip -br a|awk '/^(eth|bond)[0-9]+ /{print $1}'|sort
dmidecode -qt2|awk -F: '/^ Manufacturer:/{m=$2};/^ Product Name:/{p=$2} END{print m" -"p}' 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/motherboard.txt'
dmidecode -qt0 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/motherboard.txt'
cat /proc/meminfo 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/meminfo.txt'
dmidecode --type 17 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/meminfo.txt'
ethtool 'eth0' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool -i 'eth0' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool 'eth1' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool -i 'eth1' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool 'eth2' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool -i 'eth2' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool 'eth3' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool -i 'eth3' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool 'eth4' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool -i 'eth4' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool 'eth5' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ethtool -i 'eth5' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt'
ip -br a|todos >'/rd6-diagnostics-20230924-2253/system/ifconfig.txt'
sed -ri 's/(["\[ ])(127|10|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|192\.168)((\.[0-9]{1,3}){2,3}([/" .]|$))/\1@@@\2\3/g; s/(["\[ ][0-9]{1,3}\.)([0-9]{1,3}\.){2}([0-9]{1,3})([/" .]|$)/\1XXX.XXX.\3\4/g; s/@@@//g' '/rd6-diagnostics-20230924-2253/system/ifconfig.txt' 2>/dev/null
sed -ri 's/(["\[ ]([0-9a-f]{1,4}:){4})(([0-9a-f]{1,4}:){3}|:)([0-9a-f]{1,4})([/" .]|$)/\1XXXX:XXXX:XXXX:\5\6/g' '/rd6-diagnostics-20230924-2253/system/ifconfig.txt' 2>/dev/null
find /sys/kernel/iommu_groups/ -type l 2>/dev/null|sort -V|todos >'/rd6-diagnostics-20230924-2253/system/iommu_groups.txt'
todos '/rd6-diagnostics-20230924-2253/system/cmdline.txt'
echo -ne ' /boot ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /boot/config ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot/config'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /boot/config/plugins ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot/config/plugins'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /boot/syslinux ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot/syslinux'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /var/log ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/log'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /var/log/plugins ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/log/plugins'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /boot/extra folder does not exist ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /var/log/packages ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/log/packages'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /var/lib/pkgtools/packages ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/lib/pkgtools/packages'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
echo -ne ' /tmp ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/tmp'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt'
cp /boot/config/*.{cfg,conf,dat} '/rd6-diagnostics-20230924-2253/config' 2>/dev/null
cp /boot/config/go '/rd6-diagnostics-20230924-2253/config/go.txt' 2>/dev/null
sed -i -e '/password/c ***line removed***' -e '/user/c ***line removed***' -e '/pass/c ***line removed***' '/rd6-diagnostics-20230924-2253/config/go.txt'
sed -ri 's/^((disk|flash)(Read|Write)List.*=")[^"]+/\1.../' '/rd6-diagnostics-20230924-2253/config/*.cfg' 2>/dev/null
sed -ri 's/(["\[ ])(127|10|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|192\.168)((\.[0-9]{1,3}){2,3}([/" .]|$))/\1@@@\2\3/g; s/(["\[ ][0-9]{1,3}\.)([0-9]{1,3}\.){2}([0-9]{1,3})([/" .]|$)/\1XXX.XXX.\3\4/g; s/@@@//g' '/rd6-diagnostics-20230924-2253/config/network.cfg' 2>/dev/null
sed -ri 's/(["\[ ]([0-9a-f]{1,4}:){4})(([0-9a-f]{1,4}:){3}|:)([0-9a-f]{1,4})([/" .]|$)/\1XXXX:XXXX:XXXX:\5\6/g' '/rd6-diagnostics-20230924-2253/config/network.cfg' 2>/dev/null
/usr/local/emhttp/webGui/scripts/show_interfaces ip|tr -d ' '|tr '#' ' '|tr ',' ' ' >'/rd6-diagnostics-20230924-2253/config/listen.txt'
/usr/local/emhttp/webGui/scripts/error_interfaces|sed 's///' >>'/rd6-diagnostics-20230924-2253/config/listen.txt'
sed -ri 's/(["\[ ])(127|10|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|192\.168)((\.[0-9]{1,3}){2,3}([/" .]|$))/\1@@@\2\3/g; s/(["\[ ][0-9]{1,3}\.)([0-9]{1,3}\.){2}([0-9]{1,3})([/" .]|$)/\1XXX.XXX.\3\4/g; s/@@@//g' '/rd6-diagnostics-20230924-2253/config/listen.txt' 2>/dev/null
sed -ri 's/(["\[ ]([0-9a-f]{1,4}:){4})(([0-9a-f]{1,4}:){3}|:)([0-9a-f]{1,4})([/" .]|$)/\1XXXX:XXXX:XXXX:\5\6/g' '/rd6-diagnostics-20230924-2253/config/listen.txt' 2>/dev/null
sed -ri 's/^(share(Comment|ReadList|WriteList)=")[^"]+/\1.../' '/rd6-diagnostics-20230924-2253/shares/appdata.cfg' 2>/dev/null

 

I cannot tell if it is the reason but it happened the first time after updating to 6.12.4. 

 

Can you give me any advice on the matter ?

 

Thanks in Advance

rd6-diagnostics-20230925-0024.zip

Edited by Schulmeister
forgot the file
Link to comment

There are quite a few seemingly related threads/issues, is it time for some sort of collection thread?

Also, it appears to be 6.12.x specific. (I personally had no issues whatsoever for over a year with the same setup having these random lockups on 6.12)

 

Unraid 6.12.3 crashing, docker service is unavailable

UNRAID IS UNRESPONSIVE AFTER A DAY OR SO

LOSE CONNECTION TO WEBUI, SSH, DOCKER. SMB STILL WORKING

SERVER GOES UNRESPONSIVE DAILY, BUT STILL RESPONDS TO PINGS

6.12.4 SYSTEM HARD LOCKS OFTEN

UNRAID RANDOMLY CRASHES FOR SOME REASON?

Link to comment
4 hours ago, JorgeB said:

Nothing in that syslog snippet that I can see; if it keeps happening, enable the syslog server and post that after a crash.

 

The diagnostic stops there and that's it.

sed -ri 's/^(share(Comment|ReadList|WriteList)=")[^"]+/\1.../' '/rd6-diagnostics-20230924-2253/shares/appdata.cfg' 2>/dev/null

 

 

I will set up syslog server and hope for a solution..

Can I check any logs in order to get to the bottom of the problem?

Edited by Schulmeister
forgot to mention the syslog server
Link to comment
8 hours ago, unitary-synagogue4092 said:

since the last update, I have to physically restart the server,

it is a disaster.

6.12.4 is the worst update I've ever seen in my life.

 

is there a pilot on the plane?

I would not go that far.

6.12 is an excellent update - except for this one problem with unresponsiveness since 6.12.4.
I cannot tell what the problem was - I think I have a problem with the VM-Backup script colliding with mover - but that is just a hunch and I have no evidence whatsoever.

I have now the internal and an external syslog-server and fingers crossed we get to the bottom of things.

 

Again, this version is far from a "disaster" and I have every confidence that we will solve the problem.

Link to comment

I have just started encountering this issue as well. On Saturday, and tonight, I have had to hard-reboot the server...

 

On Saturday, I was able to access the web UI but nothing would work (taking down the array, rebooting, etc.).

 

Tonight, I couldn't even ping the box (which is very scary of course) but iDrac (dedicated LAN port) still worked. Plugging in a mouse and keyboard w/ monitor and I couldn't get it to wake up at all.

 

Logs attached, will enable persistent logging.

 

FYI, Running on a Dell Poweredge T420.

kuiper-diagnostics-20230926-2310.zip

Link to comment
4 hours ago, JorgeB said:

The only out of the ordinary thing I see are nginx and user scripts related errors, try booting in safe mode and post a new syslog if it crashes again.

 

You likely didn't set the remote IP in the syslog settings, you need to use the Unraid server IP.

That is most likely the error I made. I'll change that

Link to comment
  • 4 weeks later...

I have - hopefully - found the solution.

 

I got rid of every ZFS-formatted volume.

I had the cache formatted in ZFS and one Disk from the array (Spaceinvaderone made two videos on how that is a good idea)

I removed the ZFS Cache and added a btrfs one.

I reformatted the disk back to xfs.

No issues for 10 days now - fingers crossed that was the problem.

Link to comment

I have copied the Files on my local disk - they work fine.
I deleted them, ran a scrub again and now everything is fine. 

I'll keep an eye on that - the lockups of the server could have another reason:

 

What does that mean:

Oct 27 07:00:52 RD6 nginx: 2023/10/27 07:00:52 [error] 7772#7772: *1110829 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "localhost"
Oct 27 07:00:52 RD6 nginx: 2023/10/27 07:00:52 [error] 7772#7772: *1110830 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110837 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "localhost"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110838 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110839 "/usr/local/emhttp/api/index.html" is not found (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /api/ HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110842 open() "/usr/local/emhttp/status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status?full&json HTTP/1.1", host: "localhost"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110843 open() "/usr/local/emhttp/status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status?full&json HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110844 open() "/usr/local/emhttp/status/format/json" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status/format/json HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110845 open() "/usr/local/emhttp/basic_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /basic_status HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110846 open() "/usr/local/emhttp/stub_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /stub_status HTTP/1.1", host: "localhost"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110847 open() "/usr/local/emhttp/stub_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /stub_status HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110848 open() "/usr/local/emhttp/nginx_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /nginx_status HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110849 open() "/usr/local/emhttp/status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status HTTP/1.1", host: "127.0.0.1"
Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110850 FastCGI sent in stderr: "Primary script unknown" while reading response header from upstream, client: 127.0.0.1, server: , request: "GET /admin/api.php?auth=&version=true HTTP/1.1", upstream: "fastcgi://unix:/var/run/php5-fpm.sock:", host: "127.0.0.1"
Oct 27 07:00:55 RD6 nginx: 2023/10/27 07:00:55 [error] 7772#7772: *1110852 open() "/usr/local/emhttp/us" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /us HTTP/1.1", host: "localhost"
Oct 27 07:00:55 RD6 nginx: 2023/10/27 07:00:55 [error] 7772#7772: *1110853 open() "/usr/local/emhttp/us" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /us HTTP/1.1", host: "127.0.0.1"
 

Link to comment

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.
Note: Your post will require moderator approval before it will be visible.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

×
×
  • Create New...