Schulmeister Posted September 24, 2023 Share Posted September 24, 2023 (edited) Hi all, I did setup a new Server from scratch, everything runs very smoothly and I am using Docker, VMs and SMB Shares. I attach my diagnostics that where taken after hard-resetting the server because the first try stopped ther: mkdir -p /boot/logs mkdir -p '/rd6-diagnostics-20230924-2253/system' '/rd6-diagnostics-20230924-2253/config' '/rd6-diagnostics-20230924-2253/logs' '/rd6-diagnostics-20230924-2253/shares' '/rd6-diagnostics-20230924-2253/smart' '/rd6-diagnostics-20230924-2253/qemu' '/rd6-diagnostics-20230924-2253/xml' top -bn1 -o%CPU 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/top.txt' tail /boot/bz*.sha256 >> '/rd6-diagnostics-20230924-2253/unraid-6.12.4.txt' uptime nproc lscpu 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lscpu.txt' lsscsi -vgl 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsscsi.txt' lspci -knn 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lspci.txt' lsusb 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsusb.txt' free -mth 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/memory.txt' ps -auxf --sort=-pcpu 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/ps.txt' lsof -Pni 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsof.txt' lsmod|sort 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/lsmod.txt' df -h 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/df.txt' ip -br a|awk '/^(eth|bond)[0-9]+ /{print $1}'|sort dmidecode -qt2|awk -F: '/^ Manufacturer:/{m=$2};/^ Product Name:/{p=$2} END{print m" -"p}' 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/motherboard.txt' dmidecode -qt0 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/motherboard.txt' cat /proc/meminfo 2>/dev/null|todos >'/rd6-diagnostics-20230924-2253/system/meminfo.txt' dmidecode --type 17 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/meminfo.txt' ethtool 'eth0' 
2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool -i 'eth0' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool 'eth1' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool -i 'eth1' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool 'eth2' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool -i 'eth2' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool 'eth3' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool -i 'eth3' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool 'eth4' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool -i 'eth4' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool 'eth5' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ethtool -i 'eth5' 2>/dev/null|todos >>'/rd6-diagnostics-20230924-2253/system/ethtool.txt' ip -br a|todos >'/rd6-diagnostics-20230924-2253/system/ifconfig.txt' sed -ri 's/(["\[ ])(127|10|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|192\.168)((\.[0-9]{1,3}){2,3}([/" .]|$))/\1@@@\2\3/g; s/(["\[ ][0-9]{1,3}\.)([0-9]{1,3}\.){2}([0-9]{1,3})([/" .]|$)/\1XXX.XXX.\3\4/g; s/@@@//g' '/rd6-diagnostics-20230924-2253/system/ifconfig.txt' 2>/dev/null sed -ri 's/(["\[ ]([0-9a-f]{1,4}:){4})(([0-9a-f]{1,4}:){3}|:)([0-9a-f]{1,4})([/" .]|$)/\1XXXX:XXXX:XXXX:\5\6/g' '/rd6-diagnostics-20230924-2253/system/ifconfig.txt' 2>/dev/null find /sys/kernel/iommu_groups/ -type l 2>/dev/null|sort -V|todos >'/rd6-diagnostics-20230924-2253/system/iommu_groups.txt' todos '/rd6-diagnostics-20230924-2253/system/cmdline.txt' echo -ne ' /boot ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /boot/config ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot/config'|todos 
>>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /boot/config/plugins ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot/config/plugins'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /boot/syslinux ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/boot/syslinux'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /var/log ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/log'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /var/log/plugins ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/log/plugins'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /boot/extra folder does not exist ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /var/log/packages ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/log/packages'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /var/lib/pkgtools/packages ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/var/lib/pkgtools/packages'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' echo -ne ' /tmp ' >>'/rd6-diagnostics-20230924-2253/system/folders.txt';ls -l '/tmp'|todos >>'/rd6-diagnostics-20230924-2253/system/folders.txt' cp /boot/config/*.{cfg,conf,dat} '/rd6-diagnostics-20230924-2253/config' 2>/dev/null cp /boot/config/go '/rd6-diagnostics-20230924-2253/config/go.txt' 2>/dev/null sed -i -e '/password/c ***line removed***' -e '/user/c ***line removed***' -e '/pass/c ***line removed***' '/rd6-diagnostics-20230924-2253/config/go.txt' sed -ri 's/^((disk|flash)(Read|Write)List.*=")[^"]+/\1.../' '/rd6-diagnostics-20230924-2253/config/*.cfg' 2>/dev/null sed -ri 's/(["\[ ])(127|10|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|192\.168)((\.[0-9]{1,3}){2,3}([/" .]|$))/\1@@@\2\3/g; s/(["\[ ][0-9]{1,3}\.)([0-9]{1,3}\.){2}([0-9]{1,3})([/" .]|$)/\1XXX.XXX.\3\4/g; s/@@@//g' 
'/rd6-diagnostics-20230924-2253/config/network.cfg' 2>/dev/null sed -ri 's/(["\[ ]([0-9a-f]{1,4}:){4})(([0-9a-f]{1,4}:){3}|:)([0-9a-f]{1,4})([/" .]|$)/\1XXXX:XXXX:XXXX:\5\6/g' '/rd6-diagnostics-20230924-2253/config/network.cfg' 2>/dev/null /usr/local/emhttp/webGui/scripts/show_interfaces ip|tr -d ' '|tr '#' ' '|tr ',' ' ' >'/rd6-diagnostics-20230924-2253/config/listen.txt' /usr/local/emhttp/webGui/scripts/error_interfaces|sed 's///' >>'/rd6-diagnostics-20230924-2253/config/listen.txt' sed -ri 's/(["\[ ])(127|10|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|192\.168)((\.[0-9]{1,3}){2,3}([/" .]|$))/\1@@@\2\3/g; s/(["\[ ][0-9]{1,3}\.)([0-9]{1,3}\.){2}([0-9]{1,3})([/" .]|$)/\1XXX.XXX.\3\4/g; s/@@@//g' '/rd6-diagnostics-20230924-2253/config/listen.txt' 2>/dev/null sed -ri 's/(["\[ ]([0-9a-f]{1,4}:){4})(([0-9a-f]{1,4}:){3}|:)([0-9a-f]{1,4})([/" .]|$)/\1XXXX:XXXX:XXXX:\5\6/g' '/rd6-diagnostics-20230924-2253/config/listen.txt' 2>/dev/null sed -ri 's/^(share(Comment|ReadList|WriteList)=")[^"]+/\1.../' '/rd6-diagnostics-20230924-2253/shares/appdata.cfg' 2>/dev/null I cannot tell if it is the reason but it happened the first time after updating to 6.12.4. Can you give me any advice on the matter ? Thanks in Advance rd6-diagnostics-20230925-0024.zip Edited September 24, 2023 by Schulmeister forgot the file Quote Link to comment
JorgeB Posted September 25, 2023 Share Posted September 25, 2023 Nothing in that syslog snippet that I can see; if it keeps happening, enable the syslog server and post that after a crash. Quote Link to comment
Alintya Posted September 25, 2023 Share Posted September 25, 2023 There are quite a few seemingly related threads/issues, is it time for some sort of collection thread? Also, it appears to be 6.12.x specific. (I personally had no issues whatsoever for over a year with the same setup having these random lockups on 6.12) Unraid 6.12.3 crashing, docker service is unavailable UNRAID IS UNRESPONSIVE AFTER A DAY OR SO LOSE CONNECTION TO WEBUI, SSH, DOCKER. SMB STILL WORKING SERVER GOES UNRESPONSIVE DAILY, BUT STILL RESPONDS TO PINGS 6.12.4 SYSTEM HARD LOCKS OFTEN UNRAID RANDOMLY CRASHES FOR SOME REASON? Quote Link to comment
Schulmeister Posted September 25, 2023 Author Share Posted September 25, 2023 (edited) 4 hours ago, JorgeB said: Nothing in that syslog snippet that I can see, if it keeps happening eable the syslog server and post that after a crash. The diagnostic stops there and that's it. sed -ri 's/^(share(Comment|ReadList|WriteList)=")[^"]+/\1.../' '/rd6-diagnostics-20230924-2253/shares/appdata.cfg' 2>/dev/null I will set up the syslog server and hope for a solution. Are there any logs I can check in order to get to the bottom of the problem? Edited September 25, 2023 by Schulmeister forgot to mention the syslog server Quote Link to comment
JorgeB Posted September 25, 2023 Share Posted September 25, 2023 7 minutes ago, Schulmeister said: Can I check any logs in order to get to the Problem ? The persistent syslog is your best bet. Quote Link to comment
unitary-synagogue4092 Posted September 26, 2023 Share Posted September 26, 2023 (edited) since the last update, I have to physically restart the server, it is a disaster. 6.12.4 is the worst update I've ever seen in my life. is there a pilot on the plane? Edited September 26, 2023 by unitary-synagogue4092 Edit Quote Link to comment
Schulmeister Posted September 26, 2023 Author Share Posted September 26, 2023 8 hours ago, unitary-synagogue4092 said: since the last update, I have to physically restart the server, it is a disaster. 6.12.4 is the worst update I've ever seen in my life. is there a pilot on the plane? I would not go that far. 6.12 is an excellent update - except for this one problem with unresponsiveness since 6.12.4. I cannot tell what the problem was - I think I have a problem with the VM-Backup script colliding with mover - but that is just a hunch and I have no evidence whatsoever. I now have the internal and an external syslog server running, and fingers crossed we get to the bottom of things. Again, this version is far from a "disaster" and I have every confidence that we will solve the problem. Quote Link to comment
wes.crockett Posted September 27, 2023 Share Posted September 27, 2023 I have just started encountering this issue as well. On Saturday, and tonight, I have had to hard-reboot the server... On Saturday, i was able to access the web UI but nothing would work (taking down the array, rebooting, etc) Tonight, I couldn't even ping the box (which is very scary of course) but iDrac (dedicated LAN port) still worked. Plugging in a mouse and keyboard w/ monitor and I couldn't get it to wake up at all. Logs attached, will enable persistent logging. FYI, Running on a Dell Poweredge T420. kuiper-diagnostics-20230926-2310.zip Quote Link to comment
Schulmeister Posted September 28, 2023 Author Share Posted September 28, 2023 It happened again. I am really happy that I had a QNAP syslog server running, because there are no syslogs under appdata (the folder I selected for the local syslog server). Anyway - the system was totally unresponsive - even the local command line (monitor/keyboard). I am posting the diagnostics and the syslog messages. rd6-diagnostics-20230928-2206.zip Quote Link to comment
JorgeB Posted September 29, 2023 Share Posted September 29, 2023 The only out of the ordinary thing I see are nginx and user scripts related errors, try booting in safe mode and post a new syslog if it crashes again. 10 hours ago, Schulmeister said: because there are no syslogs under appdata (the folder I selected for the local syslog server) You likely didn't set the remote IP in the syslog settings, you need to use the Unraid server IP. Quote Link to comment
itimpi Posted September 29, 2023 Share Posted September 29, 2023 11 hours ago, Schulmeister said: because there are no syslogs under appdata (the folder I selected for the local syslog server) You could have also selected the 'mirror to flash' option that puts it into the 'logs' folder on the flash drive. Quote Link to comment
Schulmeister Posted September 29, 2023 Author Share Posted September 29, 2023 4 hours ago, JorgeB said: The only out of the ordinary thing I see are nginx and user scripts related errors, try booting in safe mode and post a new syslog if it crashes again. You likely didn't set the remote IP in the syslog settings, you need to use the Unraid server IP. That is most likely the error I made. I'll change that Quote Link to comment
Schulmeister Posted October 24, 2023 Author Share Posted October 24, 2023 I have - hopefully - found the solution. I got rid of every ZFS-formatted volume. I had the cache formatted in ZFS and one disk from the array (Spaceinvaderone made two videos on how that is a good idea). I removed the ZFS cache and added a btrfs one. I reformatted the disk back to xfs. For 10 days now there have been no issues - fingers crossed that was the problem. Quote Link to comment
Schulmeister Posted October 25, 2023 Author Share Posted October 25, 2023 Aaaaaand - down again, I've jinxed it. Around 14:20 today the Unraid system was unresponsive again. I have attached the diags and the syslog file. Maybe this time some insights will be possible. syslog-10.20.30.100.log rd6-diagnostics-20231025-1623.zip Quote Link to comment
JorgeB Posted October 25, 2023 Share Posted October 25, 2023 There is a call trace mentioning btrfs, but no clues to me what caused it. Quote Link to comment
Schulmeister Posted October 25, 2023 Author Share Posted October 25, 2023 should I run a scrub on my btrfs cachedrives ? Quote Link to comment
Schulmeister Posted October 25, 2023 Author Share Posted October 25, 2023 First Cachepool: NVME - uncorrectable Quote Link to comment
JorgeB Posted October 25, 2023 Share Posted October 25, 2023 Not a good sign, suggests some hardware issue, you can check the syslog to see if it lists the corrupt files, if yes delete/restore from a backup and run another scrub. Quote Link to comment
Schulmeister Posted October 25, 2023 Author Share Posted October 25, 2023 I highly doubt that - both NVMe disks are new, and so is the PCIe-NVMe adapter. It might be caused by the system lockups, or maybe vice versa - who knows. What should I do? Quote Link to comment
JorgeB Posted October 26, 2023 Share Posted October 26, 2023 10 hours ago, Schulmeister said: I highly doubt that - both nvme disks are new, so is the pcie-nvme-adaptor Unlikely to be a device problem; more likely other hardware. 10 hours ago, Schulmeister said: It might be caused by the systemlockups Those should never corrupt data. Quote Link to comment
Schulmeister Posted October 27, 2023 Author Share Posted October 27, 2023 I have copied the Files on my local disk - they work fine. I deleted them, ran a scrub again and now everything is fine. I'll have an eye on that - the lockups of the server could have another reason: What does that mean: Oct 27 07:00:52 RD6 nginx: 2023/10/27 07:00:52 [error] 7772#7772: *1110829 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "localhost" Oct 27 07:00:52 RD6 nginx: 2023/10/27 07:00:52 [error] 7772#7772: *1110830 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110837 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "localhost" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110838 open() "/usr/local/emhttp/server-status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /server-status?auto HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110839 "/usr/local/emhttp/api/index.html" is not found (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /api/ HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110842 open() "/usr/local/emhttp/status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status?full&json HTTP/1.1", host: "localhost" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110843 open() "/usr/local/emhttp/status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status?full&json HTTP/1.1", host: "127.0.0.1" Oct 27 
07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110844 open() "/usr/local/emhttp/status/format/json" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status/format/json HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110845 open() "/usr/local/emhttp/basic_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /basic_status HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110846 open() "/usr/local/emhttp/stub_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /stub_status HTTP/1.1", host: "localhost" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110847 open() "/usr/local/emhttp/stub_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /stub_status HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110848 open() "/usr/local/emhttp/nginx_status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /nginx_status HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110849 open() "/usr/local/emhttp/status" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /status HTTP/1.1", host: "127.0.0.1" Oct 27 07:00:54 RD6 nginx: 2023/10/27 07:00:54 [error] 7772#7772: *1110850 FastCGI sent in stderr: "Primary script unknown" while reading response header from upstream, client: 127.0.0.1, server: , request: "GET /admin/api.php?auth=&version=true HTTP/1.1", upstream: "fastcgi://unix:/var/run/php5-fpm.sock:", host: "127.0.0.1" Oct 27 07:00:55 RD6 nginx: 2023/10/27 07:00:55 [error] 7772#7772: *1110852 open() "/usr/local/emhttp/us" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /us HTTP/1.1", host: "localhost" Oct 27 07:00:55 RD6 
nginx: 2023/10/27 07:00:55 [error] 7772#7772: *1110853 open() "/usr/local/emhttp/us" failed (2: No such file or directory), client: 127.0.0.1, server: , request: "GET /us HTTP/1.1", host: "127.0.0.1" Quote Link to comment
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.
Note: Your post will require moderator approval before it will be visible.