cache_dirs - an attempt to keep directory entries in RAM to prevent disk spin-up



I'll try it and see if I can catch it when it begins to crash. It's been running now for a few hours.

 

[EDIT]


free -l
             total       used       free     shared    buffers     cached
Mem:       3374304    3267792     106512          0     231668    2663280
Low:        880408     789936      90472
High:      2493896    2477856      16040
-/+ buffers/cache:     372844    3001460
Swap:            0          0          0

 


ulimit -a
core file size          (blocks, -c) 0
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 26101
max locked memory       (kbytes, -l) 64
max memory size         (kbytes, -m) unlimited
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) 26101
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

 


ps -eo size,pid,user,command | sort -n
    0     2 root     [kthreadd]
    0     3 root     [ksoftirqd/0]
    0     6 root     [migration/0]
    0     7 root     [migration/1]
    0     9 root     [ksoftirqd/1]
    0    11 root     [khelper]
    0   158 root     [sync_supers]
    0   160 root     [bdi-default]
    0   162 root     [kblockd]
    0   289 root     [ata_sff]
    0   299 root     [khubd]
    0   408 root     [rpciod]
    0   428 root     [kswapd0]
    0   489 root     [fsnotify_mark]
    0   509 root     [nfsiod]
    0   512 root     [cifsiod]
    0   518 root     [crypto]
    0   679 root     [deferwq]
    0   888 root     [scsi_eh_0]
    0   901 root     [scsi_eh_1]
    0   902 root     [scsi_eh_2]
    0   903 root     [scsi_eh_3]
    0   904 root     [scsi_eh_4]
    0   905 root     [scsi_eh_5]
    0   906 root     [scsi_eh_6]
    0   912 root     [kworker/u:7]
    0   913 root     [scsi_eh_7]
    0   914 root     [scsi_eh_8]
    0   918 root     [scsi_eh_9]
    0   919 root     [usb-storage]
    0  1012 root     [scsi_wq_0]
    0  5713 root     [flush-8:32]
    0 11323 root     [mdrecoveryd]
    0 11326 root     [spinupd]
    0 11327 root     [spinupd]
    0 11328 root     [spinupd]
    0 11329 root     [spinupd]
    0 11330 root     [spinupd]
    0 11331 root     [spinupd]
    0 11332 root     [spinupd]
    0 11333 root     [spinupd]
    0 11334 root     [spinupd]
    0 11335 root     [spinupd]
    0 11336 root     [spinupd]
    0 11337 root     [spinupd]
    0 11401 root     [unraidd]
    0 11434 root     [reiserfs]
    0 13866 root     [kworker/1:2]
    0 14282 root     [kworker/0:1]
    0 25329 root     [kworker/0:2]
    0 27425 root     [kworker/u:0]
    0 27561 root     [kworker/1:0]
   SZ   PID USER     COMMAND
  284     1 root     init
  292  1230 root     /usr/sbin/inetd
  292  1260 root     /usr/sbin/crond -l notice
  296  1096 root     /usr/sbin/syslogd -m0
  296  1244 root     /usr/sbin/acpid
  296  1262 daemon   /usr/sbin/atd -b 15 -l 1
  296 12425 root     logger -tunmenu -plocal7.info -is
  300  1100 root     /usr/sbin/klogd -c 3 -x
  300  1216 bin      /sbin/rpc.portmap
  304  1126 root     /sbin/dhcpcd -t 10 -h Tower eth0
  304 11313 root     /sbin/agetty 38400 tty1 linux
  304 11314 root     /sbin/agetty 38400 tty2 linux
  304 11315 root     /sbin/agetty 38400 tty3 linux
  304 11316 root     /sbin/agetty 38400 tty4 linux
  304 11317 root     /sbin/agetty 38400 tty5 linux
  304 11318 root     /sbin/agetty 38400 tty6 linux
  320  1220 root     /sbin/rpc.statd
  324  1255 81       /usr/bin/dbus-daemon --system
  376 22555 root     in.telnetd: 192.168.1.9
  408 12424 root     /bin/bash /boot/unmenu/uu
  456 11358 root     /usr/sbin/nmbd -D
  484 22556 root     -bash
  664 12323 root     /usr/sbin/ntpd -g -p /var/run/ntpd.pid
  676 11414 root     /sbin/udevd --daemon
  676 19097 root     /sbin/udevd --daemon
  680   707 root     /sbin/udevd --daemon
  736 11360 root     /usr/sbin/smbd -D
  768 21253 root     ps -eo size,pid,user,command
  992 11374 root     /usr/sbin/smbd -D
 1072 14279 root     /usr/sbin/smbd -D
 1560 11817 root     /usr/sbin/smbd -D
 8696 11312 root     /usr/local/sbin/emhttp
10660 12428 root     awk -W re-interval -f ./unmenu.awk
25908 21254 root     sort -n
72148 11524 root     /usr/local/sbin/shfs /mnt/user -disks 16777214 -o noatime,big_writes,allow_other,use_ino
276856 11553 nobody  /usr/lib/java/bin/java -Xmx100m -Dsubsonic.home=/boot/config/plugins/subsonic -Dsubsonic.host=0.0.0.0 -Dsubsonic.port=37011 -Dsubsonic.httpsPort=0 -Dsubsonic.contextPath=/ -Dsubsonic.defaultMusicFolder=/var/music -Dsubsonic.defaultPodcastFolder=/var/music/Podcast -Dsubsonic.defaultPlaylistFolder=/var/playlists -Djava.awt.headless=true -verbose:gc -jar subsonic-booter-jar-with-dependencies.jar

  • 2 weeks later...

[attached image: memory usage graph]

 

Do you see the same effect when you look at your memory stats?

It looks like each time Linux releases the memory, my drives spin up...

 

On the graph, each day at 4:30 AM we can see a memory release... and then my disks spin down around 5:30 AM...


I wonder what is causing the memory release?  If the directory entries cached by cache_dirs are also getting flushed from memory at that time, that would explain the disks spinning up: the next time cache_dirs scans, the disks actually have to be read to get the directory entries again.


I wonder what is causing the memory release?  If the directory entries cached by cache_dirs are also getting flushed from memory at that time, that would explain the disks spinning up: the next time cache_dirs scans, the disks actually have to be read to get the directory entries again.

All it would take is any process running the

sync

command to flush all the disk buffers.  I do not think that is it though, as sync alone would not clear the cache. 
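
For what it's worth, on Linux the page cache and dentry cache are only actually dropped by writing to /proc/sys/vm/drop_caches; sync by itself just writes dirty pages out to disk.  A quick illustration (run as root; this is the generic kernel mechanism, not anything cache_dirs does on its own):

sync                                  # flush dirty pages to disk, caches stay populated
echo 1 > /proc/sys/vm/drop_caches     # drop the page cache only
echo 2 > /proc/sys/vm/drop_caches     # drop dentries and inodes (this is what would hurt cache_dirs)
echo 3 > /proc/sys/vm/drop_caches     # drop everything
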

 

Instead, I suspect some process is using a massive amount of RAM, forcing cached memory to be freed for reuse.

 

What version of unRAID are you running?  What processes do you have running at 4:30 AM in cron that might use a LOT of memory?

 

Joe L.


No idea if this tool was already posted in the previous 28 pages, so I'll post it :)

 

http://hoytech.com/vmtouch/

 

I currently use it to evict my cache drive from memory, since its whole structure, and even complete files, get cached in memory!

You can see how many files are cached in memory for a specific directory:

 

           Files: 755
     Directories: 150
  Resident Pages: 1821733/1821733  6/6G  100%
         Elapsed: 0.028956 seconds

 

 

After my evict:

 

           Files: 755
     Directories: 150
  Resident Pages: 0/1821733  0/6G  0%
         Elapsed: 0.028956 seconds
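
For anyone wanting to reproduce this: the summaries above come from pointing vmtouch at a directory and then evicting it.  Roughly (the path is just an example from my setup):

vmtouch /mnt/cache/Media      # report how much of the tree is resident in the page cache
vmtouch -e /mnt/cache/Media   # evict the tree from the page cache
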

 


 

Really interesting after moving some new content to your share, if you only need to keep just the directory structure in memory!

So now my disks don't spin up anymore after pushing some new data to my share.

 

Did anyone already know this software? Can someone else test it?

  • 4 weeks later...

I'm new to cache_dirs. Don't know why I didn't use it before except that I was lame...

 

Anyway, running it under the plug-in interface on 5.0-rc12a. This is all on a VM. When I open my "Media" share in Windows, disk #2 always spins up. Media is spread across several disks.

 

I'm using pure default config on the plug-in interface.

 

root@Floater:~# free -l
             total       used       free     shared    buffers     cached
Mem:       2074512     739824    1334688          0     334304     211760
Low:        890824     511788     379036
High:      1183688     228036     955652
-/+ buffers/cache:     193760    1880752
Swap:            0          0          0

 

Doesn't look like I'm running into memory limits. ESXi also shows way below the 2GB commit limit for the VM.

 

Where can I look for more debug data?

 

Thanks,

Paul


I'm new to cache_dirs. Don't know why I didn't use it before except that I was lame...

 

Anyway, running it under the plug-in interface on 5.0-rc12a. This is all on a VM. When I open my "Media" share in Windows, disk #2 always spins up. Media is spread across several disks.

 

I'm using pure default config on the plug-in interface.

 

root@Floater:~# free -l
             total       used       free     shared    buffers     cached
Mem:       2074512     739824    1334688          0     334304     211760
Low:        890824     511788     379036
High:      1183688     228036     955652
-/+ buffers/cache:     193760    1880752
Swap:            0          0          0

 

Doesn't look like I'm running into memory limits. ESXi also shows way below the 2GB commit limit for the VM.

 

Where can I look for more debug data?

 

Thanks,

Paul

cache_dirs ONLY reads the directory entries.

If your Windows Explorer is updating its thumbnail images, or looking at the contents of any file, you will see the disk spin up.  Turn off those features in Windows

(fdisk in Windows will often stop them ;))

and you'll see the spin-ups stop.

 

Joe L.

PS. Only kidding about reformatting your Windows hard disk.  It does stop the spin-ups though.

In my structure, under "Media" there are only directories. You have to go another level deeper to see files and thumbs. I'm wondering what could be getting accessed?

 

Do any of these directories (folders) display icons within their folder icon? The system looks one level deeper to determine what to display.

  • 4 weeks later...

Hi Joe L.,

 

I am trying to troubleshoot an issue that one of my friends is having with unRAID and cache_dirs.

 

Cache_dirs is restarting without any log entry every ~1.5-2 hours. My bet was that the process gets killed due to not enough memory, but this occurs even if I exclude all the larger user shares with the '-e' option.

 

Would you have a hint for me on where to look?

 

I uploaded the system log (it is not visible from this log, but please note I also tried excluding a lot more shares, keeping only 2 very small ones in for caching).

 

Thank you for your help in advance!

syslog-2013-05-22.txt


Hi Joe L.,

 

I am trying to troubleshoot an issue that one of my friends is having with unRAID and cache_dirs.

 

Cache_dirs is restarting without any log entry every ~1.5-2 hours. My bet was that the process gets killed due to not enough memory, but this occurs even if I exclude all the larger user shares with the '-e' option.

 

Would you have a hint for me on where to look?

 

I uploaded the system log (it is not visible from this log, but please note I also tried excluding a lot more shares, keeping only 2 very small ones in for caching).

 

Thank you for your help in advance!

The only time cache_dirs will re-schedule itself is if it thinks the array has stopped.

This is the logic involved:

  # If the array is now stopped, terminate cache_dirs and re-invoke later via "at"
  num_dirs=`find /mnt/disk[1-9]* -type d -maxdepth 0 -print 2>/dev/null|wc -l`
  if [ "$num_dirs" -eq 0 ]
  then
    if [ "$background" = "yes" ]
    then
      rm -f $lockfile
      echo scheduling via at $at_command  | logger -t$program_name
      echo $at_command " 1>/dev/null 2>&1" | at now + 1 minute
    fi

 

In other words, this command must be returning 0  (zero directories found)

find /mnt/disk[1-9]* -type d -maxdepth 0 -print | wc -l

 

All I can think of is you are running into some system issue that prevents the above line from returning the number of directories.

 

Try running it when the problem is occurring.


So you don't think it's a problem due to running out of memory?

 

I ran the command and it correctly returns the number of top-level directories (shares).

Do you mean I should run it at the exact moment when cache_dirs re-schedules itself? I don't know how I could time that. Couldn't we print this to the log file?
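
Something like this (just a rough sketch I put together, not part of cache_dirs) could run in the background and log to syslog whenever that test returns zero, so the timestamp could be matched against the restarts:

#!/bin/bash
# log whenever the array-presence test used by cache_dirs comes back empty
while true
do
  num_dirs=`find /mnt/disk[1-9]* -type d -maxdepth 0 -print 2>/dev/null | wc -l`
  if [ "$num_dirs" -eq 0 ]
  then
    echo "find saw 0 disk directories" | logger -tcache_dirs_debug
  fi
  sleep 60
done
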


I can't get around this issue. I have really turned off everything I can, and limited the number of directories to cache (I tried using include and exclude as well), but this just continues to occur.

 

Joe L., do you have a clue how I should track down this issue?


Hi Joe -

 

thought I'd combine the previous post with this one to keep things centralized.

 

As you know, I recently upgraded to rc12a from 4.7 and was using cache_dirs previously without problem.  However, it appears to be spinning up the disks as I'm opening up the root of the user share (on a PC or in a telnet session). 

 

It appears it's a problem when running in conjunction with the latest spinup_when_accessed script.  If cache_dirs is running alone, then listing the contents does not spin up the disks.

 

Running inotifywait while browsing to my Backups share (on disk12) shows:

 

Setting up watches.  Beware: since -r was given, this may take a while!
Watches established.
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ CLOSE_NOWRITE,CLOSE,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ CLOSE_NOWRITE,CLOSE,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ CLOSE_NOWRITE,CLOSE,ISDIR Backups
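
(For reference, the watch above was set up recursively on the user share root, with something along these lines; the exact options I used may have differed slightly:)

inotifywait -m -r /mnt/user/
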

 

Input would be appreciated.  Thank you.


I don't think there will be any benefit for me in running cache_dirs, because my media player needs to read the info files and grab the images (wall and cover)?

Sorry to say, it will not be as much help in that case, but you can give it a try anyway.

 

Joe L.


Hi Joe -

 

thought I'd combine the previous post with this one to keep things centralized.

 

As you know, I recently upgraded to rc12a from 4.7 and was using cache_dirs previously without problem.  However, it appears to be spinning up the disks as I'm opening up the root of the user share (on a PC or in a telnet session). 

 

It appears it's a problem when running in conjunction with the latest spinup_when_accessed script.  If cache_dirs is running alone, then listing the contents does not spin up the disks.

 

Running inotifywait while browsing to my Backups share (on disk12) shows:

 

Setting up watches.  Beware: since -r was given, this may take a while!
Watches established.
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ CLOSE_NOWRITE,CLOSE,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ CLOSE_NOWRITE,CLOSE,ISDIR Backups
/mnt/user/ OPEN,ISDIR Backups
/mnt/user/ CLOSE_NOWRITE,CLOSE,ISDIR Backups

 

Input would be appreciated.  Thank you.

It shows those directories are no longer in the buffer cache.  If you've excluded "Backups" that is entirely expected.  If not, then you just have a lot of files to scan and cache, and not enough RAM to hold it all in the buffer cache.

It shows those directories are no longer in the buffer cache.  If you've excluded "Backups" that is entirely expected.  If not, then you just have a lot of files to scan and cache, and not enough RAM to hold it all in the buffer cache.

 

Thanks for the additional input Joe.  At the moment, cache_dirs is running without any exclusions.  I've tried some of the troubleshooting steps you mentioned in a private e-mail (e.g. running in the foreground, changing cache pressure, etc.), but unfortunately haven't been getting any closer to solving the problem.  I'm not convinced memory is the problem, as everything was running as expected under v4.7.  Under rc12a, cache_dirs will run fine by itself, but as soon as spinup_when_accessed (which presumably doesn't have a large memory footprint) is added to the mix, it stops working properly.

 

Output of free -m:

 

             total       used       free     shared    buffers     cached
Mem:          4040       3934        105          0        214       3327
-/+ buffers/cache:        391       3648
Swap:            0          0          0
root@Tower:~# 

 

 

 


A lot of this depends on how many files you have on your disks.

The greater the number of files, the greater the chance that entries will be flushed out of the dentry cache.

 

It depends on low memory, plus there's a fixed maximum size for the dentry cache. When I last looked at the kernel code, it was also pretty aggressive in reclaiming dentry nodes that had not been accessed recently or that exceeded the limit.

As an experiment I booted with a parameter to expand the dentry cache. It did not work for me. In fact it caused OOM conditions.
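
The tunable most often mentioned for this is vm.vfs_cache_pressure, which biases the kernel toward or away from reclaiming dentries/inodes relative to ordinary page-cache data.  You can inspect or adjust it directly (the values below are only examples; pushing it very low on a box that's already tight on RAM risks exactly the OOM behaviour described above):

sysctl vm.vfs_cache_pressure                 # show the current value (kernel default is 100)
sysctl -w vm.vfs_cache_pressure=10           # make the kernel more reluctant to drop dentries/inodes
echo 10 > /proc/sys/vm/vfs_cache_pressure    # same thing via /proc
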

 

I know my environment was different. While others enjoyed the benefit of directory caching, I could not *sigh*.  My download/bittorrent drive had so many files that it would just exceed all limits.

 

Along with my rsync_linked_backup methodology, the file count was ginormous.

 

I wonder if we are seeing a trend with the new kernel, or if it's just the amount of files on the filesystem.

 

Have you tried capturing the files in a list with find?

 

find /mnt/disk1 -ls > /tmp/filelist.disk1

 

you can do that for each disk.

 

Then do:

 

wc -l /tmp/filelist.*
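
Or, as a one-shot sketch over all the data disks (assuming they are mounted as /mnt/disk1, /mnt/disk2, ...):

for d in /mnt/disk[1-9]*
do
  find "$d" -ls > /tmp/filelist.$(basename $d)   # one file listing per disk
done
wc -l /tmp/filelist.*                            # line counts = file counts per disk
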

 

 

@Joe L, is there a max depth option for dircache?

 

 

 


@Joe L, is there a max depth option for dircache?

cache_dirs -d NN -w

Where NN = the maxdepth used by the "find" command.

 

I would normally say you could try

cache_dirs -d 5 -w

as most directory trees for media are less than 5 levels deep.

 

/mnt/disk5/Movies/moviename/files = only 4 levels
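
To see what a given depth would cover before committing to it, you can run the same kind of bounded find by hand; as a rough sketch (the options the actual script uses differ):

find /mnt/disk[1-9]*/ -maxdepth 5 -type d | wc -l   # how many directories a depth of 5 would touch
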
