Identifying which files aren't there


Observe

Recommended Posts

Hello,

 

I am currently in the awkward situation where I use Nextcloud for my photo storage; however, through a fault of my own, some of the photos got permanently deleted. I do have an external backup of these photos, but I do not know which ones are missing... Is there any tool that could help me with this? 

The folder structure is completely different, so I cannot just copy/paste using Windows and let that figure it out for me. I need something to find all files that are on the external storage but are not on the share.

 

Thanks in advance for any help!

Link to comment

Make a hash (associative array) keyed by filename (without the path) and increment the value each time that filename is seen in either location.  When done, anything with value 2 is in both, and anything with value 1 is in just one of them.

 

I'd probably use Perl's File::Find and then for each filename $files{$filename}++     and after the find is done walk through the hash keys and print anything with value 1.

 

Or in shell script something like:  find /backup -type f | while read f; do g=$(basename "$f"); echo "$g" >> backed_up; done; find /path/to/share -type f | while read f; do g=$(basename "$f"); echo "$g" >> share; done; sort backed_up > backed_up.sorted; sort share > share.sorted; diff backed_up.sorted share.sorted

 

 

Link to comment

Thanks for the help guys :)

Having retaught myself Python, I made a couple of scripts to do this, influenced by uek2wooF's solution. I will paste the solution below in case anyone stumbles on this in the future; hopefully it can give them a head start.  

FYI, I know this code is stupidly inefficient, but there was really no need for me to make it better. 


 

import os
from PIL import Image
import hashlib
import re

# Number of images hashed so far; also written as the "Index:" value of each
# record in Log.txt.
counter = 0

# All output goes under ./logs.  Create the directory up front so the open()
# calls below do not fail with FileNotFoundError on a first run.
os.makedirs("logs", exist_ok=True)

# Log.txt     - three lines per image: "Index: N", MD5 hex digest, full path.
# HashLog.txt - one MD5 hex digest per line.
# ErrorLog.txt - paths that were skipped (non-ASCII path or processing error).
# Append mode: entries from earlier runs are kept.
log = open("logs/Log.txt", "a")
hashLog = open("logs/HashLog.txt", "a")
errorLog = open("logs/ErrorLog.txt", "a")


def is_ascii(s):
    """Report whether every character in *s* has a code point below 128.

    Returns True for an empty string, since no character violates the rule.
    """
    for ch in s:
        if ord(ch) >= 128:
            # First non-ASCII character decides the answer.
            return False
    return True


# Walk the backup disk and, for every .jpg/.png image, record the MD5 of its
# decoded pixel data (not of the raw file bytes) in the log files opened above.
# Files that cannot be processed are listed in the error log instead.
try:
    for root, dirs, files in os.walk("/mnt/disks/DISKNAME/"):
        if "derivatives" in root:
            # Skip only this directory.  `continue` (not `break`) keeps the
            # walk going for the rest of the tree.
            continue

        for file in files:
            if not file.endswith((".jpg", ".png", ".JPG", ".PNG")):
                continue
            if file.startswith("._"):
                # Skip only this entry; the remaining files in the directory
                # must still be examined.
                continue

            fullFilePath = os.path.join(root, file)
            try:
                if not is_ascii(fullFilePath):
                    # Non-ASCII paths are not hashed, only reported.
                    errorLog.write(fullFilePath + "\n")
                    continue

                print(fullFilePath)
                # Close the image file as soon as the pixel data is read so
                # handles do not pile up over a large photo collection.
                with Image.open(fullFilePath) as image:
                    digest = hashlib.md5(image.tobytes()).hexdigest()

                log.write("Index: " + str(counter) + "\n")
                log.write(digest + "\n")
                log.write(fullFilePath + "\n")
                hashLog.write(digest + "\n")
                counter += 1
            except Exception:
                # Best effort: one unreadable image must not stop the scan.
                try:
                    errorLog.write(fullFilePath + "\n")
                except Exception:
                    # The path itself could not be written to the log.
                    errorLog.write(
                        "There is a badly named file near index" + str(counter) + "\n")

    print("I counted ", counter, " images.")
finally:
    # Always release the log handles, even if the walk aborts.
    log.close()
    hashLog.close()
    errorLog.close()
# Compare two scan logs by image hash and append every record that appears in
# only one of them to logs/differencesNextCloud.txt.
#
# Both logs are read as repeating three-line records:
#     index line / hash line / file-path line
# Each log is parsed once and hashes are looked up in a set, instead of
# re-reading the other file from disk for every single record.

filepathBase = "logs/Log.txt"
filepathComparison = "logs/nextcloudLog.txt"


def _read_records(logPath):
    """Return the (hash_line, path_line) pairs stored in the log at *logPath*.

    Lines are returned exactly as read, trailing newline included, so they can
    be compared and written back unchanged.  Reading stops at the first record
    that has no hash line.
    """
    records = []
    with open(logPath) as fp:
        while True:
            fp.readline()  # index line; not needed for the comparison
            hashLine = fp.readline()
            if not hashLine:
                break
            records.append((hashLine, fp.readline()))
    return records


def _log_missing(records, otherHashes, out):
    """Write each record whose hash is not in *otherHashes* to *out*.

    Every written entry is the hash line, the path line and one blank line.
    """
    for hashLine, pathLine in records:
        if hashLine in otherHashes:
            print("Found " + hashLine + " in both files")
        else:
            print("** Never found " + hashLine + " in comparison  files! Adding to differences! **")
            out.write(hashLine)
            # The extra newline leaves a blank separator line after the path.
            out.write(pathLine + "\n")


baseRecords = _read_records(filepathBase)
compRecords = _read_records(filepathComparison)
baseHashes = {hashLine for hashLine, _ in baseRecords}
compHashes = {hashLine for hashLine, _ in compRecords}

with open("logs/differencesNextCloud.txt", "a") as differencesLog:
    # Records only present in the base log ...
    _log_missing(baseRecords, compHashes, differencesLog)

    #-----------------------------------------------------------------------------------------------------------#

    # ... followed by records only present in the comparison log.
    _log_missing(compRecords, baseHashes, differencesLog)
import os
import shutil

# Move every file listed in logs/toMove.txt into the target directory.
#
# toMove.txt is read as repeating three-line entries:
#     hash line / file-path line / blank line
# Each moved file is renamed "<counter> - <original name>"; the counter
# advances for every entry, whether or not the move succeeded.
# Failed moves are recorded (exception text in moverErrorLog.txt, source path
# in notMovedLog.txt) and processing continues.

filepathTextFile = "logs/toMove.txt"
filepathToMoveTo = "/mnt/user/nextcloud/PATH"
counter = 0

# `with` guarantees all three files are closed even if something unexpected
# is raised mid-run.
with open("logs/notMovedLog.txt", "a") as notMovedLog, \
        open("logs/moverErrorLog.txt", "a") as errorLog, \
        open(filepathTextFile) as fpBase:
    # Skips first hash line
    fpBase.readline()
    currentLine = fpBase.readline()

    while currentLine:
        currentLine = currentLine.strip('\n')
        fileName = os.path.basename(currentLine)
        # Join with a path separator so the file lands inside the target
        # directory whether or not filepathToMoveTo ends with a slash.
        destination = os.path.join(
            filepathToMoveTo, str(counter) + " - " + fileName)

        try:
            shutil.move(currentLine, destination)
            print("New file: " + destination)
        except Exception as e:
            # Best effort: log the failure and carry on with the next entry.
            print("! - Error occured - !")
            errorLog.write(str(e) + "\n")
            notMovedLog.write(currentLine + "\n")

        counter += 1

        # This is the gap
        fpBase.readline()
        # This is the hash
        fpBase.readline()
        # This is the new file path
        currentLine = fpBase.readline()

 

Link to comment

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.
Note: Your post will require moderator approval before it will be visible.

Guest
Reply to this topic...

×   Pasted as rich text.   Restore formatting

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.