[Solved] How do I unzip files en masse but skip and log errors


I’ve written my solution in Python, since I found it easier to write and to understand. You need Python 3.7 or newer to run this script, because it calls subprocess.run with capture_output=True.

import os
import shutil
import sys
import datetime
import glob
import subprocess

# Configuration: adjust these values to match your environment.
PATH_7ZIP = r'C:\Program Files\7-Zip\7z.exe' # Path to the 7-Zip executable; change it to match your installation
PATH_ZIPS = r'zips'                          # Folder that is searched for the archives to extract
PATH_OUTF = r'outputs'                       # Folder in which one output folder per archive is created

FILE_LOGS = r'status.log'                    # Name of the log file (opened in append mode)

def log(msg):
    """Append one timestamped line to the log file.

    The line has the form ``YYYY-MM-DD HH:MM:SS.ffffff <msg>`` and uses the
    local time at the moment of the call. The file named by FILE_LOGS is
    created if it does not exist and is never truncated.

    Args:
        msg: Text to record; it is formatted with ``str.format``.
    """
    # errors="backslashreplace" keeps the file's default encoding but writes
    # characters that encoding cannot represent (e.g. unusual characters in
    # an archive name) as backslash escapes instead of raising
    # UnicodeEncodeError and aborting the whole run.
    with open(FILE_LOGS, 'a', errors="backslashreplace") as f:
        now = datetime.datetime.now()
        # isoformat with these arguments always yields
        # "YYYY-MM-DD HH:MM:SS.ffffff" for a naive datetime.
        stamp = now.isoformat(sep=" ", timespec="microseconds")
        f.write("{} {}\n".format(stamp, msg))

def fatal_error(msg, ret):
    """Report an unrecoverable error and terminate the script.

    The message is printed to stderr and appended to the log file, then the
    process exits.

    Args:
        msg: Description of the error (a string).
        ret: Exit status handed to the operating system.

    Raises:
        SystemExit: Always, carrying ``ret``.
    """
    print("Fatal Error:", msg, file=sys.stderr)
    log("Fatal Error: " + msg)
    # Use sys.exit rather than the builtin exit(): the builtin is added by the
    # site module for interactive use and is not available when Python runs
    # with -S or from some frozen/embedded builds.
    sys.exit(ret)

def warning(msg):
    """Write ``msg`` to the log file, tagged as a warning."""
    tagged = "Warning: " + msg
    log(tagged)

def info(msg):
    """Write ``msg`` to the log file, tagged as informational."""
    tagged = "Info: " + msg
    log(tagged)

# The core logic
def extract_zip(z):
    """Extract one archive with 7-Zip and log the outcome.

    The archive is extracted into ``PATH_OUTF/<archive file name>``. A
    non-zero 7-Zip exit code is logged as a warning together with whatever
    7-Zip wrote to stderr (line breaks removed); a zero exit code is logged
    as an info entry containing the archive path.

    Args:
        z: Path of the archive to extract.
    """
    destination = os.path.join(PATH_OUTF, os.path.basename(z))

    # 7-Zip command line:
    #   "e" : extract
    #   z   : the archive to extract
    #   "-y": answer yes to every question 7-Zip may ask (e.g. whether to
    #         overwrite a file)
    #   "-p": pass an empty password so 7-Zip never prompts for one
    #   "-o": output directory, written without a space after the switch
    proc = subprocess.run(
        [PATH_7ZIP, "e", z, "-y", "-p", "-o" + destination],
        capture_output=True,
    )

    if proc.returncode != 0:
        # 7-Zip's stderr can contain non-ASCII bytes (for example file names).
        # A strict ASCII decode would raise UnicodeDecodeError and abort the
        # whole batch, so such bytes are kept as \xNN escapes instead.
        details = proc.stderr.decode("ascii", errors="backslashreplace")
        warning(z + ". " + details.replace('\r', '').replace('\n', ''))
    else:
        # Success: record which archive was extracted.
        info(z)

def main():
    """Find every archive in PATH_ZIPS and extract each one in turn."""
    info("Starting main")

    # Patterns are searched in this order; add further extensions here.
    archives = []
    for pattern in ("*.zip", "*.7z"):
        archives += glob.glob(os.path.join(PATH_ZIPS, pattern))

    info("Found {} zips!".format(len(archives)))

    for archive in archives:
        extract_zip(archive)

    info("End")

# ENTRY POINT: here the program begins
if __name__ == "__main__":
    info("Starting new session")

    # Validate the configured paths before doing any work
    if not os.path.exists(PATH_7ZIP):
        fatal_error("7z.exe not found!", 2)
    if not os.path.exists(PATH_ZIPS):
        fatal_error("Cannot find zips folder!", 3)

    if os.path.exists(PATH_OUTF):
        # Previous outputs are only deleted when the single command-line
        # argument "replace" is given, so that earlier results cannot be
        # wiped by mistake.
        replace_requested = sys.argv[1:] == ['replace']
        if not replace_requested:
            hint = "this script using {} replace".format(sys.argv[0])
            fatal_error(
                "Output dir already exists! Please remove it or call " + hint,
                4)
        else:
            info("Deleting previous output folder")
            shutil.rmtree(PATH_OUTF)
    os.makedirs(PATH_OUTF)

    main()

NOTE: the log file is not overwritten when you run the script again. The script creates it if necessary and appends to it.

1

solved How do I unzip files en masse but skip and log errors