#!/usr/bin/env python3
"""
Script to check for leftover files

Checks a collection of temporary or cache directories,
to ensure we aren't wasting image size by forgetting cleanup steps.

This script is run in every local repo image we test.
"""

import os
import sys
from subprocess import check_output
from textwrap import indent

# directories larger than this are considered a failure
# a few little files here aren't a problem
THRESHOLD = 1  # in MB
MB = 1024 * 1024

# the paths to check
# all of these locations should be cleaned up
# missing is okay
PATHS = [
    "/tmp/",
    # check whole home?
    # this shouldn't be empty, but for our tests (so far) it should be very small
    # This is the easiest way to ensure we aren't leaving any unexpected files
    # without knowing ahead of time where all possible caches might be (.npm, .cache, etc.)
    "~/",
    "/root/",
]


def du(path):
    """Return disk usage in megabytes of a path"""
    # -ks: get total size, reported in kilobytes
    out = check_output(["du", "-Hks", path])
    return int(out.split(None, 1)[0]) / 1024


def check_dir_size(path):
    """Check the size of a directory

    Returns:

    True: directory size is below THRESHOLD or is missing
    False: directory is larger than THRESHOLD
    """
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        print("{path}: missing OK".format(**locals()))
        return True
    size_mb = du(path)
    print("{path}: {size_mb:.1f} MB".format(**locals()), end=" ")
    if size_mb <= THRESHOLD:
        print("OK")
        return True
    else:
        print("FAIL")
        # check size of files one-level deep (du only reports dirs)
        for name in os.listdir(path):
            subpath = os.path.join(path, name)
            if os.path.isfile(subpath):
                file_sz = os.stat(subpath).st_size / MB
                if file_sz > 0.1:
                    print("  {file_sz:.1f}M {subpath}".format(**locals()))
        # get report on all subdirs that are at least 100k
        print(
            indent(
                check_output(["du", "-Hh", "-t", "100000", path]).decode("utf8"),
                "  ",
            )
        )
        return False


def main():
    results = [check_dir_size(path) for path in PATHS]
    if not all(results):
        sys.exit(1)


if __name__ == "__main__":
    main()
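
# Usage sketch (an assumption, not part of the original script): inside the
# image under test, run this file directly and rely on the exit status, e.g.
#
#     python3 check-tmp.py && echo "image is clean"
#
# The filename "check-tmp.py" is hypothetical here; any checked path larger
# than THRESHOLD (1 MB) makes the script print FAIL and exit non-zero.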