From 14048cc049d832f6ff955d33022e2007374a6b51 Mon Sep 17 00:00:00 2001 From: Stephen Mather <1174901+smathermather@users.noreply.github.com> Date: Tue, 2 May 2023 13:33:41 -0400 Subject: [PATCH] Via Australian Plant Phenomics Facility via https://gitlab.com/-/snippets/2493855 discussion here: https://community.opendronemap.org/t/code-snippet-for-naming-dji-phantom-4-multispectral-images-to-work-better-with-odm/14678 --- contrib/exif-binner/exif_binner.py | 207 +++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 contrib/exif-binner/exif_binner.py diff --git a/contrib/exif-binner/exif_binner.py b/contrib/exif-binner/exif_binner.py new file mode 100644 index 00000000..d0fab558 --- /dev/null +++ b/contrib/exif-binner/exif_binner.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 + +# standard libraries +import sys +import os + + +import PIL +from PIL import Image, ExifTags +import shutil +from tqdm import tqdm +import re +import csv + +import math +import argparse +parser = argparse.ArgumentParser() + +# Usage: +# python exif_binner.py + +# required args +parser.add_argument("file_dir", help="input folder of images") +parser.add_argument("output_dir", help="output folder to copy images to") + +# args with defaults +parser.add_argument("-b", "--bands", help="number of expected bands per capture", type=int, default=5) +parser.add_argument("-s", "--sequential", help="use sequential capture group in filenames rather than original capture ID", type=bool, default=True) +parser.add_argument("-z", "--zero_pad", help="if using sequential capture groups, zero-pad the group number to this many digits. 0 for no padding, -1 for auto padding", type=int, default=5) +parser.add_argument("-w", "--whitespace_replace", help="replace whitespace characters with this character", type=str, default="-") + +# optional args no defaults +parser.add_argument("-l", "--logfile", help="write image metadata used to this CSV file", type=str) +parser.add_argument("-r", "--replace_filename", help="use this instead of using the original filename in new filenames", type=str) +parser.add_argument("-f", "--force", help="don't ask for confirmation", action="store_true") +parser.add_argument("-g", "--no_grouping", help="do not apply grouping, only validate and add band name", action="store_true") +args = parser.parse_args() + +file_dir = args.file_dir +output_dir = args.output_dir +replacement_character = args.whitespace_replace +expected_bands = args.bands +logfile = args.logfile + +output_valid = os.path.join(output_dir, "valid") +output_invalid = os.path.join(output_dir, "invalid") + +file_count = len(os.listdir(file_dir)) + +auto_zero_pad = len(str(math.ceil(float(file_count) / float(expected_bands)))) + +if args.zero_pad >= 1: + if int("9" * args.zero_pad) < math.ceil(float(file_count) / float(expected_bands)): + raise ValueError("Zero pad must have more digits than maximum capture groups! Attempted to pad " + str(args.zero_pad) + " digits with " + + str(file_count) + " files and " + str(expected_bands) + " bands (up to " + str(math.ceil(float(file_count) / float(expected_bands))) + + " capture groups possible, try at least " + str(auto_zero_pad) + " digits to zero pad)") + +if args.force is False: + print("Input dir: " + str(file_dir) + " (" + str(file_count) + " files)") + print("Output folder: " + str(output_dir)) + if args.replace_filename: + print("Replacing all basic filenames with: " + args.replace_filename) + else: + print("Replace whitespace in filenames with: " + replacement_character) + print("Number of expected bands: " + str(expected_bands)) + if logfile: + print("Save image processing metadata to: " + logfile) + confirmation = input("Confirm processing [Y/N]: ") + if confirmation.lower() in ["y"]: + pass + else: + sys.exit() + +no_exif_n = 0 + +images = [] + +print("Indexing images ...") + +# Uses tqdm() for the progress bar, if not needed swap with +# for filename in os.listdir(file_dir): + +for filename in tqdm(os.listdir(file_dir)): + old_path = os.path.join(file_dir, filename) + file_name, file_ext = os.path.splitext(filename) + image_entry = {"name": filename, "valid": True, "band": "-", "ID": "-", "group": 0, "DateTime": "-", "error": "-"} # dashes to ensure CSV exports properly, can be blank + try: + img = Image.open(old_path) + except PIL.UnidentifiedImageError as img_err: + # if it tries importing a file it can't read as an image + # uncomment to print errors + # sys.stderr.write(str(img_err) + "\n") + no_exif_n += 1 + if logfile: + image_entry["valid"] = False + image_entry["error"] = "Not readable as image: " + str(img_err) + images.append(image_entry) + continue + for key, val in img.getexif().items(): + if key in ExifTags.TAGS: + # print(ExifTags.TAGS[key] + ":" + str(val)) # debugging + if ExifTags.TAGS[key] == "XMLPacket": + # find bandname + bandname_start = val.find(b'') + bandname_end = val.find(b'') + bandname_coded = val[(bandname_start + 17):bandname_end] + bandname = bandname_coded.decode("UTF-8") + image_entry["band"] = str(bandname) + # find capture ID + image_entry["ID"] = re.findall('CaptureUUID="([^"]*)"', str(val))[0] + if ExifTags.TAGS[key] == "DateTime": + image_entry["DateTime"] = str(val) + image_entry["band"].replace(" ", "-") + if len(image_entry["band"]) >= 99: # if it's too long, wrong value (RGB pic has none) + # no exif present + no_exif_n += 1 + image_entry["valid"] = False + image_entry["error"] = "Image band name appears to be too long" + elif image_entry["ID"] == "" and expected_bands > 1: + no_exif_n += 1 + image_entry["valid"] = False + image_entry["error"] = "No Capture ID found" + if (file_ext.lower() in [".jpg", ".jpeg"]) and (image_entry["band"] == "-"): # hack for DJI RGB jpgs + # handle = open(old_path, 'rb').read() + # xmp_start = handle.find(b'