diff --git a/opendm/video/parameters.py b/opendm/video/parameters.py index 13a9dc16..f5cbc557 100644 --- a/opendm/video/parameters.py +++ b/opendm/video/parameters.py @@ -19,6 +19,7 @@ class Parameters: timezone = None frame_format = None stats_file = None + limit = None def __init__(self, args): @@ -27,13 +28,12 @@ class Parameters: # "start" -> start frame index") # "end" -> end frame index") # "output-resolution" -> Override output resolution (ex. 640x480)") - # "blur-percentage" -> discard the lowest X percent of frames based on blur score (allowed values from 0.0 to 1.0)") - # "blur-threshold" -> blur measures that fall below this value will be considered 'blurry' (to be used in exclusion with --blur-percentage)") + # "blur-threshold" -> blur measures that fall below this value will be considered 'blurry'. Good value is 300 # "distance-threshold" -> distance measures that fall below this value will be considered 'similar'") # "black-ratio-threshold" -> Set the threshold for considering a frame 'black'. Express the minimum value for the ratio: nb_black_pixels / nb_pixels. Default value is 0.98") # "pixel-black-threshold" -> Set the threshold for considering a pixel 'black'. The threshold expresses the maximum pixel luminance value for which a pixel is considered 'black'. Good value is 0.30 (30%)") # "use-srt" -> Use SRT files for extracting metadata (same name as video file with .srt extension)") - # "timezone" -> UTC timezone offset (ex. -5 for EST). 
Default to local timezone") + # "limit" -> Maximum number of output frames # "frame-format" -> frame format (jpg, png, tiff, etc.)") # "stats-file" -> Save statistics to csv file") @@ -50,7 +50,8 @@ class Parameters: self.start = args["start"] if args["start"] else 0 self.end = args["end"] if args["end"] else None - self.blur_percentage = args["blur_percentage"] if args["blur_percentage"] else None + self.limit = args["limit"] if args["limit"] else None + self.blur_threshold = args["blur_threshold"] if args["blur_threshold"] else None self.distance_threshold = args["distance_threshold"] if args["distance_threshold"] else None @@ -60,8 +61,6 @@ class Parameters: self.use_srt = args["use_srt"] - self.utc_offset = datetime.timedelta(hours=int(args["timezone"])) if args["timezone"] != "local" else datetime.datetime.now() - datetime.datetime.utcnow() - self.frame_format = args["frame_format"] self.output_resolution = tuple(map(int, args["output_resolution"].split("x"))) if args["output_resolution"] else None @@ -88,29 +87,19 @@ class Parameters: print("Start frame index must be greater than 0") return False + if args["limit"] and args["limit"] < 0: + print("Limit must be greater than 0") + return False + if args["end"]: if args["end"] < 0: print("End frame index must be greater than 0") return False - if args["end"] < args["start"]: + if args["start"] is not None and args["end"] < args["start"]: print("End frame index must be greater than start frame index") return False - if args["timezone"] and args["timezone"] != "local": - try: - val = int(args["timezone"]) - if val < -12 or val > 14: - print("Timezone must be in the range -12 to 14") - return False - except ValueError: - print("Timezone must be a valid integer") - return False - - if args["blur_percentage"] and (args["blur_percentage"] < 0 or args["blur_percentage"] > 1): - print("Blur percentage must be in the range 0.0 to 1.0") - return False - if args["blur_threshold"] and args["blur_threshold"] < 0: print("Blur 
threshold must be greater than 0") return False diff --git a/opendm/video/srtparser.py b/opendm/video/srtparser.py index a69a11d6..1ab8c0cf 100644 --- a/opendm/video/srtparser.py +++ b/opendm/video/srtparser.py @@ -1,24 +1,23 @@ -from datetime import datetime, timedelta +from datetime import datetime import re class SrtFileParser: - def __init__(self, filename, utc_offset): + def __init__(self, filename): self.filename = filename self.data = [] - self.utc_offset = utc_offset - def get_entry(self, timestamp): + def get_entry(self, timestamp: datetime): if not self.data: self.parse() # check min and max - if timestamp < self.min or timestamp > self.max: + if timestamp < self.data[0]["start"] or timestamp > self.data[len(self.data) - 1]["end"]: return None for entry in self.data: - if entry["timestamp"] <= timestamp: + if entry["start"] <= timestamp and entry["end"] >= timestamp: return entry - return self.data[len(self.data) - 1] + return None def parse(self): @@ -30,17 +29,32 @@ class SrtFileParser: # 2023-01-06 18:56:48,380,821 # [iso : 3200] [shutter : 1/60.0] [fnum : 280] [ev : 0] [ct : 3925] [color_md : default] [focal_len : 240] [latitude: 0.000000] [longitude: 0.000000] [altitude: 0.000000] - self.min = datetime.max - self.max = datetime.min - with open(self.filename, 'r') as f: + srtcnt = None + difftime = None + timestamp = None + iso = None + shutter = None + fnum = None + ev = None + ct = None + color_md = None + focal_len = None + latitude = None + longitude = None + altitude = None + start = None + end = None + for line in f: # Check if line is empty if not line.strip(): if srtcnt is not None: self.data.append({ + "start": start, + "end": end, "srtcnt": srtcnt, "difftime": difftime, "timestamp": timestamp, @@ -55,9 +69,6 @@ class SrtFileParser: "longitude": longitude, "altitude": altitude }) - self.min = min(self.min, timestamp) - # account for the difftime milliseconds to get the actual max - self.max = max(self.max, timestamp + 
timedelta(milliseconds=difftime)) srtcnt = None difftime = None @@ -72,11 +83,21 @@ class SrtFileParser: latitude = None longitude = None altitude = None + start = None + end = None + continue # Remove the html font tag line = re.sub('<[^<]+?>', '', line) + # Search this "00:00:00,000 --> 00:00:00,016" + match = re.search("(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})", line) + if match: + start = datetime.strptime(match.group(1), "%H:%M:%S,%f") + end = datetime.strptime(match.group(2), "%H:%M:%S,%f") + + match = re.search("SrtCnt : (\d+)", line) if match: srtcnt = int(match.group(1)) @@ -89,8 +110,6 @@ class SrtFileParser: if match: timestamp = match.group(1) timestamp = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S,%f") - # The timestamp is in local time, so we need to subtract the UTC offset - timestamp = timestamp - self.utc_offset match = re.search("iso : (\d+)", line) if match: @@ -134,9 +153,3 @@ class SrtFileParser: if match: altitude = float(match.group(1)) altitude = altitude if altitude != 0 else None - - self.data.reverse() - - self.max = self.max.replace(microsecond=0) - self.min = self.min.replace(microsecond=0) - diff --git a/opendm/video/video2dataset.py b/opendm/video/video2dataset.py index 759494e2..102098fa 100644 --- a/opendm/video/video2dataset.py +++ b/opendm/video/video2dataset.py @@ -2,31 +2,21 @@ from opendm.video.parameters import Parameters import datetime from fractions import Fraction import io -from math import floor +from math import ceil, floor import time import cv2 import os -import pymediainfo import collections from PIL import Image from checkers import BlackFrameChecker, PercentageBlurChecker, SimilarityChecker, ThresholdBlurChecker from srtparser import SrtFileParser import piexif - class Video2Dataset: def __init__(self, parameters : Parameters): self.parameters = parameters - # We prioritize blur threshold over blur percentage. 
- if parameters.blur_threshold is not None: - self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) - else: - if parameters.blur_percentage is not None: - self.blur_checker = PercentageBlurChecker(parameters.blur_percentage) - else: - self.blur_checker = None - + self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) if parameters.blur_threshold is not None else None self.similarity_checker = SimilarityChecker(parameters.distance_threshold) if parameters.distance_threshold is not None else None self.black_checker = BlackFrameChecker(parameters.black_ratio_threshold, parameters.pixel_black_threshold) if parameters.black_ratio_threshold is not None or parameters.pixel_black_threshold is not None else None @@ -52,26 +42,26 @@ class Video2Dataset: print("Processing video: {}".format(input_file)) # get video info - video_info = self.GetVideoInfo(input_file) + video_info = get_video_info(input_file) print(video_info) - utc_offset = self.parameters.utc_offset if self.parameters.utc_offset is not None else video_info.utc_offset - if self.parameters.use_srt: - srt_file = os.path.splitext(input_file)[0] + ".srt" - if os.path.exists(srt_file): - print("Loading SRT file: {}".format(srt_file)) - srt_parser = SrtFileParser(srt_file, utc_offset) - srt_parser.parse() - else: - srt_file = os.path.splitext(input_file)[0] + ".SRT" + + name = os.path.splitext(input_file)[0] + + srt_files = [name + ".srt", name + ".SRT"] + srt_parser = None + + for srt_file in srt_files: if os.path.exists(srt_file): print("Loading SRT file: {}".format(srt_file)) - srt_parser = SrtFileParser(srt_file, utc_offset) - srt_parser.parse() - else: - print("SRT file not found: {}".format(srt_file)) - srt_parser = None + try: + srt_parser = SrtFileParser(srt_file) + srt_parser.parse() + break + except Exception as e: + print("Error parsing SRT file: {}".format(e)) + srt_parser = None else: srt_parser = None @@ -98,8 +88,6 @@ class Video2Dataset: print("Error opening video stream or 
file") return - frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - if (self.parameters.start is not None): cap.set(cv2.CAP_PROP_POS_FRAMES, self.parameters.start) self.frame_index = self.parameters.start @@ -107,7 +95,9 @@ class Video2Dataset: else: start_frame = 0 - frames_to_process = self.parameters.end - start_frame + 1 if (self.parameters.end is not None) else frames_count - start_frame + frames_to_process = self.parameters.end - start_frame + 1 if (self.parameters.end is not None) else video_info.total_frames - start_frame + + output_file_paths = [] while (cap.isOpened()): ret, frame = cap.read() @@ -127,11 +117,19 @@ class Video2Dataset: if stats is not None and self.parameters.stats_file is not None: self.WriteStats(input_file, stats) + # Add element to array + if stats is not None and "written" in stats.keys(): + output_file_paths.append(stats["path"]) + cap.release() if self.f is not None: self.f.close() + if self.parameters.limit is not None and self.global_idx >= self.parameters.limit: + print("Limit of {} frames reached, trimming dataset to {} frames".format(self.parameters.limit, self.global_idx)) + limit_files(output_file_paths, self.parameters.limit) + end = time.time() print("Total processing time: {:.2f}s".format(end - start)) @@ -173,8 +171,9 @@ class Video2Dataset: self.frame_index += 1 return res - self.SaveFrame(frame, video_info, srt_parser) + path = self.SaveFrame(frame, video_info, srt_parser) res["written"] = True + res["path"] = path self.frame_index += 1 self.global_idx += 1 @@ -192,40 +191,38 @@ class Video2Dataset: _, buf = cv2.imencode('.' 
+ self.parameters.frame_format, frame) - start_time_utc = video_info.start_time_utc if video_info.start_time_utc is not None \ - else srt_parser.data[0].timestamp if srt_parser is not None \ - else datetime.datetime.now() + #start_time_utc = video_info.start_time_utc if video_info.start_time_utc is not None \ + # else srt_parser.data[0].timestamp if srt_parser is not None \ + # else datetime.datetime.now() - elapsed_time_utc = start_time_utc + datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate)) - elapsed_time = elapsed_time_utc + srt_parser.utc_offset + #elapsed_time_utc = start_time_utc + datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate)) + #elapsed_time = elapsed_time_utc + srt_parser.utc_offset if srt_parser is not None else elapsed_time_utc + + delta = datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate)) + # convert to datetime + elapsed_time = datetime.datetime(1900, 1, 1) + delta img = Image.open(io.BytesIO(buf)) - # Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail' + entry = srt_parser.get_entry(elapsed_time) if srt_parser is not None else None + elapsed_time_str = (elapsed_time + (datetime.datetime.now() - datetime.datetime(1900, 1, 1))).strftime("%Y:%m:%d %H:%M:%S.%f") + # Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail' # Set the EXIF metadata exif_dict = { "0th": { piexif.ImageIFD.Software: "ODM", - piexif.ImageIFD.DateTime: elapsed_time.strftime('%Y:%m:%d %H:%M:%S'), + piexif.ImageIFD.DateTime: elapsed_time_str, piexif.ImageIFD.XResolution: (frame.shape[1], 1), piexif.ImageIFD.YResolution: (frame.shape[0], 1), }, "Exif": { - piexif.ExifIFD.DateTimeOriginal: elapsed_time.strftime('%Y:%m:%d %H:%M:%S'), - piexif.ExifIFD.DateTimeDigitized: elapsed_time.strftime('%Y:%m:%d %H:%M:%S'), + piexif.ExifIFD.DateTimeOriginal: elapsed_time_str, + piexif.ExifIFD.DateTimeDigitized: elapsed_time_str, piexif.ExifIFD.PixelXDimension: (frame.shape[1], 1), 
piexif.ExifIFD.PixelYDimension: (frame.shape[0], 1), }} - if video_info.orientation is not None: - exif_dict["0th"][piexif.ImageIFD.Orientation] = video_info.orientation - - if video_info.model is not None: - exif_dict["Exif"][piexif.ImageIFD.Model] = video_info.model - - entry = srt_parser.get_entry(elapsed_time_utc) if srt_parser is not None else None - if entry is not None: segs = entry["shutter"].split("/") exif_dict["Exif"][piexif.ExifIFD.ExposureTime] = (int(float(segs[0])), int(float(segs[1]))) @@ -233,24 +230,14 @@ class Video2Dataset: exif_dict["Exif"][piexif.ExifIFD.FNumber] = (entry["fnum"], 1) exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = (entry["iso"], 1) - exif_dict["GPS"] = { - piexif.GPSIFD.GPSMapDatum: "WGS-84", - piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d %H:%M:%S') - } - - if (entry["latitude"] is not None): - exif_dict["GPS"][piexif.GPSIFD.GPSLatitude] = FloatToRational(entry["latitude"]) - - if (entry["longitude"] is not None): - exif_dict["GPS"][piexif.GPSIFD.GPSLongitude] = FloatToRational(entry["longitude"]) - - if (entry["altitude"] is not None): - exif_dict["GPS"][piexif.GPSIFD.GPSAltitude] = FloatToRational(entry["altitude"]) + exif_dict["GPS"] = get_gps_location(elapsed_time, entry["latitude"], entry["longitude"], entry["altitude"]) exif_bytes = piexif.dump(exif_dict) img.save(path, exif=exif_bytes) + return path + def WriteStats(self, input_file, stats): self.f.write("{};{};{};{};{};{};{};{};{};{}\n".format( @@ -266,84 +253,82 @@ class Video2Dataset: stats["written"] if "written" in stats else "").replace(".", ",")) - def GetVideoInfo(self, input_file): +def get_video_info(input_file): - video = cv2.VideoCapture(input_file) + video = cv2.VideoCapture(input_file) - total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - frame_rate = video.get(cv2.CAP_PROP_FPS) - start_time_utc, utc_offset, orientation, model = self.GetVideoMetadata(input_file) + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + frame_rate 
= video.get(cv2.CAP_PROP_FPS) - video.release() + video.release() - return collections.namedtuple("VideoInfo", ["total_frames", "frame_rate", "start_time_utc", "utc_offset", "orientation", "model"])(total_frames, frame_rate, start_time_utc, utc_offset, orientation, model) + return collections.namedtuple("VideoInfo", ["total_frames", "frame_rate"])(total_frames, frame_rate) - - def GetVideoMetadata(self, input_file): - - try: - - metadata = pymediainfo.MediaInfo.parse(input_file).to_data() - - start_time_utc = None - orientation = 1 - performer = None - utc_offset = None - - if metadata is not None and 'tracks' in metadata: - # Check if it is safe to access the first element of the tracks list - if len(metadata['tracks']) > 0: - - start_time_utc = metadata['tracks'][0].get('encoded_date') or \ - metadata['tracks'][0].get('tagged_date') - - start_time_utc = datetime.datetime.strptime(start_time_utc, '%Z %Y-%m-%d %H:%M:%S') if start_time_utc is not None else None - - file_creation_date_utc = metadata['tracks'][0].get('file_creation_date') - file_creation_date_utc = datetime.datetime.strptime(file_creation_date_utc, '%Z %Y-%m-%d %H:%M:%S.%f') if file_creation_date_utc is not None else None - - file_creation_date = metadata['tracks'][0].get('file_creation_date__local') - file_creation_date = datetime.datetime.strptime(file_creation_date, '%Y-%m-%d %H:%M:%S.%f') if file_creation_date is not None else None - - if file_creation_date_utc is not None and file_creation_date is not None: - utc_offset = file_creation_date - file_creation_date_utc - - performer = metadata['tracks'][0].get('performer') - - # Check if it is safe to access the second element of the tracks list - if len(metadata['tracks']) > 1: - - orientation = metadata['tracks'][1].get('rotation') - - if orientation is not None: - orientation = int(float(orientation)) - - # The 8 EXIF orientation values are numbered 1 to 8. - # 1 = 0°: the correct orientation, no adjustment is required. 
def float_to_rational(f):
    """Return *f* as a ``(numerator, denominator)`` tuple of ints.

    The value is converted with ``Fraction(f).limit_denominator()``, so the
    denominator is capped at the ``limit_denominator`` default (1,000,000).
    """
    f = Fraction(f).limit_denominator()
    return (f.numerator, f.denominator)


def limit_files(paths, limit):
    """Delete files from *paths* until at most *limit* of them remain.

    The survivors are chosen evenly across *paths* in a single pass, and the
    first entry is always kept when ``limit > 0``. Files that are not kept
    are removed from disk with ``os.remove``.

    Args:
        paths: sequence of file paths, in output order.
        limit: maximum number of files to keep; ``0`` removes every file.

    Returns:
        A new list with the paths that were kept, in their original order.

    Raises:
        ValueError: if *limit* is negative. Raised before anything is
            deleted, so an invalid limit can never wipe the dataset.
        OSError: propagated from ``os.remove`` if a file cannot be deleted.
    """
    if limit < 0:
        raise ValueError("limit must be non-negative, got {}".format(limit))

    total = len(paths)
    if total <= limit:
        # Already within the limit: nothing to delete.
        return list(paths)

    # total > limit, so consecutive values of (i * total) // limit differ by
    # at least 1: this yields exactly `limit` distinct, evenly spaced indices.
    # For limit == 0 the range is empty and no division takes place.
    keep = {(i * total) // limit for i in range(limit)}

    kept = []
    for index, path in enumerate(paths):
        if index in keep:
            kept.append(path)
        else:
            os.remove(path)

    return kept


def to_deg(value, loc):
    """Convert decimal coordinates into a degrees, minutes and seconds tuple.

    Args:
        value: float GPS value (decimal degrees).
        loc: direction pair, ``["S", "N"]`` or ``["W", "E"]``; the first item
            is used for negative values, the second for positive values.

    Returns:
        Tuple like ``(25, 13, 48.343, 'N')``. The direction is ``""`` when
        *value* is exactly 0. Seconds are rounded to 5 decimal places.
    """
    if value < 0:
        loc_value = loc[0]
    elif value > 0:
        loc_value = loc[1]
    else:
        loc_value = ""

    abs_value = abs(value)
    deg = int(abs_value)
    total_minutes = (abs_value - deg) * 60
    minutes = int(total_minutes)
    seconds = round((total_minutes - minutes) * 60, 5)

    # Rounding can push the seconds up to 60.0 (e.g. 59.999996 -> 60.0);
    # carry the overflow so minutes and seconds always stay below 60.
    if seconds >= 60:
        seconds -= 60
        minutes += 1
    if minutes >= 60:
        minutes -= 60
        deg += 1

    return (deg, minutes, seconds, loc_value)


def get_gps_location(elapsed_time, lat, lng, altitude):
    """Build the EXIF "GPS" IFD dictionary for one frame.

    Args:
        elapsed_time: datetime used for the ``GPSDateStamp`` tag.
        lat: latitude in decimal degrees, or None when unknown.
        lng: longitude in decimal degrees, or None when unknown.
        altitude: altitude (rounded to an integer before storing), or None
            when unknown.

    Returns:
        Dict keyed by ``piexif.GPSIFD`` tags. Latitude, longitude and
        altitude tags are only present for values that are not None.
    """
    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        # NOTE(review): the EXIF spec defines GPSDateStamp as "YYYY:MM:DD";
        # this value also carries the time of day - confirm what readers
        # of these files expect before changing it.
        piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d %H:%M:%S.%f')
    }

    if altitude is not None:
        rounded_altitude = round(altitude)
        # The altitude is stored as a magnitude; the sign is carried by the
        # reference tag (0 = above sea level, 1 = below sea level).
        gps_ifd[piexif.GPSIFD.GPSAltitudeRef] = 1 if rounded_altitude < 0 else 0
        gps_ifd[piexif.GPSIFD.GPSAltitude] = float_to_rational(abs(rounded_altitude))

    # Convert each coordinate only after its None check: to_deg compares the
    # value against 0 and would raise TypeError on None.
    if lat is not None:
        lat_deg = to_deg(lat, ["S", "N"])
        gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = lat_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLatitude] = tuple(float_to_rational(part) for part in lat_deg[:3])

    if lng is not None:
        lng_deg = to_deg(lng, ["W", "E"])
        gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = lng_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLongitude] = tuple(float_to_rational(part) for part in lng_deg[:3])

    return gps_ifd