diff --git a/README.md b/README.md
index 9e25d6a0..c1bbbb79 100644
--- a/README.md
+++ b/README.md
@@ -259,6 +259,10 @@ Experimental flags need to be enabled in Docker to use the ```--squash``` flag.
 
 After this, you must restart docker.
 
+## Video Support
+
+Starting from version 3.0.4, ODM can automatically extract images from video files (.mp4 or .mov). Just place one or more video files into the `images` folder and run the program as usual. Subtitle files (.srt) with GPS information are also supported: place them in the `images` folder, making sure that each subtitle filename matches its video filename. For example, `my_video.mp4` ==> `my_video.srt` (case-sensitive).
+
 ## Developers
 
 Help improve our software! We welcome contributions from everyone, whether to add new features, improve speed, fix existing bugs or add support for more cameras. Check our [code of conduct](https://github.com/OpenDroneMap/documents/blob/master/CONDUCT.md), the [contributing guidelines](https://github.com/OpenDroneMap/documents/blob/master/CONTRIBUTING.md) and [how decisions are made](https://github.com/OpenDroneMap/documents/blob/master/GOVERNANCE.md#how-decisions-are-made).
diff --git a/SuperBuild/cmake/External-OpenCV.cmake b/SuperBuild/cmake/External-OpenCV.cmake
index 812d4873..293d788a 100644
--- a/SuperBuild/cmake/External-OpenCV.cmake
+++ b/SuperBuild/cmake/External-OpenCV.cmake
@@ -55,7 +55,7 @@ ExternalProject_Add(${_proj_name}
       -DBUILD_opencv_photo=ON
       -DBUILD_opencv_legacy=ON
       -DBUILD_opencv_python3=ON
-      -DWITH_FFMPEG=OFF
+      -DWITH_FFMPEG=ON
       -DWITH_CUDA=OFF
       -DWITH_GTK=OFF
       -DWITH_VTK=OFF
diff --git a/VERSION b/VERSION
index 75a22a26..b0f2dcb3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.0.3
+3.0.4
diff --git a/opendm/config.py b/opendm/config.py
index e9de9e5b..c7953fb7 100755
--- a/opendm/config.py
+++ b/opendm/config.py
@@ -653,6 +653,20 @@ def config(argv=None, parser=None):
                         version='ODM {0}'.format(__version__),
                         help='Displays version number and exits. ')
 
+    parser.add_argument('--video-limit',
+                        type=int,
+                        action=StoreValue,
+                        default=500,
+                        metavar='<positive integer>',
+                        help='Maximum number of frames to extract from video files for processing. Set to 0 for no limit. Default: %(default)s')
+
+    parser.add_argument('--video-resolution',
+                        type=int,
+                        action=StoreValue,
+                        default=4000,
+                        metavar='<positive integer>',
+                        help='The maximum output resolution of extracted video frames, in pixels. Default: %(default)s')
+
     parser.add_argument('--split',
                         type=int,
                         action=StoreValue,
diff --git a/opendm/context.py b/opendm/context.py
index 2fac61d3..7bafdd5d 100644
--- a/opendm/context.py
+++ b/opendm/context.py
@@ -41,6 +41,7 @@ settings_path = os.path.join(root_path, 'settings.yaml')
 
 # Define supported image extensions
 supported_extensions = {'.jpg','.jpeg','.png', '.tif', '.tiff', '.bmp'}
+supported_video_extensions = {'.mp4', '.mov'}
 
 # Define the number of cores
 num_cores = multiprocessing.cpu_count()
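For context, this is how the new extension set can be used to spot candidate videos in a dataset directory (a minimal sketch; `list_videos` is a hypothetical helper, the real filtering lives in `search_video_files` in `stages/dataset.py` further down):

```python
import os
from opendm import context

def list_videos(in_dir):
    # Keep files whose extension is in the new supported_video_extensions set
    return [f for f in os.listdir(in_dir)
            if os.path.splitext(f)[1].lower() in context.supported_video_extensions]
```
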
diff --git a/opendm/location.py b/opendm/location.py
index 46e9b550..64c5c07a 100644
--- a/opendm/location.py
+++ b/opendm/location.py
@@ -119,7 +119,6 @@ def parse_srs_header(header):
     :param header (str) line
     :return Proj object
     """
-    log.ODM_INFO('Parsing SRS header: %s' % header)
     header = header.strip()
     ref = header.split(' ')
 
@@ -155,4 +154,15 @@ def parse_srs_header(header):
                       'Modify your input and try again.' % header)
         raise RuntimeError(e)
 
-    return srs
\ No newline at end of file
+    return srs
+
+def utm_srs_from_ll(lon, lat):
+    utm_zone, hemisphere = get_utm_zone_and_hemisphere_from(lon, lat)
+    return parse_srs_header("WGS84 UTM %s%s" % (utm_zone, hemisphere))
+
+def utm_transformers_from_ll(lon, lat):
+    source_srs = CRS.from_epsg(4326)
+    target_srs = utm_srs_from_ll(lon, lat)
+    ll_to_utm = transformer(source_srs, target_srs)
+    utm_to_ll = transformer(target_srs, source_srs)
+    return ll_to_utm, utm_to_ll
\ No newline at end of file
diff --git a/opendm/video/__init__.py b/opendm/video/__init__.py
new file mode 100644
index 00000000..e69de29b
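The two new helpers in `opendm/location.py` are what the SRT parser uses to interpolate GPS positions in planar coordinates. A minimal sketch of the round trip (coordinates are illustrative; the `TransformPoint` calls mirror the usage in `srtparser.py` below):

```python
from opendm.location import utm_transformers_from_ll

ll_to_utm, utm_to_ll = utm_transformers_from_ll(-82.6669, 27.7716)

# Forward: lon/lat/alt to UTM meters; inverse: back again
x, y, z = ll_to_utm.TransformPoint(-82.6669, 27.7716, 10.0)
lon, lat, alt = utm_to_ll.TransformPoint(x, y, z)
```
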
diff --git a/opendm/video/checkers.py b/opendm/video/checkers.py
new file mode 100644
index 00000000..1036689f
--- /dev/null
+++ b/opendm/video/checkers.py
@@ -0,0 +1,128 @@
+import cv2
+import numpy as np
+
+class ThresholdBlurChecker:
+    def __init__(self, threshold):
+        self.threshold = threshold
+
+    def NeedPreProcess(self):
+        return False
+
+    def PreProcess(self, video_path, start_frame, end_frame):
+        return
+
+    def IsBlur(self, image_bw, id):
+        var = cv2.Laplacian(image_bw, cv2.CV_64F).var()
+        return var, var < self.threshold
+
+class SimilarityChecker:
+    def __init__(self, threshold, max_features=500):
+        self.threshold = threshold
+        self.max_features = max_features
+        self.last_image = None
+        self.last_image_id = None
+        self.last_image_features = None
+
+    def IsSimilar(self, image_bw, id):
+
+        if self.last_image is None:
+            self.last_image = image_bw
+            self.last_image_id = id
+            self.last_image_features = cv2.goodFeaturesToTrack(image_bw, self.max_features, 0.01, 10)
+            return 0, False, None
+
+        # Detect features
+        features, status, _ = cv2.calcOpticalFlowPyrLK(self.last_image, image_bw, self.last_image_features, None)
+
+        # Filter out the "bad" features (i.e. those that are not tracked successfully)
+        good_features = features[status == 1]
+        good_features2 = self.last_image_features[status == 1]
+
+        # Calculate the difference between the locations of the good features in the two frames
+        distance = np.average(np.abs(good_features2 - good_features))
+
+        res = distance < self.threshold
+
+        if not res:
+            self.last_image = image_bw
+            self.last_image_id = id
+            self.last_image_features = cv2.goodFeaturesToTrack(image_bw, self.max_features, 0.01, 10)
+
+        return distance, res, self.last_image_id
+
+
+class NaiveBlackFrameChecker:
+    def __init__(self, threshold):
+        self.threshold = threshold
+
+    def PreProcess(self, video_path, start_frame, end_frame, width=800, height=600):
+        return
+
+    def NeedPreProcess(self):
+        return False
+
+    def IsBlack(self, image_bw, id):
+        return np.average(image_bw) < self.threshold
+
+
+class BlackFrameChecker:
+    def __init__(self, picture_black_ratio_th=0.98, pixel_black_th=0.30):
+        self.picture_black_ratio_th = picture_black_ratio_th if picture_black_ratio_th is not None else 0.98
+        self.pixel_black_th = pixel_black_th if pixel_black_th is not None else 0.30
+        self.luminance_minimum_value = None
+        self.luminance_range_size = None
+        self.absolute_threshold = None
+
+    def NeedPreProcess(self):
+        return True
+
+    def PreProcess(self, video_path, start_frame, end_frame):
+        # Open video file
+        cap = cv2.VideoCapture(video_path)
+
+        # Set frame start and end indices
+        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+        frame_end = end_frame
+        if end_frame == -1:
+            frame_end = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        # Initialize luminance range size and minimum value
+        self.luminance_range_size = 0
+        self.luminance_minimum_value = 255
+
+        frame_index = start_frame if start_frame is not None else 0
+
+        # Read and process frames from video file
+        while cap.isOpened() and (frame_end is None or frame_index <= frame_end):
+
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            # Convert frame to grayscale
+            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            gray_frame_min = gray_frame.min()
+            gray_frame_max = gray_frame.max()
+
+            # Update luminance range size and minimum value
+            self.luminance_range_size = max(self.luminance_range_size, gray_frame_max - gray_frame_min)
+            self.luminance_minimum_value = min(self.luminance_minimum_value, gray_frame_min)
+
+            frame_index += 1
+
+        # Calculate absolute threshold for considering a pixel "black"
+        self.absolute_threshold = self.luminance_minimum_value + self.pixel_black_th * self.luminance_range_size
+
+        # Close video file
+        cap.release()
+
+    def IsBlack(self, image_bw, id):
+
+        # Count number of pixels < self.absolute_threshold
+        nb_black_pixels = np.sum(image_bw < self.absolute_threshold)
+
+        # Calculate ratio of black pixels
+        ratio_black_pixels = nb_black_pixels / (image_bw.shape[0] * image_bw.shape[1])
+
+        # Check if ratio of black pixels is above threshold
+        return ratio_black_pixels >= self.picture_black_ratio_th
\ No newline at end of file
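A sketch of the checkers in isolation (the file name is illustrative; 300 and 10 are the thresholds the dataset stage passes in below). `IsBlur` returns the variance of the Laplacian, a standard focus measure: sharp frames have strong edges and thus high variance, so values below the threshold are flagged as blurry:

```python
import cv2
from opendm.video.checkers import ThresholdBlurChecker, SimilarityChecker

gray = cv2.imread("frame.jpg", cv2.IMREAD_GRAYSCALE)  # illustrative path

blur = ThresholdBlurChecker(threshold=300)
score, is_blurry = blur.IsBlur(gray, 0)

# The first call always returns (0, False, None) and just stores the frame;
# later calls track features with optical flow and measure their displacement.
sim = SimilarityChecker(threshold=10)
distance, is_similar, last_id = sim.IsSimilar(gray, 0)
```

For `BlackFrameChecker`, the preprocessing pass calibrates the cutoff: if the darkest pixel seen across the video is 3 and the widest per-frame luminance range is 180, then with `pixel_black_th=0.30` any pixel below 3 + 0.30 × 180 = 57 counts as "black", and a frame is discarded when at least 98% of its pixels fall below that value.
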
1024)") + # "blur-threshold" -> blur measures that fall below this value will be considered 'blurry'. Good value is 300 + # "distance-threshold" -> distance measures that fall below this value will be considered 'similar'") + # "black-ratio-threshold" -> Set the threshold for considering a frame 'black'. Express the minimum value for the ratio: nb_black_pixels / nb_pixels. Default value is 0.98") + # "pixel-black-threshold" -> Set the threshold for considering a pixel 'black'. The threshold expresses the maximum pixel luminance value for which a pixel is considered 'black'. Good value is 0.30 (30%)") + # "use-srt" -> Use SRT files for extracting metadata (same name as video file with .srt extension)") + # "limit" -> Maximum number of output frames + # "frame-format" -> frame format (jpg, png, tiff, etc.)") + # "stats-file" -> Save statistics to csv file") + + if not os.path.exists(args["output"]): + os.makedirs(args["output"]) + + self.input = args["input"] + if isinstance(self.input, str): + self.input = [self.input] + + self.output = args["output"] + self.start = args.get("start", 0) + self.end = args.get("end", None) + self.limit = args.get("limit", None) + self.blur_threshold = args.get("blur_threshold", None) + self.distance_threshold = args.get("distance_threshold", None) + self.black_ratio_threshold = args.get("black_ratio_threshold", None) + self.pixel_black_threshold = args.get("pixel_black_threshold", None) + self.use_srt = "use_srt" in args + self.frame_format = args.get("frame_format", "jpg") + self.max_dimension = args.get("max_dimension", None) + + self.stats_file = args.get("stats_file", None) + + # We will resize the image to this size before processing + self.internal_resolution = 800 diff --git a/opendm/video/srtparser.py b/opendm/video/srtparser.py new file mode 100644 index 00000000..7e52db43 --- /dev/null +++ b/opendm/video/srtparser.py @@ -0,0 +1,206 @@ +from datetime import datetime +from opendm import location, log +import re + + +def match_single(regexes, line, dtype=int): + if isinstance(regexes, str): + regexes = [(regexes, dtype)] + + for i in range(len(regexes)): + if isinstance(regexes[i], str): + regexes[i] = (regexes[i], dtype) + + try: + for r, transform in regexes: + match = re.search(r, line) + if match: + res = match.group(1) + return transform(res) + except Exception as e: + log.ODM_WARNING("Cannot parse SRT line \"%s\": %s", (line, str(e))) + + return None + +class SrtFileParser: + def __init__(self, filename): + self.filename = filename + self.data = [] + self.gps_data = [] + self.ll_to_utm = None + self.utm_to_ll = None + + def get_entry(self, timestamp: datetime): + if not self.data: + self.parse() + + # check min and max + if timestamp < self.data[0]["start"] or timestamp > self.data[len(self.data) - 1]["end"]: + return None + + for entry in self.data: + if entry["start"] <= timestamp and entry["end"] >= timestamp: + return entry + + return None + + def get_gps(self, timestamp): + if not self.data: + self.parse() + + # Initialize on first call + prev_coords = None + + if not self.gps_data: + for d in self.data: + lat, lon, alt = d.get('latitude'), d.get('longitude'), d.get('altitude') + tm = d.get('start') + + if lat is not None and lon is not None: + if self.ll_to_utm is None: + self.ll_to_utm, self.utm_to_ll = location.utm_transformers_from_ll(lon, lat) + + coords = self.ll_to_utm.TransformPoint(lon, lat, alt) + + # First or new (in X/Y only) + add = (not len(self.gps_data)) or (coords[0], coords[1]) != (self.gps_data[-1][1][0], 
diff --git a/opendm/video/srtparser.py b/opendm/video/srtparser.py
new file mode 100644
index 00000000..7e52db43
--- /dev/null
+++ b/opendm/video/srtparser.py
@@ -0,0 +1,206 @@
+from datetime import datetime
+from opendm import location, log
+import re
+
+
+def match_single(regexes, line, dtype=int):
+    if isinstance(regexes, str):
+        regexes = [(regexes, dtype)]
+
+    for i in range(len(regexes)):
+        if isinstance(regexes[i], str):
+            regexes[i] = (regexes[i], dtype)
+
+    try:
+        for r, transform in regexes:
+            match = re.search(r, line)
+            if match:
+                res = match.group(1)
+                return transform(res)
+    except Exception as e:
+        log.ODM_WARNING("Cannot parse SRT line \"%s\": %s" % (line, str(e)))
+
+    return None
+
+class SrtFileParser:
+    def __init__(self, filename):
+        self.filename = filename
+        self.data = []
+        self.gps_data = []
+        self.ll_to_utm = None
+        self.utm_to_ll = None
+
+    def get_entry(self, timestamp: datetime):
+        if not self.data:
+            self.parse()
+
+        # check min and max
+        if timestamp < self.data[0]["start"] or timestamp > self.data[-1]["end"]:
+            return None
+
+        for entry in self.data:
+            if entry["start"] <= timestamp and entry["end"] >= timestamp:
+                return entry
+
+        return None
+
+    def get_gps(self, timestamp):
+        if not self.data:
+            self.parse()
+
+        # Build the GPS track (in UTM coordinates) on first call
+        if not self.gps_data:
+            for d in self.data:
+                lat, lon, alt = d.get('latitude'), d.get('longitude'), d.get('altitude')
+                tm = d.get('start')
+
+                if lat is not None and lon is not None:
+                    if self.ll_to_utm is None:
+                        self.ll_to_utm, self.utm_to_ll = location.utm_transformers_from_ll(lon, lat)
+
+                    coords = self.ll_to_utm.TransformPoint(lon, lat, alt)
+
+                    # First point, or a new point (in X/Y only)
+                    add = (not len(self.gps_data)) or (coords[0], coords[1]) != (self.gps_data[-1][1][0], self.gps_data[-1][1][1])
+                    if add:
+                        self.gps_data.append((tm, coords))
+
+        # No data available
+        if not len(self.gps_data) or self.gps_data[0][0] > timestamp:
+            return None
+
+        # Interpolate
+        start = None
+        for i in range(len(self.gps_data)):
+            tm, coords = self.gps_data[i]
+
+            # Perfect match
+            if timestamp == tm:
+                return self.utm_to_ll.TransformPoint(*coords)
+
+            elif tm > timestamp:
+                end = i
+                start = i - 1
+                if start < 0:
+                    return None
+
+                gd_s = self.gps_data[start]
+                gd_e = self.gps_data[end]
+                sx, sy, sz = gd_s[1]
+                ex, ey, ez = gd_e[1]
+
+                dt = (gd_e[0] - gd_s[0]).total_seconds()
+                if dt >= 10:
+                    return None
+
+                dx = (ex - sx) / dt
+                dy = (ey - sy) / dt
+                dz = (ez - sz) / dt
+                t = (timestamp - gd_s[0]).total_seconds()
+
+                return self.utm_to_ll.TransformPoint(
+                    sx + dx * t,
+                    sy + dy * t,
+                    sz + dz * t
+                )
+
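+    # Note: get_gps interpolates linearly in UTM space; when two consecutive
+    # GPS fixes are more than 10 seconds apart, the segment is treated as a
+    # gap and no position is returned for timestamps that fall inside it.
+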
+    def parse(self):
+
+        # SRT metadata is not standardized; we support the following formats:
+
+        # DJI Mavic Air 2
+        # 1
+        # 00:00:00,000 --> 00:00:00,016
+        # SrtCnt : 1, DiffTime : 16ms
+        # 2023-01-06 18:56:48,380,821
+        # [iso : 3200] [shutter : 1/60.0] [fnum : 280] [ev : 0] [ct : 3925] [color_md : default] [focal_len : 240] [latitude: 0.000000] [longitude: 0.000000] [altitude: 0.000000]
+
+        # DJI Mavic Mini
+        # 1
+        # 00:00:00,000 --> 00:00:01,000
+        # F/2.8, SS 206.14, ISO 150, EV 0, GPS (-82.6669, 27.7716, 10), D 2.80m, H 0.00m, H.S 0.00m/s, V.S 0.00m/s
+
+        with open(self.filename, 'r') as f:
+
+            iso = None
+            shutter = None
+            fnum = None
+            focal_len = None
+            latitude = None
+            longitude = None
+            altitude = None
+            start = None
+            end = None
+
+            for line in f:
+
+                # Check if line is empty
+                if not line.strip():
+                    if start is not None:
+                        self.data.append({
+                            "start": start,
+                            "end": end,
+                            "iso": iso,
+                            "shutter": shutter,
+                            "fnum": fnum,
+                            "focal_len": focal_len,
+                            "latitude": latitude,
+                            "longitude": longitude,
+                            "altitude": altitude
+                        })
+
+                    iso = None
+                    shutter = None
+                    fnum = None
+                    focal_len = None
+                    latitude = None
+                    longitude = None
+                    altitude = None
+                    start = None
+                    end = None
+
+                    continue
+
+                # Remove html tags
+                line = re.sub('<[^<]+?>', '', line)
+
+                # Search for e.g. "00:00:00,000 --> 00:00:00,016"
+                match = re.search(r"(\d{2}:\d{2}:\d{2},\d+) --> (\d{2}:\d{2}:\d{2},\d+)", line)
+                if match:
+                    start = datetime.strptime(match.group(1), "%H:%M:%S,%f")
+                    end = datetime.strptime(match.group(2), "%H:%M:%S,%f")
+
+                iso = match_single([
+                    r"iso : (\d+)",
+                    r"ISO (\d+)"
+                ], line)
+
+                shutter = match_single([
+                    r"shutter : \d+/(\d+\.?\d*)",
+                    r"SS (\d+\.?\d*)"
+                ], line)
+
+                fnum = match_single([
+                    (r"fnum : (\d+)", lambda v: float(v) / 100.0),
+                    (r"F/([\d\.]+)", float),
+                ], line)
+
+                focal_len = match_single(r"focal_len : (\d+)", line)
+
+                latitude = match_single([
+                    (r"latitude: ([\d\.\-]+)", lambda v: float(v) if float(v) != 0 else None),
+                    (r"GPS \([\d\.\-]+,? ([\d\.\-]+),? [\d\.\-]+\)", lambda v: float(v) if float(v) != 0 else None),
+                ], line)
+
+                longitude = match_single([
+                    (r"longitude: ([\d\.\-]+)", lambda v: float(v) if float(v) != 0 else None),
+                    (r"GPS \(([\d\.\-]+),? [\d\.\-]+,? [\d\.\-]+\)", lambda v: float(v) if float(v) != 0 else None),
+                ], line)
+
+                altitude = match_single([
+                    (r"altitude: ([\d\.\-]+)", lambda v: float(v) if float(v) != 0 else None),
+                    (r"GPS \([\d\.\-]+,? [\d\.\-]+,? ([\d\.\-]+)\)", lambda v: float(v) if float(v) != 0 else None),
+                ], line)
\ No newline at end of file
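A sketch of the parser on its own (the file name is illustrative; timestamps are relative to the start of the video and anchored at 1900-01-01, matching how `SaveFrame` below computes `elapsed_time`):

```python
from datetime import datetime
from opendm.video.srtparser import SrtFileParser

parser = SrtFileParser("my_video.srt")
parser.parse()

t = datetime(1900, 1, 1, 0, 0, 12)  # 12 seconds into the video
entry = parser.get_entry(t)         # exposure metadata dict, or None
coords = parser.get_gps(t)          # interpolated (lon, lat, alt), or None
```
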
diff --git a/opendm/video/video2dataset.py b/opendm/video/video2dataset.py
new file mode 100644
index 00000000..8f977dfa
--- /dev/null
+++ b/opendm/video/video2dataset.py
@@ -0,0 +1,351 @@
+import datetime
+from fractions import Fraction
+import io
+from math import ceil, floor
+import time
+import cv2
+import os
+import collections
+from PIL import Image
+import numpy as np
+import piexif
+from opendm import log
+from opendm.video.srtparser import SrtFileParser
+from opendm.video.parameters import Parameters
+from opendm.video.checkers import BlackFrameChecker, SimilarityChecker, ThresholdBlurChecker
+
+class Video2Dataset:
+
+    def __init__(self, parameters: Parameters):
+        self.parameters = parameters
+
+        self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) if parameters.blur_threshold is not None else None
+        self.similarity_checker = SimilarityChecker(parameters.distance_threshold) if parameters.distance_threshold is not None else None
+        self.black_checker = BlackFrameChecker(parameters.black_ratio_threshold, parameters.pixel_black_threshold) if parameters.black_ratio_threshold is not None or parameters.pixel_black_threshold is not None else None
+
+        self.frame_index = parameters.start
+        self.f = None
+
+
+    def ProcessVideo(self):
+        self.date_now = None
+        start = time.time()
+
+        if self.parameters.stats_file is not None:
+            self.f = open(self.parameters.stats_file, "w")
+            self.f.write("global_idx;file_name;frame_index;blur_score;is_blurry;is_black;last_frame_index;similarity_score;is_similar;written\n")
+
+        self.global_idx = 0
+
+        output_file_paths = []
+
+        # foreach input file
+        for input_file in self.parameters.input:
+            # get file name
+            file_name = os.path.basename(input_file)
+            log.ODM_INFO("Processing video: {}".format(input_file))
+
+            # get video info
+            video_info = get_video_info(input_file)
+            log.ODM_INFO(video_info)
+
+            # Set pseudo start time
+            if self.date_now is None:
+                try:
+                    self.date_now = datetime.datetime.fromtimestamp(os.path.getmtime(input_file))
+                except:
+                    self.date_now = datetime.datetime.now()
+            else:
+                self.date_now += datetime.timedelta(seconds=video_info.total_frames / video_info.frame_rate)
+
+            log.ODM_INFO("Using pseudo start time: %s" % self.date_now)
+
+            srt_parser = None
+            if self.parameters.use_srt:
+                name = os.path.splitext(input_file)[0]
+                srt_files = [name + ".srt", name + ".SRT"]
+
+                for srt_file in srt_files:
+                    if os.path.exists(srt_file):
+                        log.ODM_INFO("Loading SRT file: {}".format(srt_file))
+                        try:
+                            srt_parser = SrtFileParser(srt_file)
+                            srt_parser.parse()
+                            break
+                        except Exception as e:
+                            log.ODM_INFO("Error parsing SRT file: {}".format(e))
+                            srt_parser = None
+
+            if self.black_checker is not None and self.black_checker.NeedPreProcess():
+                start2 = time.time()
+                log.ODM_INFO("Preprocessing for black frame checker... this might take a bit")
+                self.black_checker.PreProcess(input_file, self.parameters.start, self.parameters.end)
+                end = time.time()
+                log.ODM_INFO("Preprocessing time: {:.2f}s".format(end - start2))
+                log.ODM_INFO("Calculated luminance_range_size is {}".format(self.black_checker.luminance_range_size))
+                log.ODM_INFO("Calculated luminance_minimum_value is {}".format(self.black_checker.luminance_minimum_value))
+                log.ODM_INFO("Calculated absolute_threshold is {}".format(self.black_checker.absolute_threshold))
+
+            # open video file
+            cap = cv2.VideoCapture(input_file)
+            if not cap.isOpened():
+                log.ODM_INFO("Error opening video stream or file")
+                return
+
+            if self.parameters.start is not None:
+                cap.set(cv2.CAP_PROP_POS_FRAMES, self.parameters.start)
+                self.frame_index = self.parameters.start
+                start_frame = self.parameters.start
+            else:
+                start_frame = 0
+
+            frames_to_process = self.parameters.end - start_frame + 1 if self.parameters.end is not None else video_info.total_frames - start_frame
+
+            progress = 0
+            while cap.isOpened():
+                ret, frame = cap.read()
+
+                if not ret:
+                    break
+
+                if self.parameters.end is not None and self.frame_index > self.parameters.end:
+                    break
+
+                # Calculate progress percentage
+                prev_progress = progress
+                progress = floor((self.frame_index - start_frame + 1) / frames_to_process * 100)
+                if progress != prev_progress:
+                    print("[{}][{:3d}%] Processing frame {}/{}: ".format(file_name, progress, self.frame_index - start_frame + 1, frames_to_process), end="\r")
+
+                stats = self.ProcessFrame(frame, video_info, srt_parser)
+
+                if stats is not None and self.parameters.stats_file is not None:
+                    self.WriteStats(input_file, stats)
+
+                # Keep track of the frames that were written to disk
+                if stats is not None and "written" in stats.keys():
+                    output_file_paths.append(stats["path"])
+
+            cap.release()
+
+        if self.f is not None:
+            self.f.close()
+
+        if self.parameters.limit is not None and self.parameters.limit > 0 and self.global_idx >= self.parameters.limit:
+            log.ODM_INFO("Limit of {} frames reached, trimming dataset".format(self.parameters.limit))
+            output_file_paths = limit_files(output_file_paths, self.parameters.limit)
+
+        end = time.time()
+        log.ODM_INFO("Total processing time: {:.2f}s".format(end - start))
+        return output_file_paths
+
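+    # Frames flow through the enabled checkers in order (blur, black frame,
+    # similarity); the first check that fails short-circuits: the frame index
+    # still advances, but the frame is not written.
+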
+    def ProcessFrame(self, frame, video_info, srt_parser):
+
+        res = {"frame_index": self.frame_index, "global_idx": self.global_idx}
+
+        frame_bw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+        h, w = frame_bw.shape
+        resolution = self.parameters.internal_resolution
+        if resolution < w or resolution < h:
+            m = max(w, h)
+            factor = resolution / m
+            frame_bw = cv2.resize(frame_bw, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_NEAREST)
+
+        if self.blur_checker is not None:
+            blur_score, is_blurry = self.blur_checker.IsBlur(frame_bw, self.frame_index)
+            res["blur_score"] = blur_score
+            res["is_blurry"] = is_blurry
+
+            if is_blurry:
+                # blurry, skip
+                self.frame_index += 1
+                return res
+
+        if self.black_checker is not None:
+            is_black = self.black_checker.IsBlack(frame_bw, self.frame_index)
+            res["is_black"] = is_black
+
+            if is_black:
+                # black, skip
+                self.frame_index += 1
+                return res
+
+        if self.similarity_checker is not None:
+            similarity_score, is_similar, last_frame_index = self.similarity_checker.IsSimilar(frame_bw, self.frame_index)
+            res["similarity_score"] = similarity_score
+            res["is_similar"] = is_similar
+            res["last_frame_index"] = last_frame_index
+
+            if is_similar:
+                # too similar to the last kept frame, skip
+                self.frame_index += 1
+                return res
+
+        path = self.SaveFrame(frame, video_info, srt_parser)
+        res["written"] = True
+        res["path"] = path
+        self.frame_index += 1
+        self.global_idx += 1
+
+        return res
+
+    def SaveFrame(self, frame, video_info, srt_parser: SrtFileParser):
+        max_dim = self.parameters.max_dimension
+        if max_dim is not None:
+            h, w, _ = frame.shape
+            if max_dim < w or max_dim < h:
+                m = max(w, h)
+                factor = max_dim / m
+                frame = cv2.resize(frame, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_AREA)
+
+        path = os.path.join(self.parameters.output,
+            "{}_{}_{}.{}".format(video_info.basename, self.global_idx, self.frame_index, self.parameters.frame_format))
+
+        _, buf = cv2.imencode('.' + self.parameters.frame_format, frame)
+
+        delta = datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate))
+        elapsed_time = datetime.datetime(1900, 1, 1) + delta
+
+        img = Image.open(io.BytesIO(buf))
+
+        entry = gps_coords = None
+        if srt_parser is not None:
+            entry = srt_parser.get_entry(elapsed_time)
+            gps_coords = srt_parser.get_gps(elapsed_time)
+
+        exif_time = (elapsed_time + (self.date_now - datetime.datetime(1900, 1, 1)))
+        elapsed_time_str = exif_time.strftime("%Y:%m:%d %H:%M:%S")
+        subsec_time_str = exif_time.strftime("%f")
+
+        # Set the EXIF metadata ('0th', 'Exif', 'GPS', '1st' and 'thumbnail'
+        # are the IFD keys piexif understands)
+        exif_dict = {
+            "0th": {
+                piexif.ImageIFD.Software: "ODM",
+                piexif.ImageIFD.DateTime: elapsed_time_str,
+                piexif.ImageIFD.XResolution: (frame.shape[1], 1),
+                piexif.ImageIFD.YResolution: (frame.shape[0], 1),
+                piexif.ImageIFD.Make: "DJI" if video_info.basename.lower().startswith("dji") else "Unknown",
+                piexif.ImageIFD.Model: "Unknown"
+            },
+            "Exif": {
+                piexif.ExifIFD.DateTimeOriginal: elapsed_time_str,
+                piexif.ExifIFD.DateTimeDigitized: elapsed_time_str,
+                piexif.ExifIFD.SubSecTime: subsec_time_str,
+                piexif.ExifIFD.PixelXDimension: frame.shape[1],
+                piexif.ExifIFD.PixelYDimension: frame.shape[0],
+            }}
+
+        if entry is not None:
+            if entry["shutter"] is not None:
+                exif_dict["Exif"][piexif.ExifIFD.ExposureTime] = (1, int(entry["shutter"]))
+            if entry["focal_len"] is not None:
+                exif_dict["Exif"][piexif.ExifIFD.FocalLength] = (entry["focal_len"], 100)
+            if entry["fnum"] is not None:
+                exif_dict["Exif"][piexif.ExifIFD.FNumber] = float_to_rational(entry["fnum"])
+            if entry["iso"] is not None:
+                exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = entry["iso"]
+
+        if gps_coords is not None:
+            exif_dict["GPS"] = get_gps_location(elapsed_time, gps_coords[1], gps_coords[0], gps_coords[2])
+
+        exif_bytes = piexif.dump(exif_dict)
+        img.save(path, exif=exif_bytes, quality=95)
+
+        return path
+
+
+    def WriteStats(self, input_file, stats):
+        # Note: decimal points are written as commas in the csv output
+        self.f.write("{};{};{};{};{};{};{};{};{};{}\n".format(
+            stats["global_idx"],
+            input_file,
+            stats["frame_index"],
+            stats["blur_score"] if "blur_score" in stats else "",
+            stats["is_blurry"] if "is_blurry" in stats else "",
+            stats["is_black"] if "is_black" in stats else "",
+            stats["last_frame_index"] if "last_frame_index" in stats else "",
+            stats["similarity_score"] if "similarity_score" in stats else "",
+            stats["is_similar"] if "is_similar" in stats else "",
+            stats["written"] if "written" in stats else "").replace(".", ","))
+
+
+def get_video_info(input_file):
+
+    video = cv2.VideoCapture(input_file)
+    basename = os.path.splitext(os.path.basename(input_file))[0]
+
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_rate = video.get(cv2.CAP_PROP_FPS)
+
+    video.release()
+
+    return collections.namedtuple("VideoInfo", ["total_frames", "frame_rate", "basename"])(total_frames, frame_rate, basename)
+
+def float_to_rational(f):
+    f = Fraction(f).limit_denominator()
+    return (f.numerator, f.denominator)
+
+def limit_files(paths, limit):
+    if len(paths) <= limit:
+        return paths
+
+    to_keep = []
+    all_idxes = np.arange(0, len(paths))
+    keep_idxes = np.linspace(0, len(paths) - 1, limit, dtype=int)
+    remove_idxes = set(all_idxes) - set(keep_idxes)
+
+    p = np.array(paths)
+    to_keep = list(p[keep_idxes])
+
+    for idx in remove_idxes:
+        os.remove(paths[idx])
+
+    return to_keep
+
+def to_deg(value, loc):
+    """Convert decimal coordinates into a degrees, minutes, seconds tuple
+    Keyword arguments: value is float gps-value, loc is direction list ["S", "N"] or ["W", "E"]
+    return: tuple like (25, 13, 48.343, 'N')
+    """
+    if value < 0:
+        loc_value = loc[0]
+    elif value > 0:
+        loc_value = loc[1]
+    else:
+        loc_value = ""
+    abs_value = abs(value)
+    deg = int(abs_value)
+    t1 = (abs_value - deg) * 60
+    min = int(t1)
+    sec = round((t1 - min) * 60, 5)
+    return (deg, min, sec, loc_value)
+
+def get_gps_location(elapsed_time, lat, lng, altitude):
+
+    lat_deg = to_deg(lat, ["S", "N"])
+    lng_deg = to_deg(lng, ["W", "E"])
+
+    exiv_lat = (float_to_rational(lat_deg[0]), float_to_rational(lat_deg[1]), float_to_rational(lat_deg[2]))
+    exiv_lng = (float_to_rational(lng_deg[0]), float_to_rational(lng_deg[1]), float_to_rational(lng_deg[2]))
+
+    gps_ifd = {
+        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
+        piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d')
+    }
+
+    if lat is not None and lng is not None:
+        gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = lat_deg[3]
+        gps_ifd[piexif.GPSIFD.GPSLatitude] = exiv_lat
+        gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = lng_deg[3]
+        gps_ifd[piexif.GPSIFD.GPSLongitude] = exiv_lng
+        if altitude is not None:
+            gps_ifd[piexif.GPSIFD.GPSAltitudeRef] = 0
+            gps_ifd[piexif.GPSIFD.GPSAltitude] = float_to_rational(round(altitude))
+
+    return gps_ifd
\ No newline at end of file
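Two of the helpers above in action (values are illustrative). EXIF stores rationals as (numerator, denominator) pairs, and `limit_files` keeps evenly spaced frames while deleting the rest from disk:

```python
import numpy as np
from fractions import Fraction

f = Fraction(2.8).limit_denominator()
print(f.numerator, f.denominator)       # 14 5, i.e. F/2.8 -> (14, 5)

# With 6 written frames and a limit of 3, indices 0, 2 and 5 are kept:
print(np.linspace(0, 5, 3, dtype=int))  # [0 2 5]
```
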
diff --git a/requirements.txt b/requirements.txt
index 3084deb3..481dc910 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,3 +34,4 @@ onnxruntime==1.12.1
 codem==0.24.0
 trimesh==3.17.1
 pandas==1.5.2
+piexif==1.1.3
diff --git a/stages/dataset.py b/stages/dataset.py
index 355fac4d..c7b84ab9 100644
--- a/stages/dataset.py
+++ b/stages/dataset.py
@@ -15,6 +15,7 @@ from opendm import ai
 from opendm.skyremoval.skyfilter import SkyFilter
 from opendm.bgfilter import BgFilter
 from opendm.concurrency import parallel_map
+from opendm.video.video2dataset import Parameters, Video2Dataset
 
 def save_images_database(photos, database_file):
     with open(database_file, 'w') as f:
@@ -58,22 +59,25 @@ class ODMLoadDatasetStage(types.ODM_Stage):
             except Exception as e:
                 log.ODM_WARNING("Cannot write benchmark file: %s" % str(e))
 
-        # check if the image filename is supported
-        def valid_image_filename(filename):
+        def valid_filename(filename, supported_extensions):
             (pathfn, ext) = os.path.splitext(filename)
-            return ext.lower() in context.supported_extensions and pathfn[-5:] != "_mask"
+            return ext.lower() in supported_extensions and pathfn[-5:] != "_mask"
 
         # Get supported images from dir
         def get_images(in_dir):
             entries = os.listdir(in_dir)
             valid, rejects = [], []
             for f in entries:
-                if valid_image_filename(f):
+                if valid_filename(f, context.supported_extensions):
                     valid.append(f)
                 else:
                     rejects.append(f)
             return valid, rejects
 
+        def search_video_files(in_dir):
+            entries = os.listdir(in_dir)
+            return [os.path.join(in_dir, f) for f in entries if valid_filename(f, context.supported_video_extensions)]
+
         def find_mask(photo_path, masks):
             (pathfn, ext) = os.path.splitext(os.path.basename(photo_path))
             k = "{}_mask".format(pathfn)
@@ -85,6 +89,8 @@ class ODMLoadDatasetStage(types.ODM_Stage):
                 return mask
             else:
                 log.ODM_WARNING("Image mask {} has a space. Spaces are currently not supported for image masks.".format(mask))
+
+
         # get images directory
         images_dir = tree.dataset_raw
 
@@ -100,6 +106,51 @@ class ODMLoadDatasetStage(types.ODM_Stage):
         if not os.path.exists(images_dir):
             raise system.ExitException("There are no images in %s! Make sure that your project path and dataset name is correct. The current is set to: %s" % (images_dir, args.project_path))
 
+        # Check if we need to extract video frames
+        frames_db_file = os.path.join(images_dir, 'frames.json')
+        if not os.path.exists(frames_db_file) or self.rerun():
+            video_files = search_video_files(images_dir)
+
+            # If we're re-running the pipeline and frames have been extracted
+            # during a previous run, we need to remove those before
+            # re-extracting them
+            if len(video_files) > 0 and os.path.exists(frames_db_file) and self.rerun():
+                log.ODM_INFO("Re-run, removing previously extracted video frames")
+                frames = []
+                try:
+                    with open(frames_db_file, 'r') as f:
+                        frames = json.loads(f.read())
+                except Exception as e:
+                    log.ODM_WARNING("Cannot check previous video extraction: %s" % str(e))
+
+                for f in frames:
+                    fp = os.path.join(images_dir, f)
+                    if os.path.isfile(fp):
+                        os.remove(fp)
+
+            if len(video_files) > 0:
+                log.ODM_INFO("Found video files (%s), extracting frames" % len(video_files))
+
+                try:
+                    params = Parameters({
+                        "input": video_files,
+                        "output": images_dir,
+
+                        "blur_threshold": 300,
+                        "distance_threshold": 10,
+                        "black_ratio_threshold": 0.98,
+                        "pixel_black_threshold": 0.30,
+                        "use_srt": True,
+                        "max_dimension": args.video_resolution,
+                        "limit": args.video_limit,
+                    })
+                    v2d = Video2Dataset(params)
+                    frames = v2d.ProcessVideo()
+
+                    with open(frames_db_file, 'w') as f:
+                        f.write(json.dumps([os.path.basename(fr) for fr in frames]))
+                except Exception as e:
+                    log.ODM_WARNING("Could not extract video frames: %s" % str(e))
+
         files, rejects = get_images(images_dir)
         if files:
             # create ODMPhoto list
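The `frames.json` bookkeeping file makes re-runs idempotent: the names of extracted frames are recorded so they can be removed before a new extraction. A sketch of inspecting it (names are illustrative; the naming pattern is `{basename}_{global_idx}_{frame_index}.{format}`, as set in `SaveFrame`):

```python
import json, os

with open(os.path.join("images", "frames.json")) as fin:
    print(json.load(fin))  # e.g. ["dji_0001_0_12.jpg", "dji_0001_1_45.jpg"]
```
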