kopia lustrzana https://github.com/OpenDroneMap/ODM
commit
643f92a66d
|
@ -259,6 +259,10 @@ Experimental flags need to be enabled in Docker to use the ```--squash``` flag.
|
||||||
|
|
||||||
After this, you must restart docker.
|
After this, you must restart docker.
|
||||||
|
|
||||||
|
## Video Support
|
||||||
|
|
||||||
|
Starting from version 3.0.4, ODM can automatically extract images from video files (.mp4 or .mov). Just place one or more video files into the `images` folder and run the program as usual. Subtitles files (.srt) with GPS information are also supported. Place .srt files in the `images` folder, making sure that the filenames match. For example, `my_video.mp4` ==> `my_video.srt` (case-sensitive).
|
||||||
|
|
||||||
## Developers
|
## Developers
|
||||||
|
|
||||||
Help improve our software! We welcome contributions from everyone, whether to add new features, improve speed, fix existing bugs or add support for more cameras. Check our [code of conduct](https://github.com/OpenDroneMap/documents/blob/master/CONDUCT.md), the [contributing guidelines](https://github.com/OpenDroneMap/documents/blob/master/CONTRIBUTING.md) and [how decisions are made](https://github.com/OpenDroneMap/documents/blob/master/GOVERNANCE.md#how-decisions-are-made).
|
Help improve our software! We welcome contributions from everyone, whether to add new features, improve speed, fix existing bugs or add support for more cameras. Check our [code of conduct](https://github.com/OpenDroneMap/documents/blob/master/CONDUCT.md), the [contributing guidelines](https://github.com/OpenDroneMap/documents/blob/master/CONTRIBUTING.md) and [how decisions are made](https://github.com/OpenDroneMap/documents/blob/master/GOVERNANCE.md#how-decisions-are-made).
|
||||||
|
|
|
@ -55,7 +55,7 @@ ExternalProject_Add(${_proj_name}
|
||||||
-DBUILD_opencv_photo=ON
|
-DBUILD_opencv_photo=ON
|
||||||
-DBUILD_opencv_legacy=ON
|
-DBUILD_opencv_legacy=ON
|
||||||
-DBUILD_opencv_python3=ON
|
-DBUILD_opencv_python3=ON
|
||||||
-DWITH_FFMPEG=OFF
|
-DWITH_FFMPEG=ON
|
||||||
-DWITH_CUDA=OFF
|
-DWITH_CUDA=OFF
|
||||||
-DWITH_GTK=OFF
|
-DWITH_GTK=OFF
|
||||||
-DWITH_VTK=OFF
|
-DWITH_VTK=OFF
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
3.0.3
|
3.0.4
|
||||||
|
|
|
@ -653,6 +653,20 @@ def config(argv=None, parser=None):
|
||||||
version='ODM {0}'.format(__version__),
|
version='ODM {0}'.format(__version__),
|
||||||
help='Displays version number and exits. ')
|
help='Displays version number and exits. ')
|
||||||
|
|
||||||
|
parser.add_argument('--video-limit',
|
||||||
|
type=int,
|
||||||
|
action=StoreValue,
|
||||||
|
default=500,
|
||||||
|
metavar='<positive integer>',
|
||||||
|
help='Maximum number of frames to extract from video files for processing. Set to 0 for no limit. Default: %(default)s')
|
||||||
|
|
||||||
|
parser.add_argument('--video-resolution',
|
||||||
|
type=int,
|
||||||
|
action=StoreValue,
|
||||||
|
default=4000,
|
||||||
|
metavar='<positive integer>',
|
||||||
|
help='The maximum output resolution of extracted video frames in pixels. Default: %(default)s')
|
||||||
|
|
||||||
parser.add_argument('--split',
|
parser.add_argument('--split',
|
||||||
type=int,
|
type=int,
|
||||||
action=StoreValue,
|
action=StoreValue,
|
||||||
|
|
|
@ -41,6 +41,7 @@ settings_path = os.path.join(root_path, 'settings.yaml')
|
||||||
|
|
||||||
# Define supported image extensions
|
# Define supported image extensions
|
||||||
supported_extensions = {'.jpg','.jpeg','.png', '.tif', '.tiff', '.bmp'}
|
supported_extensions = {'.jpg','.jpeg','.png', '.tif', '.tiff', '.bmp'}
|
||||||
|
supported_video_extensions = {'.mp4', '.mov'}
|
||||||
|
|
||||||
# Define the number of cores
|
# Define the number of cores
|
||||||
num_cores = multiprocessing.cpu_count()
|
num_cores = multiprocessing.cpu_count()
|
||||||
|
|
|
@ -119,7 +119,6 @@ def parse_srs_header(header):
|
||||||
:param header (str) line
|
:param header (str) line
|
||||||
:return Proj object
|
:return Proj object
|
||||||
"""
|
"""
|
||||||
log.ODM_INFO('Parsing SRS header: %s' % header)
|
|
||||||
header = header.strip()
|
header = header.strip()
|
||||||
ref = header.split(' ')
|
ref = header.split(' ')
|
||||||
|
|
||||||
|
@ -155,4 +154,15 @@ def parse_srs_header(header):
|
||||||
'Modify your input and try again.' % header)
|
'Modify your input and try again.' % header)
|
||||||
raise RuntimeError(e)
|
raise RuntimeError(e)
|
||||||
|
|
||||||
return srs
|
return srs
|
||||||
|
|
||||||
|
def utm_srs_from_ll(lon, lat):
|
||||||
|
utm_zone, hemisphere = get_utm_zone_and_hemisphere_from(lon, lat)
|
||||||
|
return parse_srs_header("WGS84 UTM %s%s" % (utm_zone, hemisphere))
|
||||||
|
|
||||||
|
def utm_transformers_from_ll(lon, lat):
|
||||||
|
source_srs = CRS.from_epsg(4326)
|
||||||
|
target_srs = utm_srs_from_ll(lon, lat)
|
||||||
|
ll_to_utm = transformer(source_srs, target_srs)
|
||||||
|
utm_to_ll = transformer(target_srs, source_srs)
|
||||||
|
return ll_to_utm, utm_to_ll
|
|
@ -0,0 +1,128 @@
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class ThresholdBlurChecker:
|
||||||
|
def __init__(self, threshold):
|
||||||
|
self.threshold = threshold
|
||||||
|
|
||||||
|
def NeedPreProcess(self):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def PreProcess(self, video_path, start_frame, end_frame):
|
||||||
|
return
|
||||||
|
|
||||||
|
def IsBlur(self, image_bw, id):
|
||||||
|
var = cv2.Laplacian(image_bw, cv2.CV_64F).var()
|
||||||
|
return var, var < self.threshold
|
||||||
|
|
||||||
|
class SimilarityChecker:
|
||||||
|
def __init__(self, threshold, max_features=500):
|
||||||
|
self.threshold = threshold
|
||||||
|
self.max_features = max_features
|
||||||
|
self.last_image = None
|
||||||
|
self.last_image_id = None
|
||||||
|
self.last_image_features = None
|
||||||
|
|
||||||
|
def IsSimilar(self, image_bw, id):
|
||||||
|
|
||||||
|
if self.last_image is None:
|
||||||
|
self.last_image = image_bw
|
||||||
|
self.last_image_id = id
|
||||||
|
self.last_image_features = cv2.goodFeaturesToTrack(image_bw, self.max_features, 0.01, 10)
|
||||||
|
return 0, False, None
|
||||||
|
|
||||||
|
# Detect features
|
||||||
|
features, status, _ = cv2.calcOpticalFlowPyrLK(self.last_image, image_bw, self.last_image_features, None)
|
||||||
|
|
||||||
|
# Filter out the "bad" features (i.e. those that are not tracked successfully)
|
||||||
|
good_features = features[status == 1]
|
||||||
|
good_features2 = self.last_image_features[status == 1]
|
||||||
|
|
||||||
|
# Calculate the difference between the locations of the good features in the two frames
|
||||||
|
distance = np.average(np.abs(good_features2 - good_features))
|
||||||
|
|
||||||
|
res = distance < self.threshold
|
||||||
|
|
||||||
|
if (not res):
|
||||||
|
self.last_image = image_bw
|
||||||
|
self.last_image_id = id
|
||||||
|
self.last_image_features = cv2.goodFeaturesToTrack(image_bw, self.max_features, 0.01, 10)
|
||||||
|
|
||||||
|
return distance, res, self.last_image_id
|
||||||
|
|
||||||
|
|
||||||
|
class NaiveBlackFrameChecker:
|
||||||
|
def __init__(self, threshold):
|
||||||
|
self.threshold = threshold
|
||||||
|
|
||||||
|
def PreProcess(self, video_path, start_frame, end_frame, width=800, height=600):
|
||||||
|
return
|
||||||
|
|
||||||
|
def NeedPreProcess(self):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def IsBlack(self, image_bw, id):
|
||||||
|
return np.average(image_bw) < self.threshold
|
||||||
|
|
||||||
|
|
||||||
|
class BlackFrameChecker:
|
||||||
|
def __init__(self, picture_black_ratio_th=0.98, pixel_black_th=0.30):
|
||||||
|
self.picture_black_ratio_th = picture_black_ratio_th if picture_black_ratio_th is not None else 0.98
|
||||||
|
self.pixel_black_th = pixel_black_th if pixel_black_th is not None else 0.30
|
||||||
|
self.luminance_minimum_value = None
|
||||||
|
self.luminance_range_size = None
|
||||||
|
self.absolute_threshold = None
|
||||||
|
|
||||||
|
def NeedPreProcess(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def PreProcess(self, video_path, start_frame, end_frame):
|
||||||
|
# Open video file
|
||||||
|
cap = cv2.VideoCapture(video_path)
|
||||||
|
|
||||||
|
# Set frame start and end indices
|
||||||
|
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
|
||||||
|
frame_end = end_frame
|
||||||
|
if end_frame == -1:
|
||||||
|
frame_end = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||||
|
|
||||||
|
# Initialize luminance range size and minimum value
|
||||||
|
self.luminance_range_size = 0
|
||||||
|
self.luminance_minimum_value = 255
|
||||||
|
|
||||||
|
frame_index = start_frame if start_frame is not None else 0
|
||||||
|
|
||||||
|
# Read and process frames from video file
|
||||||
|
while (cap.isOpened() and (end_frame is None or frame_index <= end_frame)):
|
||||||
|
|
||||||
|
ret, frame = cap.read()
|
||||||
|
if not ret:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Convert frame to grayscale
|
||||||
|
gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||||
|
gray_frame_min = gray_frame.min()
|
||||||
|
gray_frame_max = gray_frame.max()
|
||||||
|
|
||||||
|
# Update luminance range size and minimum value
|
||||||
|
self.luminance_range_size = max(self.luminance_range_size, gray_frame_max - gray_frame_min)
|
||||||
|
self.luminance_minimum_value = min(self.luminance_minimum_value, gray_frame_min)
|
||||||
|
|
||||||
|
frame_index += 1
|
||||||
|
|
||||||
|
# Calculate absolute threshold for considering a pixel "black"
|
||||||
|
self.absolute_threshold = self.luminance_minimum_value + self.pixel_black_th * self.luminance_range_size
|
||||||
|
|
||||||
|
# Close video file
|
||||||
|
cap.release()
|
||||||
|
|
||||||
|
def IsBlack(self, image_bw, id):
|
||||||
|
|
||||||
|
# Count number of pixels < self.absolute_threshold
|
||||||
|
nb_black_pixels = np.sum(image_bw < self.absolute_threshold)
|
||||||
|
|
||||||
|
# Calculate ratio of black pixels
|
||||||
|
ratio_black_pixels = nb_black_pixels / (image_bw.shape[0] * image_bw.shape[1])
|
||||||
|
|
||||||
|
# Check if ratio of black pixels is above threshold
|
||||||
|
return ratio_black_pixels >= self.picture_black_ratio_th
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
|
||||||
|
class Parameters:
|
||||||
|
|
||||||
|
def __init__(self, args):
|
||||||
|
|
||||||
|
# "input" -> path to input video file(s), use ',' to separate multiple files")
|
||||||
|
# "output" -> path to output directory")
|
||||||
|
# "start" -> start frame index")
|
||||||
|
# "end" -> end frame index")
|
||||||
|
# "output-resolution" -> Override output resolution (ex. 1024)")
|
||||||
|
# "blur-threshold" -> blur measures that fall below this value will be considered 'blurry'. Good value is 300
|
||||||
|
# "distance-threshold" -> distance measures that fall below this value will be considered 'similar'")
|
||||||
|
# "black-ratio-threshold" -> Set the threshold for considering a frame 'black'. Express the minimum value for the ratio: nb_black_pixels / nb_pixels. Default value is 0.98")
|
||||||
|
# "pixel-black-threshold" -> Set the threshold for considering a pixel 'black'. The threshold expresses the maximum pixel luminance value for which a pixel is considered 'black'. Good value is 0.30 (30%)")
|
||||||
|
# "use-srt" -> Use SRT files for extracting metadata (same name as video file with .srt extension)")
|
||||||
|
# "limit" -> Maximum number of output frames
|
||||||
|
# "frame-format" -> frame format (jpg, png, tiff, etc.)")
|
||||||
|
# "stats-file" -> Save statistics to csv file")
|
||||||
|
|
||||||
|
if not os.path.exists(args["output"]):
|
||||||
|
os.makedirs(args["output"])
|
||||||
|
|
||||||
|
self.input = args["input"]
|
||||||
|
if isinstance(self.input, str):
|
||||||
|
self.input = [self.input]
|
||||||
|
|
||||||
|
self.output = args["output"]
|
||||||
|
self.start = args.get("start", 0)
|
||||||
|
self.end = args.get("end", None)
|
||||||
|
self.limit = args.get("limit", None)
|
||||||
|
self.blur_threshold = args.get("blur_threshold", None)
|
||||||
|
self.distance_threshold = args.get("distance_threshold", None)
|
||||||
|
self.black_ratio_threshold = args.get("black_ratio_threshold", None)
|
||||||
|
self.pixel_black_threshold = args.get("pixel_black_threshold", None)
|
||||||
|
self.use_srt = "use_srt" in args
|
||||||
|
self.frame_format = args.get("frame_format", "jpg")
|
||||||
|
self.max_dimension = args.get("max_dimension", None)
|
||||||
|
|
||||||
|
self.stats_file = args.get("stats_file", None)
|
||||||
|
|
||||||
|
# We will resize the image to this size before processing
|
||||||
|
self.internal_resolution = 800
|
|
@ -0,0 +1,206 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from opendm import location, log
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def match_single(regexes, line, dtype=int):
|
||||||
|
if isinstance(regexes, str):
|
||||||
|
regexes = [(regexes, dtype)]
|
||||||
|
|
||||||
|
for i in range(len(regexes)):
|
||||||
|
if isinstance(regexes[i], str):
|
||||||
|
regexes[i] = (regexes[i], dtype)
|
||||||
|
|
||||||
|
try:
|
||||||
|
for r, transform in regexes:
|
||||||
|
match = re.search(r, line)
|
||||||
|
if match:
|
||||||
|
res = match.group(1)
|
||||||
|
return transform(res)
|
||||||
|
except Exception as e:
|
||||||
|
log.ODM_WARNING("Cannot parse SRT line \"%s\": %s", (line, str(e)))
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
class SrtFileParser:
|
||||||
|
def __init__(self, filename):
|
||||||
|
self.filename = filename
|
||||||
|
self.data = []
|
||||||
|
self.gps_data = []
|
||||||
|
self.ll_to_utm = None
|
||||||
|
self.utm_to_ll = None
|
||||||
|
|
||||||
|
def get_entry(self, timestamp: datetime):
|
||||||
|
if not self.data:
|
||||||
|
self.parse()
|
||||||
|
|
||||||
|
# check min and max
|
||||||
|
if timestamp < self.data[0]["start"] or timestamp > self.data[len(self.data) - 1]["end"]:
|
||||||
|
return None
|
||||||
|
|
||||||
|
for entry in self.data:
|
||||||
|
if entry["start"] <= timestamp and entry["end"] >= timestamp:
|
||||||
|
return entry
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_gps(self, timestamp):
|
||||||
|
if not self.data:
|
||||||
|
self.parse()
|
||||||
|
|
||||||
|
# Initialize on first call
|
||||||
|
prev_coords = None
|
||||||
|
|
||||||
|
if not self.gps_data:
|
||||||
|
for d in self.data:
|
||||||
|
lat, lon, alt = d.get('latitude'), d.get('longitude'), d.get('altitude')
|
||||||
|
tm = d.get('start')
|
||||||
|
|
||||||
|
if lat is not None and lon is not None:
|
||||||
|
if self.ll_to_utm is None:
|
||||||
|
self.ll_to_utm, self.utm_to_ll = location.utm_transformers_from_ll(lon, lat)
|
||||||
|
|
||||||
|
coords = self.ll_to_utm.TransformPoint(lon, lat, alt)
|
||||||
|
|
||||||
|
# First or new (in X/Y only)
|
||||||
|
add = (not len(self.gps_data)) or (coords[0], coords[1]) != (self.gps_data[-1][1][0], self.gps_data[-1][1][1])
|
||||||
|
if add:
|
||||||
|
self.gps_data.append((tm, coords))
|
||||||
|
|
||||||
|
# No data available
|
||||||
|
if not len(self.gps_data) or self.gps_data[0][0] > timestamp:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Interpolate
|
||||||
|
start = None
|
||||||
|
for i in range(len(self.gps_data)):
|
||||||
|
tm, coords = self.gps_data[i]
|
||||||
|
|
||||||
|
# Perfect match
|
||||||
|
if timestamp == tm:
|
||||||
|
return self.utm_to_ll.TransformPoint(*coords)
|
||||||
|
|
||||||
|
elif tm > timestamp:
|
||||||
|
end = i
|
||||||
|
start = i - 1
|
||||||
|
if start < 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
gd_s = self.gps_data[start]
|
||||||
|
gd_e = self.gps_data[end]
|
||||||
|
sx, sy, sz = gd_s[1]
|
||||||
|
ex, ey, ez = gd_e[1]
|
||||||
|
|
||||||
|
dt = (gd_e[0] - gd_s[0]).total_seconds()
|
||||||
|
if dt >= 10:
|
||||||
|
return None
|
||||||
|
|
||||||
|
dx = (ex - sx) / dt
|
||||||
|
dy = (ey - sy) / dt
|
||||||
|
dz = (ez - sz) / dt
|
||||||
|
t = (timestamp - gd_s[0]).total_seconds()
|
||||||
|
|
||||||
|
return self.utm_to_ll.TransformPoint(
|
||||||
|
sx + dx * t,
|
||||||
|
sy + dy * t,
|
||||||
|
sz + dz * t
|
||||||
|
)
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
|
||||||
|
# SRT metadata is not standarized, we support the following formats:
|
||||||
|
|
||||||
|
# DJI mavic air 2
|
||||||
|
# 1
|
||||||
|
# 00:00:00,000 --> 00:00:00,016
|
||||||
|
# <font size="36">SrtCnt : 1, DiffTime : 16ms
|
||||||
|
# 2023-01-06 18:56:48,380,821
|
||||||
|
# [iso : 3200] [shutter : 1/60.0] [fnum : 280] [ev : 0] [ct : 3925] [color_md : default] [focal_len : 240] [latitude: 0.000000] [longitude: 0.000000] [altitude: 0.000000] </font>
|
||||||
|
|
||||||
|
# DJI Mavic Mini
|
||||||
|
# 1
|
||||||
|
# 00:00:00,000 --> 00:00:01,000
|
||||||
|
# F/2.8, SS 206.14, ISO 150, EV 0, GPS (-82.6669, 27.7716, 10), D 2.80m, H 0.00m, H.S 0.00m/s, V.S 0.00m/s
|
||||||
|
|
||||||
|
with open(self.filename, 'r') as f:
|
||||||
|
|
||||||
|
iso = None
|
||||||
|
shutter = None
|
||||||
|
fnum = None
|
||||||
|
focal_len = None
|
||||||
|
latitude = None
|
||||||
|
longitude = None
|
||||||
|
altitude = None
|
||||||
|
start = None
|
||||||
|
end = None
|
||||||
|
|
||||||
|
for line in f:
|
||||||
|
|
||||||
|
# Check if line is empty
|
||||||
|
if not line.strip():
|
||||||
|
if start is not None:
|
||||||
|
self.data.append({
|
||||||
|
"start": start,
|
||||||
|
"end": end,
|
||||||
|
"iso": iso,
|
||||||
|
"shutter": shutter,
|
||||||
|
"fnum": fnum,
|
||||||
|
"focal_len": focal_len,
|
||||||
|
"latitude": latitude,
|
||||||
|
"longitude": longitude,
|
||||||
|
"altitude": altitude
|
||||||
|
})
|
||||||
|
|
||||||
|
iso = None
|
||||||
|
shutter = None
|
||||||
|
fnum = None
|
||||||
|
ct = None
|
||||||
|
focal_len = None
|
||||||
|
latitude = None
|
||||||
|
longitude = None
|
||||||
|
altitude = None
|
||||||
|
start = None
|
||||||
|
end = None
|
||||||
|
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Remove html tags
|
||||||
|
line = re.sub('<[^<]+?>', '', line)
|
||||||
|
|
||||||
|
# Search this "00:00:00,000 --> 00:00:00,016"
|
||||||
|
match = re.search("(\d{2}:\d{2}:\d{2},\d+) --> (\d{2}:\d{2}:\d{2},\d+)", line)
|
||||||
|
if match:
|
||||||
|
start = datetime.strptime(match.group(1), "%H:%M:%S,%f")
|
||||||
|
end = datetime.strptime(match.group(2), "%H:%M:%S,%f")
|
||||||
|
|
||||||
|
iso = match_single([
|
||||||
|
"iso : (\d+)",
|
||||||
|
"ISO (\d+)"
|
||||||
|
], line)
|
||||||
|
|
||||||
|
shutter = match_single([
|
||||||
|
"shutter : \d+/(\d+\.?\d*)"
|
||||||
|
"SS (\d+\.?\d*)"
|
||||||
|
], line)
|
||||||
|
|
||||||
|
fnum = match_single([
|
||||||
|
("fnum : (\d+)", lambda v: float(v)/100.0),
|
||||||
|
("F/([\d\.]+)", float),
|
||||||
|
], line)
|
||||||
|
|
||||||
|
focal_len = match_single("focal_len : (\d+)", line)
|
||||||
|
|
||||||
|
latitude = match_single([
|
||||||
|
("latitude: ([\d\.\-]+)", lambda v: float(v) if v != 0 else None),
|
||||||
|
("GPS \([\d\.\-]+,? ([\d\.\-]+),? [\d\.\-]+\)", lambda v: float(v) if v != 0 else None),
|
||||||
|
], line)
|
||||||
|
|
||||||
|
longitude = match_single([
|
||||||
|
("longitude: ([\d\.\-]+)", lambda v: float(v) if v != 0 else None),
|
||||||
|
("GPS \(([\d\.\-]+),? [\d\.\-]+,? [\d\.\-]+\)", lambda v: float(v) if v != 0 else None),
|
||||||
|
], line)
|
||||||
|
|
||||||
|
altitude = match_single([
|
||||||
|
("altitude: ([\d\.\-]+)", lambda v: float(v) if v != 0 else None),
|
||||||
|
("GPS \([\d\.\-]+,? [\d\.\-]+,? ([\d\.\-]+)\)", lambda v: float(v) if v != 0 else None),
|
||||||
|
], line)
|
|
@ -0,0 +1,351 @@
|
||||||
|
import datetime
|
||||||
|
from fractions import Fraction
|
||||||
|
import io
|
||||||
|
from math import ceil, floor
|
||||||
|
import time
|
||||||
|
import cv2
|
||||||
|
import os
|
||||||
|
import collections
|
||||||
|
from PIL import Image
|
||||||
|
import numpy as np
|
||||||
|
import piexif
|
||||||
|
from opendm import log
|
||||||
|
from opendm.video.srtparser import SrtFileParser
|
||||||
|
from opendm.video.parameters import Parameters
|
||||||
|
from opendm.video.checkers import BlackFrameChecker, SimilarityChecker, ThresholdBlurChecker
|
||||||
|
|
||||||
|
class Video2Dataset:
|
||||||
|
|
||||||
|
def __init__(self, parameters : Parameters):
|
||||||
|
self.parameters = parameters
|
||||||
|
|
||||||
|
self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) if parameters.blur_threshold is not None else None
|
||||||
|
self.similarity_checker = SimilarityChecker(parameters.distance_threshold) if parameters.distance_threshold is not None else None
|
||||||
|
self.black_checker = BlackFrameChecker(parameters.black_ratio_threshold, parameters.pixel_black_threshold) if parameters.black_ratio_threshold is not None or parameters.pixel_black_threshold is not None else None
|
||||||
|
|
||||||
|
self.frame_index = parameters.start
|
||||||
|
self.f = None
|
||||||
|
|
||||||
|
|
||||||
|
def ProcessVideo(self):
|
||||||
|
self.date_now = None
|
||||||
|
start = time.time()
|
||||||
|
|
||||||
|
if (self.parameters.stats_file is not None):
|
||||||
|
self.f = open(self.parameters.stats_file, "w")
|
||||||
|
self.f.write("global_idx;file_name;frame_index;blur_score;is_blurry;is_black;last_frame_index;similarity_score;is_similar;written\n")
|
||||||
|
|
||||||
|
self.global_idx = 0
|
||||||
|
|
||||||
|
output_file_paths = []
|
||||||
|
|
||||||
|
# foreach input file
|
||||||
|
for input_file in self.parameters.input:
|
||||||
|
# get file name
|
||||||
|
file_name = os.path.basename(input_file)
|
||||||
|
log.ODM_INFO("Processing video: {}".format(input_file))
|
||||||
|
|
||||||
|
# get video info
|
||||||
|
video_info = get_video_info(input_file)
|
||||||
|
log.ODM_INFO(video_info)
|
||||||
|
|
||||||
|
# Set pseudo start time
|
||||||
|
if self.date_now is None:
|
||||||
|
try:
|
||||||
|
self.date_now = datetime.datetime.fromtimestamp(os.path.getmtime(input_file))
|
||||||
|
except:
|
||||||
|
self.date_now = datetime.datetime.now()
|
||||||
|
else:
|
||||||
|
self.date_now += datetime.timedelta(seconds=video_info.total_frames / video_info.frame_rate)
|
||||||
|
|
||||||
|
log.ODM_INFO("Use pseudo start time: %s" % self.date_now)
|
||||||
|
|
||||||
|
if self.parameters.use_srt:
|
||||||
|
|
||||||
|
name = os.path.splitext(input_file)[0]
|
||||||
|
|
||||||
|
srt_files = [name + ".srt", name + ".SRT"]
|
||||||
|
srt_parser = None
|
||||||
|
|
||||||
|
for srt_file in srt_files:
|
||||||
|
if os.path.exists(srt_file):
|
||||||
|
log.ODM_INFO("Loading SRT file: {}".format(srt_file))
|
||||||
|
try:
|
||||||
|
srt_parser = SrtFileParser(srt_file)
|
||||||
|
srt_parser.parse()
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
log.ODM_INFO("Error parsing SRT file: {}".format(e))
|
||||||
|
srt_parser = None
|
||||||
|
else:
|
||||||
|
srt_parser = None
|
||||||
|
|
||||||
|
if (self.black_checker is not None and self.black_checker.NeedPreProcess()):
|
||||||
|
start2 = time.time()
|
||||||
|
log.ODM_INFO("Preprocessing for black frame checker... this might take a bit")
|
||||||
|
self.black_checker.PreProcess(input_file, self.parameters.start, self.parameters.end)
|
||||||
|
end = time.time()
|
||||||
|
log.ODM_INFO("Preprocessing time: {:.2f}s".format(end - start2))
|
||||||
|
log.ODM_INFO("Calculated luminance_range_size is {}".format(self.black_checker.luminance_range_size))
|
||||||
|
log.ODM_INFO("Calculated luminance_minimum_value is {}".format(self.black_checker.luminance_minimum_value))
|
||||||
|
log.ODM_INFO("Calculated absolute_threshold is {}".format(self.black_checker.absolute_threshold))
|
||||||
|
|
||||||
|
# open video file
|
||||||
|
cap = cv2.VideoCapture(input_file)
|
||||||
|
if (not cap.isOpened()):
|
||||||
|
log.ODM_INFO("Error opening video stream or file")
|
||||||
|
return
|
||||||
|
|
||||||
|
if (self.parameters.start is not None):
|
||||||
|
cap.set(cv2.CAP_PROP_POS_FRAMES, self.parameters.start)
|
||||||
|
self.frame_index = self.parameters.start
|
||||||
|
start_frame = self.parameters.start
|
||||||
|
else:
|
||||||
|
start_frame = 0
|
||||||
|
|
||||||
|
frames_to_process = self.parameters.end - start_frame + 1 if (self.parameters.end is not None) else video_info.total_frames - start_frame
|
||||||
|
|
||||||
|
progress = 0
|
||||||
|
while (cap.isOpened()):
|
||||||
|
ret, frame = cap.read()
|
||||||
|
|
||||||
|
if not ret:
|
||||||
|
break
|
||||||
|
|
||||||
|
if (self.parameters.end is not None and self.frame_index > self.parameters.end):
|
||||||
|
break
|
||||||
|
|
||||||
|
# Calculate progress percentage
|
||||||
|
prev_progress = progress
|
||||||
|
progress = floor((self.frame_index - start_frame + 1) / frames_to_process * 100)
|
||||||
|
if progress != prev_progress:
|
||||||
|
print("[{}][{:3d}%] Processing frame {}/{}: ".format(file_name, progress, self.frame_index - start_frame + 1, frames_to_process), end="\r")
|
||||||
|
|
||||||
|
stats = self.ProcessFrame(frame, video_info, srt_parser)
|
||||||
|
|
||||||
|
if stats is not None and self.parameters.stats_file is not None:
|
||||||
|
self.WriteStats(input_file, stats)
|
||||||
|
|
||||||
|
# Add element to array
|
||||||
|
if stats is not None and "written" in stats.keys():
|
||||||
|
output_file_paths.append(stats["path"])
|
||||||
|
|
||||||
|
cap.release()
|
||||||
|
|
||||||
|
if self.f is not None:
|
||||||
|
self.f.close()
|
||||||
|
|
||||||
|
if self.parameters.limit is not None and self.parameters.limit > 0 and self.global_idx >= self.parameters.limit:
|
||||||
|
log.ODM_INFO("Limit of {} frames reached, trimming dataset".format(self.parameters.limit))
|
||||||
|
output_file_paths = limit_files(output_file_paths, self.parameters.limit)
|
||||||
|
|
||||||
|
end = time.time()
|
||||||
|
log.ODM_INFO("Total processing time: {:.2f}s".format(end - start))
|
||||||
|
return output_file_paths
|
||||||
|
|
||||||
|
|
||||||
|
def ProcessFrame(self, frame, video_info, srt_parser):
|
||||||
|
|
||||||
|
res = {"frame_index": self.frame_index, "global_idx": self.global_idx}
|
||||||
|
|
||||||
|
frame_bw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
|
h, w = frame_bw.shape
|
||||||
|
resolution = self.parameters.internal_resolution
|
||||||
|
if resolution < w or resolution < h:
|
||||||
|
m = max(w, h)
|
||||||
|
factor = resolution / m
|
||||||
|
frame_bw = cv2.resize(frame_bw, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_NEAREST)
|
||||||
|
|
||||||
|
if (self.blur_checker is not None):
|
||||||
|
blur_score, is_blurry = self.blur_checker.IsBlur(frame_bw, self.frame_index)
|
||||||
|
res["blur_score"] = blur_score
|
||||||
|
res["is_blurry"] = is_blurry
|
||||||
|
|
||||||
|
if is_blurry:
|
||||||
|
# print ("blurry, skipping")
|
||||||
|
self.frame_index += 1
|
||||||
|
return res
|
||||||
|
|
||||||
|
if (self.black_checker is not None):
|
||||||
|
is_black = self.black_checker.IsBlack(frame_bw, self.frame_index)
|
||||||
|
res["is_black"] = is_black
|
||||||
|
|
||||||
|
if is_black:
|
||||||
|
# print ("black, skipping")
|
||||||
|
self.frame_index += 1
|
||||||
|
return res
|
||||||
|
|
||||||
|
if (self.similarity_checker is not None):
|
||||||
|
similarity_score, is_similar, last_frame_index = self.similarity_checker.IsSimilar(frame_bw, self.frame_index)
|
||||||
|
res["similarity_score"] = similarity_score
|
||||||
|
res["is_similar"] = is_similar
|
||||||
|
res["last_frame_index"] = last_frame_index
|
||||||
|
|
||||||
|
if is_similar:
|
||||||
|
# print ("similar to {}, skipping".format(self.similarity_checker.last_image_id))
|
||||||
|
self.frame_index += 1
|
||||||
|
return res
|
||||||
|
|
||||||
|
path = self.SaveFrame(frame, video_info, srt_parser)
|
||||||
|
res["written"] = True
|
||||||
|
res["path"] = path
|
||||||
|
self.frame_index += 1
|
||||||
|
self.global_idx += 1
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def SaveFrame(self, frame, video_info, srt_parser: SrtFileParser):
|
||||||
|
max_dim = self.parameters.max_dimension
|
||||||
|
if max_dim is not None:
|
||||||
|
h, w, _ = frame.shape
|
||||||
|
if max_dim < w or max_dim < h:
|
||||||
|
m = max(w, h)
|
||||||
|
factor = max_dim / m
|
||||||
|
frame = cv2.resize(frame, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_AREA)
|
||||||
|
|
||||||
|
path = os.path.join(self.parameters.output,
|
||||||
|
"{}_{}_{}.{}".format(video_info.basename, self.global_idx, self.frame_index, self.parameters.frame_format))
|
||||||
|
|
||||||
|
_, buf = cv2.imencode('.' + self.parameters.frame_format, frame)
|
||||||
|
|
||||||
|
delta = datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate))
|
||||||
|
elapsed_time = datetime.datetime(1900, 1, 1) + delta
|
||||||
|
|
||||||
|
img = Image.open(io.BytesIO(buf))
|
||||||
|
|
||||||
|
entry = gps_coords = None
|
||||||
|
if srt_parser is not None:
|
||||||
|
entry = srt_parser.get_entry(elapsed_time)
|
||||||
|
gps_coords = srt_parser.get_gps(elapsed_time)
|
||||||
|
|
||||||
|
exif_time = (elapsed_time + (self.date_now - datetime.datetime(1900, 1, 1)))
|
||||||
|
elapsed_time_str = exif_time.strftime("%Y:%m:%d %H:%M:%S")
|
||||||
|
subsec_time_str = exif_time.strftime("%f")
|
||||||
|
|
||||||
|
# Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail'
|
||||||
|
# Set the EXIF metadata
|
||||||
|
exif_dict = {
|
||||||
|
"0th": {
|
||||||
|
piexif.ImageIFD.Software: "ODM",
|
||||||
|
piexif.ImageIFD.DateTime: elapsed_time_str,
|
||||||
|
piexif.ImageIFD.XResolution: (frame.shape[1], 1),
|
||||||
|
piexif.ImageIFD.YResolution: (frame.shape[0], 1),
|
||||||
|
piexif.ImageIFD.Make: "DJI" if video_info.basename.lower().startswith("dji") else "Unknown",
|
||||||
|
piexif.ImageIFD.Model: "Unknown"
|
||||||
|
},
|
||||||
|
"Exif": {
|
||||||
|
piexif.ExifIFD.DateTimeOriginal: elapsed_time_str,
|
||||||
|
piexif.ExifIFD.DateTimeDigitized: elapsed_time_str,
|
||||||
|
piexif.ExifIFD.SubSecTime: subsec_time_str,
|
||||||
|
piexif.ExifIFD.PixelXDimension: frame.shape[1],
|
||||||
|
piexif.ExifIFD.PixelYDimension: frame.shape[0],
|
||||||
|
}}
|
||||||
|
|
||||||
|
if entry is not None:
|
||||||
|
if entry["shutter"] is not None:
|
||||||
|
exif_dict["Exif"][piexif.ExifIFD.ExposureTime] = (1, int(entry["shutter"]))
|
||||||
|
if entry["focal_len"] is not None:
|
||||||
|
exif_dict["Exif"][piexif.ExifIFD.FocalLength] = (entry["focal_len"], 100)
|
||||||
|
if entry["fnum"] is not None:
|
||||||
|
exif_dict["Exif"][piexif.ExifIFD.FNumber] = float_to_rational(entry["fnum"])
|
||||||
|
if entry["iso"] is not None:
|
||||||
|
exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = entry["iso"]
|
||||||
|
|
||||||
|
if gps_coords is not None:
|
||||||
|
exif_dict["GPS"] = get_gps_location(elapsed_time, gps_coords[1], gps_coords[0], gps_coords[2])
|
||||||
|
|
||||||
|
exif_bytes = piexif.dump(exif_dict)
|
||||||
|
img.save(path, exif=exif_bytes, quality=95)
|
||||||
|
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def WriteStats(self, input_file, stats):
|
||||||
|
self.f.write("{};{};{};{};{};{};{};{};{};{}\n".format(
|
||||||
|
stats["global_idx"],
|
||||||
|
input_file,
|
||||||
|
stats["frame_index"],
|
||||||
|
stats["blur_score"] if "blur_score" in stats else "",
|
||||||
|
stats["is_blurry"] if "is_blurry" in stats else "",
|
||||||
|
stats["is_black"] if "is_black" in stats else "",
|
||||||
|
stats["last_frame_index"] if "last_frame_index" in stats else "",
|
||||||
|
stats["similarity_score"] if "similarity_score" in stats else "",
|
||||||
|
stats["is_similar"] if "is_similar" in stats else "",
|
||||||
|
stats["written"] if "written" in stats else "").replace(".", ","))
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_info(input_file):
|
||||||
|
|
||||||
|
video = cv2.VideoCapture(input_file)
|
||||||
|
basename = os.path.splitext(os.path.basename(input_file))[0]
|
||||||
|
|
||||||
|
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||||
|
frame_rate = video.get(cv2.CAP_PROP_FPS)
|
||||||
|
|
||||||
|
video.release()
|
||||||
|
|
||||||
|
return collections.namedtuple("VideoInfo", ["total_frames", "frame_rate", "basename"])(total_frames, frame_rate, basename)
|
||||||
|
|
||||||
|
def float_to_rational(f):
    """Express *f* as an (numerator, denominator) integer pair.

    Uses Fraction.limit_denominator() so floats map to a compact,
    EXIF-friendly rational approximation.
    """
    ratio = Fraction(f).limit_denominator()
    return ratio.numerator, ratio.denominator
|
||||||
|
|
||||||
|
def limit_files(paths, limit):
    """Keep at most `limit` evenly spaced entries from `paths`.

    Files at all other indexes are deleted from disk. Returns the list of
    surviving paths; when `paths` already fits within `limit` it is returned
    untouched and nothing is removed.
    """
    count = len(paths)
    if count <= limit:
        return paths

    # Evenly spaced survivor indexes (always includes first and last).
    keep_idxes = np.linspace(0, count - 1, limit, dtype=int)
    drop_idxes = set(np.arange(0, count)) - set(keep_idxes)

    for idx in drop_idxes:
        os.remove(paths[idx])

    return list(np.array(paths)[keep_idxes])
|
||||||
|
|
||||||
|
def to_deg(value, loc):
    """Convert decimal coordinates into a degrees, minutes, seconds tuple.

    Keyword arguments: value is float gps-value, loc is direction list ["S", "N"] or ["W", "E"]
    return: tuple like (25, 13, 48.343, 'N'); the reference letter is "" when value == 0
    """
    if value < 0:
        loc_value = loc[0]
    elif value > 0:
        loc_value = loc[1]
    else:
        loc_value = ""
    abs_value = abs(value)
    deg = int(abs_value)
    frac_minutes = (abs_value - deg) * 60
    # Renamed from `min` to avoid shadowing the builtin.
    minutes = int(frac_minutes)
    sec = round((frac_minutes - minutes) * 60, 5)
    return (deg, minutes, sec, loc_value)
|
||||||
|
|
||||||
|
def get_gps_location(elapsed_time, lat, lng, altitude):
    """Build a piexif GPS IFD dictionary for a capture time and position.

    elapsed_time -- datetime used for the GPSDateStamp tag
    lat, lng     -- decimal degrees; when either is None, no position tags are written
    altitude     -- meters above sea level, or None to omit altitude tags
    """
    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d')
    }

    if lat is not None and lng is not None:
        # Convert only when coordinates are present: calling to_deg(None, ...)
        # unconditionally (as before) raised a TypeError before this guard
        # could ever take effect.
        lat_deg = to_deg(lat, ["S", "N"])
        lng_deg = to_deg(lng, ["W", "E"])

        exiv_lat = (float_to_rational(lat_deg[0]), float_to_rational(lat_deg[1]), float_to_rational(lat_deg[2]))
        exiv_lng = (float_to_rational(lng_deg[0]), float_to_rational(lng_deg[1]), float_to_rational(lng_deg[2]))

        gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = lat_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLatitude] = exiv_lat
        gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = lng_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLongitude] = exiv_lng

    if altitude is not None:
        # 0 == above sea level; altitude is rounded to an integer rational.
        gps_ifd[piexif.GPSIFD.GPSAltitudeRef] = 0
        gps_ifd[piexif.GPSIFD.GPSAltitude] = float_to_rational(round(altitude))

    return gps_ifd
|
|
@ -34,3 +34,4 @@ onnxruntime==1.12.1
|
||||||
codem==0.24.0
|
codem==0.24.0
|
||||||
trimesh==3.17.1
|
trimesh==3.17.1
|
||||||
pandas==1.5.2
|
pandas==1.5.2
|
||||||
|
piexif==1.1.3
|
||||||
|
|
|
@ -15,6 +15,7 @@ from opendm import ai
|
||||||
from opendm.skyremoval.skyfilter import SkyFilter
|
from opendm.skyremoval.skyfilter import SkyFilter
|
||||||
from opendm.bgfilter import BgFilter
|
from opendm.bgfilter import BgFilter
|
||||||
from opendm.concurrency import parallel_map
|
from opendm.concurrency import parallel_map
|
||||||
|
from opendm.video.video2dataset import Parameters, Video2Dataset
|
||||||
|
|
||||||
def save_images_database(photos, database_file):
|
def save_images_database(photos, database_file):
|
||||||
with open(database_file, 'w') as f:
|
with open(database_file, 'w') as f:
|
||||||
|
@ -58,22 +59,25 @@ class ODMLoadDatasetStage(types.ODM_Stage):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.ODM_WARNING("Cannot write benchmark file: %s" % str(e))
|
log.ODM_WARNING("Cannot write benchmark file: %s" % str(e))
|
||||||
|
|
||||||
# check if the image filename is supported
def valid_filename(filename, supported_extensions):
    """Return True when `filename` has a supported extension and is not a mask image."""
    base, ext = os.path.splitext(filename)
    if ext.lower() not in supported_extensions:
        return False
    # Files named "*_mask.<ext>" are companion masks, not input images.
    return not base.endswith("_mask")
|
||||||
|
|
||||||
# Get supported images from dir
def get_images(in_dir):
    """Split the entries of `in_dir` into (supported images, rejected files)."""
    valid, rejects = [], []
    for entry in os.listdir(in_dir):
        bucket = valid if valid_filename(entry, context.supported_extensions) else rejects
        bucket.append(entry)
    return valid, rejects
|
||||||
|
|
||||||
|
def search_video_files(in_dir):
    """Return full paths of all supported video files directly inside `in_dir`."""
    return [os.path.join(in_dir, entry)
            for entry in os.listdir(in_dir)
            if valid_filename(entry, context.supported_video_extensions)]
|
||||||
|
|
||||||
def find_mask(photo_path, masks):
|
def find_mask(photo_path, masks):
|
||||||
(pathfn, ext) = os.path.splitext(os.path.basename(photo_path))
|
(pathfn, ext) = os.path.splitext(os.path.basename(photo_path))
|
||||||
k = "{}_mask".format(pathfn)
|
k = "{}_mask".format(pathfn)
|
||||||
|
@ -85,6 +89,8 @@ class ODMLoadDatasetStage(types.ODM_Stage):
|
||||||
return mask
|
return mask
|
||||||
else:
|
else:
|
||||||
log.ODM_WARNING("Image mask {} has a space. Spaces are currently not supported for image masks.".format(mask))
|
log.ODM_WARNING("Image mask {} has a space. Spaces are currently not supported for image masks.".format(mask))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# get images directory
|
# get images directory
|
||||||
images_dir = tree.dataset_raw
|
images_dir = tree.dataset_raw
|
||||||
|
@ -100,6 +106,51 @@ class ODMLoadDatasetStage(types.ODM_Stage):
|
||||||
if not os.path.exists(images_dir):
|
if not os.path.exists(images_dir):
|
||||||
raise system.ExitException("There are no images in %s! Make sure that your project path and dataset name is correct. The current is set to: %s" % (images_dir, args.project_path))
|
raise system.ExitException("There are no images in %s! Make sure that your project path and dataset name is correct. The current is set to: %s" % (images_dir, args.project_path))
|
||||||
|
|
||||||
|
# Check if we need to extract video frames
|
||||||
|
frames_db_file = os.path.join(images_dir, 'frames.json')
|
||||||
|
if not os.path.exists(frames_db_file) or self.rerun():
|
||||||
|
video_files = search_video_files(images_dir)
|
||||||
|
|
||||||
|
# If we're re-running the pipeline, and frames have been extracted during a previous run
|
||||||
|
# we need to remove those before re-extracting them
|
||||||
|
if len(video_files) > 0 and os.path.exists(frames_db_file) and self.rerun():
|
||||||
|
log.ODM_INFO("Re-run, removing previously extracted video frames")
|
||||||
|
frames = []
|
||||||
|
try:
|
||||||
|
with open(frames_db_file, 'r') as f:
|
||||||
|
frames = json.loads(f.read())
|
||||||
|
except Exception as e:
|
||||||
|
log.ODM_WARNING("Cannot check previous video extraction: %s" % str(e))
|
||||||
|
|
||||||
|
for f in frames:
|
||||||
|
fp = os.path.join(images_dir, f)
|
||||||
|
if os.path.isfile(fp):
|
||||||
|
os.remove(fp)
|
||||||
|
|
||||||
|
if len(video_files) > 0:
|
||||||
|
log.ODM_INFO("Found video files (%s), extracting frames" % len(video_files))
|
||||||
|
|
||||||
|
try:
|
||||||
|
params = Parameters({
|
||||||
|
"input": video_files,
|
||||||
|
"output": images_dir,
|
||||||
|
|
||||||
|
"blur_threshold": 300,
|
||||||
|
"distance_threshold": 10,
|
||||||
|
"black_ratio_threshold": 0.98,
|
||||||
|
"pixel_black_threshold": 0.30,
|
||||||
|
"use_srt": True,
|
||||||
|
"max_dimension": args.video_resolution,
|
||||||
|
"limit": args.video_limit,
|
||||||
|
})
|
||||||
|
v2d = Video2Dataset(params)
|
||||||
|
frames = v2d.ProcessVideo()
|
||||||
|
|
||||||
|
with open(frames_db_file, 'w') as f:
|
||||||
|
f.write(json.dumps([os.path.basename(f) for f in frames]))
|
||||||
|
except Exception as e:
|
||||||
|
log.ODM_WARNING("Could not extract video frames: %s" % str(e))
|
||||||
|
|
||||||
files, rejects = get_images(images_dir)
|
files, rejects = get_images(images_dir)
|
||||||
if files:
|
if files:
|
||||||
# create ODMPhoto list
|
# create ODMPhoto list
|
||||||
|
|
Ładowanie…
Reference in New Issue