Merge pull request #1567 from HeDo88TH/add-video2dataset

Added video2dataset module
pull/1589/head
Piero Toffanin 2023-01-24 20:53:11 -05:00 zatwierdzone przez GitHub
commit 643f92a66d
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
13 zmienionych plików z 820 dodań i 8 usunięć

Wyświetl plik

@ -259,6 +259,10 @@ Experimental flags need to be enabled in Docker to use the ```--squash``` flag.
After this, you must restart docker.
## Video Support
Starting from version 3.0.4, ODM can automatically extract images from video files (.mp4 or .mov). Just place one or more video files into the `images` folder and run the program as usual. Subtitle files (.srt) with GPS information are also supported. Place .srt files in the `images` folder, making sure that the filenames match. For example, `my_video.mp4` ==> `my_video.srt` (case-sensitive).
## Developers
Help improve our software! We welcome contributions from everyone, whether to add new features, improve speed, fix existing bugs or add support for more cameras. Check our [code of conduct](https://github.com/OpenDroneMap/documents/blob/master/CONDUCT.md), the [contributing guidelines](https://github.com/OpenDroneMap/documents/blob/master/CONTRIBUTING.md) and [how decisions are made](https://github.com/OpenDroneMap/documents/blob/master/GOVERNANCE.md#how-decisions-are-made).

Wyświetl plik

@ -55,7 +55,7 @@ ExternalProject_Add(${_proj_name}
-DBUILD_opencv_photo=ON
-DBUILD_opencv_legacy=ON
-DBUILD_opencv_python3=ON
-DWITH_FFMPEG=OFF
-DWITH_FFMPEG=ON
-DWITH_CUDA=OFF
-DWITH_GTK=OFF
-DWITH_VTK=OFF

Wyświetl plik

@ -1 +1 @@
3.0.3
3.0.4

Wyświetl plik

@ -653,6 +653,20 @@ def config(argv=None, parser=None):
version='ODM {0}'.format(__version__),
help='Displays version number and exits. ')
parser.add_argument('--video-limit',
type=int,
action=StoreValue,
default=500,
metavar='<positive integer>',
help='Maximum number of frames to extract from video files for processing. Set to 0 for no limit. Default: %(default)s')
parser.add_argument('--video-resolution',
type=int,
action=StoreValue,
default=4000,
metavar='<positive integer>',
help='The maximum output resolution of extracted video frames in pixels. Default: %(default)s')
parser.add_argument('--split',
type=int,
action=StoreValue,

Wyświetl plik

@ -41,6 +41,7 @@ settings_path = os.path.join(root_path, 'settings.yaml')
# Define supported image extensions
supported_extensions = {'.jpg','.jpeg','.png', '.tif', '.tiff', '.bmp'}
supported_video_extensions = {'.mp4', '.mov'}
# Define the number of cores
num_cores = multiprocessing.cpu_count()

Wyświetl plik

@ -119,7 +119,6 @@ def parse_srs_header(header):
:param header (str) line
:return Proj object
"""
log.ODM_INFO('Parsing SRS header: %s' % header)
header = header.strip()
ref = header.split(' ')
@ -155,4 +154,15 @@ def parse_srs_header(header):
'Modify your input and try again.' % header)
raise RuntimeError(e)
return srs
return srs
def utm_srs_from_ll(lon, lat):
    """Return the WGS84 UTM spatial reference for a lon/lat position.

    The zone and hemisphere are obtained from
    get_utm_zone_and_hemisphere_from() and formatted into an SRS header
    string, which parse_srs_header() converts into the returned object.

    :param lon longitude of the position
    :param lat latitude of the position
    :return whatever parse_srs_header() returns for the UTM header
    """
    zone, hemi = get_utm_zone_and_hemisphere_from(lon, lat)
    header = "WGS84 UTM {}{}".format(zone, hemi)
    return parse_srs_header(header)
def utm_transformers_from_ll(lon, lat):
    """Build the forward and inverse transformers for a lon/lat position.

    The source reference is EPSG:4326; the target is the UTM reference
    selected by utm_srs_from_ll() for the given position.

    :param lon longitude of the position
    :param lat latitude of the position
    :return tuple (ll_to_utm, utm_to_ll) created with transformer()
    """
    wgs84 = CRS.from_epsg(4326)
    utm = utm_srs_from_ll(lon, lat)
    return transformer(wgs84, utm), transformer(utm, wgs84)

Wyświetl plik

Wyświetl plik

@ -0,0 +1,128 @@
import cv2
import numpy as np
class ThresholdBlurChecker:
    """Blur detector based on the variance of the Laplacian of a frame.

    A frame is reported as blurry when that variance is strictly below
    the threshold given at construction time.
    """

    def __init__(self, threshold):
        self.threshold = threshold

    def PreProcess(self, video_path, start_frame, end_frame):
        """Do nothing; this checker needs no pass over the video."""
        return None

    def NeedPreProcess(self):
        """Tell the caller that PreProcess() does not have to be run."""
        return False

    def IsBlur(self, image_bw, id):
        """Score a grayscale frame.

        :param image_bw grayscale image handed to cv2.Laplacian
        :param id frame identifier (not used by this checker)
        :return tuple (score, is_blurry)
        """
        score = cv2.Laplacian(image_bw, cv2.CV_64F).var()
        is_blurry = score < self.threshold
        return score, is_blurry
class SimilarityChecker:
    """Detect frames that barely moved relative to the last kept frame.

    Corner features of the reference frame are tracked into the candidate
    frame with pyramidal Lucas-Kanade optical flow; the average absolute
    displacement of the successfully tracked features is the distance.
    A frame is "similar" when that distance is strictly below the threshold.
    """

    def __init__(self, threshold, max_features=500):
        self.threshold = threshold
        self.max_features = max_features
        self.last_image = None
        self.last_image_id = None
        self.last_image_features = None

    def _remember(self, image_bw, id):
        # Make this frame the reference for the following comparisons.
        self.last_image = image_bw
        self.last_image_id = id
        self.last_image_features = cv2.goodFeaturesToTrack(image_bw, self.max_features, 0.01, 10)

    def IsSimilar(self, image_bw, id):
        """Compare a grayscale frame with the current reference frame.

        :param image_bw grayscale frame to test
        :param id identifier stored when the frame becomes the reference
        :return tuple (distance, is_similar, reference_id); the first call
            returns (0, False, None). When no feature can be tracked the
            distance is NaN and the frame is reported as not similar.
        """
        if self.last_image is None:
            self._remember(image_bw, id)
            return 0, False, None

        # goodFeaturesToTrack yields None when it finds no corner at all;
        # there is nothing to track, so treat the frame as new content
        # instead of handing None to calcOpticalFlowPyrLK.
        if self.last_image_features is None or len(self.last_image_features) == 0:
            self._remember(image_bw, id)
            return float("nan"), False, self.last_image_id

        # Detect features
        features, status, _ = cv2.calcOpticalFlowPyrLK(self.last_image, image_bw, self.last_image_features, None)

        # Filter out the "bad" features (i.e. those that are not tracked successfully)
        tracked = status == 1
        good_features = features[tracked]
        good_features2 = self.last_image_features[tracked]

        if len(good_features) == 0:
            # Averaging an empty array would give NaN plus a numpy warning;
            # produce the same NaN explicitly.
            distance = float("nan")
        else:
            # Calculate the difference between the locations of the good features in the two frames
            distance = np.average(np.abs(good_features2 - good_features))

        res = distance < self.threshold

        if not res:
            self._remember(image_bw, id)

        # When the frame was not similar the reference id is now this frame's id
        return distance, res, self.last_image_id
class NaiveBlackFrameChecker:
    """Black-frame detector comparing the mean pixel value to a threshold."""

    def __init__(self, threshold):
        self.threshold = threshold

    def NeedPreProcess(self):
        """Tell the caller that PreProcess() does not have to be run."""
        return False

    def PreProcess(self, video_path, start_frame, end_frame, width=800, height=600):
        """Do nothing; this checker needs no pass over the video."""
        return None

    def IsBlack(self, image_bw, id):
        """Return whether the average of image_bw is below the threshold.

        :param image_bw grayscale image
        :param id frame identifier (not used by this checker)
        """
        mean_level = np.average(image_bw)
        return mean_level < self.threshold
class BlackFrameChecker:
    """Black-frame detector with a luminance threshold derived from the video.

    PreProcess() scans the video to find the minimum luminance and the
    largest per-frame luminance range, and derives from them the absolute
    pixel value under which a pixel counts as black. IsBlack() then reports
    a frame as black when the ratio of black pixels reaches
    picture_black_ratio_th.
    """

    def __init__(self, picture_black_ratio_th=0.98, pixel_black_th=0.30):
        # None falls back to the documented defaults
        self.picture_black_ratio_th = picture_black_ratio_th if picture_black_ratio_th is not None else 0.98
        self.pixel_black_th = pixel_black_th if pixel_black_th is not None else 0.30
        self.luminance_minimum_value = None
        self.luminance_range_size = None
        self.absolute_threshold = None

    def NeedPreProcess(self):
        """Tell the caller that PreProcess() must be run before IsBlack()."""
        return True

    def PreProcess(self, video_path, start_frame, end_frame):
        """Scan the video and compute the absolute black threshold.

        :param video_path path of the video file
        :param start_frame first frame index to scan; None means 0
        :param end_frame last frame index to scan (inclusive); None scans
            until the end of the stream, -1 uses the reported frame count
        """
        first_frame = start_frame if start_frame is not None else 0

        # Open video file
        cap = cv2.VideoCapture(video_path)
        try:
            # Set frame start and end indices
            cap.set(cv2.CAP_PROP_POS_FRAMES, first_frame)
            last_frame = end_frame
            if end_frame == -1:
                last_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Initialize luminance range size and minimum value
            self.luminance_range_size = 0
            self.luminance_minimum_value = 255

            frame_index = first_frame

            # Read and process frames from video file. The resolved
            # last_frame is used here so that end_frame == -1 scans the
            # whole video instead of stopping immediately.
            while cap.isOpened() and (last_frame is None or frame_index <= last_frame):
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert frame to grayscale
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                gray_frame_min = int(gray_frame.min())
                gray_frame_max = int(gray_frame.max())

                # Update luminance range size and minimum value
                self.luminance_range_size = max(self.luminance_range_size, gray_frame_max - gray_frame_min)
                self.luminance_minimum_value = min(self.luminance_minimum_value, gray_frame_min)

                frame_index += 1
        finally:
            # Close video file even if decoding failed
            cap.release()

        # Calculate absolute threshold for considering a pixel "black"
        self.absolute_threshold = self.luminance_minimum_value + self.pixel_black_th * self.luminance_range_size

    def IsBlack(self, image_bw, id):
        """Return whether the grayscale frame is considered black.

        :param image_bw 2D grayscale image
        :param id frame identifier (not used by this checker)
        """
        # Count number of pixels < self.absolute_threshold
        nb_black_pixels = np.sum(image_bw < self.absolute_threshold)

        # Calculate ratio of black pixels
        ratio_black_pixels = nb_black_pixels / (image_bw.shape[0] * image_bw.shape[1])

        # Check if ratio of black pixels is above threshold
        return ratio_black_pixels >= self.picture_black_ratio_th

Wyświetl plik

@ -0,0 +1,46 @@
import argparse
import datetime
import os
class Parameters:
    """Settings for the video frame extraction, read from a dict.

    Recognized keys of ``args``:

    - "input": path of the input video file, or a list of paths (required)
    - "output": path of the output directory, created if missing (required)
    - "start": start frame index (default 0)
    - "end": end frame index (default None)
    - "limit": maximum number of output frames (default None)
    - "blur_threshold": blur measures that fall below this value will be
      considered 'blurry'. Good value is 300
    - "distance_threshold": distance measures that fall below this value
      will be considered 'similar'
    - "black_ratio_threshold": minimum value of the ratio
      nb_black_pixels / nb_pixels for considering a frame 'black'.
      Good value is 0.98
    - "pixel_black_threshold": maximum pixel luminance for which a pixel is
      considered 'black'. Good value is 0.30 (30%)
    - "use_srt": use SRT files for extracting metadata (same name as the
      video file with .srt extension); enabled only for a truthy value
    - "frame_format": frame format (jpg, png, tiff, etc.), default "jpg"
    - "max_dimension": override output resolution (ex. 1024)
    - "stats_file": save statistics to this csv file
    """

    def __init__(self, args):
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(args["output"], exist_ok=True)

        self.input = args["input"]
        if isinstance(self.input, str):
            # A single path is wrapped so callers can always iterate
            self.input = [self.input]

        self.output = args["output"]
        self.start = args.get("start", 0)
        self.end = args.get("end", None)
        self.limit = args.get("limit", None)
        self.blur_threshold = args.get("blur_threshold", None)
        self.distance_threshold = args.get("distance_threshold", None)
        self.black_ratio_threshold = args.get("black_ratio_threshold", None)
        self.pixel_black_threshold = args.get("pixel_black_threshold", None)
        # Honor the value, not just the presence of the key, so that
        # {"use_srt": False} really disables SRT parsing.
        self.use_srt = bool(args.get("use_srt", False))
        self.frame_format = args.get("frame_format", "jpg")
        self.max_dimension = args.get("max_dimension", None)
        self.stats_file = args.get("stats_file", None)

        # We will resize the image to this size before processing
        self.internal_resolution = 800

Wyświetl plik

@ -0,0 +1,206 @@
from datetime import datetime
from opendm import location, log
import re
def match_single(regexes, line, dtype=int):
    """Return the converted first capture group of the first matching regex.

    :param regexes a pattern string, or a list whose items are either
        pattern strings or (pattern, transform) tuples
    :param line text searched with re.search
    :param dtype callable applied to the captured text for patterns that do
        not come with their own transform
    :return the transformed value of group 1 of the first pattern that
        matches; None when nothing matches or when the conversion fails
        (a warning is logged in that case)
    """
    if isinstance(regexes, str):
        regexes = [regexes]

    # Normalize into (pattern, transform) pairs in a new list so the
    # caller's list is left untouched.
    pairs = [(r, dtype) if isinstance(r, str) else r for r in regexes]

    try:
        for pattern, transform in pairs:
            match = re.search(pattern, line)
            if match:
                return transform(match.group(1))
    except Exception as e:
        # The message is formatted here: the logger is given a single string
        log.ODM_WARNING("Cannot parse SRT line \"%s\": %s" % (line, str(e)))

    return None
class SrtFileParser:
    """Parser for drone telemetry stored in SRT subtitle files.

    parse() fills ``data`` with one dict per subtitle cue, holding the keys
    "start", "end", "iso", "shutter", "fnum", "focal_len", "latitude",
    "longitude" and "altitude" (None when a value is not available).
    "start" and "end" are datetime objects produced by strptime from the
    cue's HH:MM:SS,fff timing.
    """

    def __init__(self, filename):
        self.filename = filename
        self.data = []
        self.gps_data = []
        self.ll_to_utm = None
        self.utm_to_ll = None

    def get_entry(self, timestamp: datetime):
        """Return the first parsed cue whose [start, end] contains timestamp.

        The file is parsed on demand when no data is loaded yet.

        :param timestamp datetime comparable with the cues' start/end
        :return the matching entry dict, or None
        """
        if not self.data:
            self.parse()

        # Nothing could be parsed: there is no range to check against
        if not self.data:
            return None

        # check min and max
        if timestamp < self.data[0]["start"] or timestamp > self.data[-1]["end"]:
            return None

        for entry in self.data:
            if entry["start"] <= timestamp <= entry["end"]:
                return entry

        return None

    def get_gps(self, timestamp):
        """Return the position at timestamp, interpolated between GPS fixes.

        On first use the cues that carry latitude and longitude are
        converted with ll_to_utm and cached in ``gps_data``, keeping only
        the entries whose X/Y differ from the previous kept one. The lookup
        then interpolates linearly between the two fixes surrounding
        timestamp and converts the result back with utm_to_ll.

        :param timestamp datetime comparable with the cues' start time
        :return the result of utm_to_ll.TransformPoint(...), or None when no
            fix is available, timestamp lies outside the known fixes, or
            the surrounding fixes are 10 seconds or more apart
        """
        if not self.data:
            self.parse()

        # Initialize on first call
        if not self.gps_data:
            for d in self.data:
                lat, lon, alt = d.get('latitude'), d.get('longitude'), d.get('altitude')
                tm = d.get('start')

                if lat is None or lon is None:
                    continue

                if self.ll_to_utm is None:
                    self.ll_to_utm, self.utm_to_ll = location.utm_transformers_from_ll(lon, lat)

                coords = self.ll_to_utm.TransformPoint(lon, lat, alt)

                # First or new (in X/Y only)
                if not self.gps_data or (coords[0], coords[1]) != (self.gps_data[-1][1][0], self.gps_data[-1][1][1]):
                    self.gps_data.append((tm, coords))

        # No data available
        if not self.gps_data or self.gps_data[0][0] > timestamp:
            return None

        # Interpolate
        for i, (tm, coords) in enumerate(self.gps_data):
            # Perfect match
            if timestamp == tm:
                return self.utm_to_ll.TransformPoint(*coords)

            if tm > timestamp:
                start = i - 1
                if start < 0:
                    return None

                gd_s = self.gps_data[start]
                gd_e = self.gps_data[i]
                sx, sy, sz = gd_s[1]
                ex, ey, ez = gd_e[1]

                dt = (gd_e[0] - gd_s[0]).total_seconds()
                # Fixes too far apart are not trusted for interpolation
                if dt >= 10:
                    return None

                dx = (ex - sx) / dt
                dy = (ey - sy) / dt
                dz = (ez - sz) / dt
                t = (timestamp - gd_s[0]).total_seconds()

                return self.utm_to_ll.TransformPoint(
                    sx + dx * t,
                    sy + dy * t,
                    sz + dz * t
                )

        # timestamp is later than the last known fix
        return None

    def parse(self):
        """Read the SRT file and rebuild ``data`` from scratch.

        Cues are separated by blank lines; a cue is stored only when its
        timing line was found. Within a cue a field keeps the last value
        that was actually matched, so lines without that field do not erase
        it. The last cue is stored even if the file does not end with a
        blank line.
        """
        # SRT metadata is not standardized, we support the following formats:

        # DJI mavic air 2
        # 1
        # 00:00:00,000 --> 00:00:00,016
        # <font size="36">SrtCnt : 1, DiffTime : 16ms
        # 2023-01-06 18:56:48,380,821
        # [iso : 3200] [shutter : 1/60.0] [fnum : 280] [ev : 0] [ct : 3925] [color_md : default] [focal_len : 240] [latitude: 0.000000] [longitude: 0.000000] [altitude: 0.000000] </font>

        # DJI Mavic Mini
        # 1
        # 00:00:00,000 --> 00:00:01,000
        # F/2.8, SS 206.14, ISO 150, EV 0, GPS (-82.6669, 27.7716, 10), D 2.80m, H 0.00m, H.S 0.00m/s, V.S 0.00m/s

        def nonzero_float(v):
            # A coordinate of exactly 0 is reported when there is no GPS
            # fix (see the Mavic Air 2 sample above); treat it as missing.
            value = float(v)
            return value if value != 0 else None

        field_patterns = (
            ("iso", [
                (r"iso : (\d+)", int),
                (r"ISO (\d+)", int),
            ]),
            # The captured shutter value can be decimal ("60.0", "206.14")
            ("shutter", [
                (r"shutter : \d+/(\d+\.?\d*)", float),
                (r"SS (\d+\.?\d*)", float),
            ]),
            ("fnum", [
                (r"fnum : (\d+)", lambda v: float(v) / 100.0),
                (r"F/([\d\.]+)", float),
            ]),
            ("focal_len", [
                (r"focal_len : (\d+)", int),
            ]),
            ("latitude", [
                (r"latitude: ([\d\.\-]+)", nonzero_float),
                (r"GPS \([\d\.\-]+,? ([\d\.\-]+),? [\d\.\-]+\)", nonzero_float),
            ]),
            ("longitude", [
                (r"longitude: ([\d\.\-]+)", nonzero_float),
                (r"GPS \(([\d\.\-]+),? [\d\.\-]+,? [\d\.\-]+\)", nonzero_float),
            ]),
            # Altitude 0 is kept as a value, as it was before
            ("altitude", [
                (r"altitude: ([\d\.\-]+)", float),
                (r"GPS \([\d\.\-]+,? [\d\.\-]+,? ([\d\.\-]+)\)", float),
            ]),
        )
        entry_keys = ("start", "end", "iso", "shutter", "fnum", "focal_len",
                      "latitude", "longitude", "altitude")

        # Search this "00:00:00,000 --> 00:00:00,016"
        timing_re = re.compile(r"(\d{2}:\d{2}:\d{2},\d+) --> (\d{2}:\d{2}:\d{2},\d+)")
        html_tag_re = re.compile(r'<[^<]+?>')

        entries = []
        entry = dict.fromkeys(entry_keys)

        with open(self.filename, 'r') as f:
            for line in f:
                # An empty line closes the current cue
                if not line.strip():
                    if entry["start"] is not None:
                        entries.append(entry)
                    entry = dict.fromkeys(entry_keys)
                    continue

                # Remove html tags
                line = html_tag_re.sub('', line)

                match = timing_re.search(line)
                if match:
                    entry["start"] = datetime.strptime(match.group(1), "%H:%M:%S,%f")
                    entry["end"] = datetime.strptime(match.group(2), "%H:%M:%S,%f")

                for name, patterns in field_patterns:
                    value = match_single(patterns, line)
                    if value is not None:
                        entry[name] = value

        # Flush the last cue when the file has no trailing blank line
        if entry["start"] is not None:
            entries.append(entry)

        self.data = entries
        # Cached fixes were derived from the previous data
        self.gps_data = []

Wyświetl plik

@ -0,0 +1,351 @@
import datetime
from fractions import Fraction
import io
from math import ceil, floor
import time
import cv2
import os
import collections
from PIL import Image
import numpy as np
import piexif
from opendm import log
from opendm.video.srtparser import SrtFileParser
from opendm.video.parameters import Parameters
from opendm.video.checkers import BlackFrameChecker, SimilarityChecker, ThresholdBlurChecker
class Video2Dataset:
    """Extract still frames from videos, skipping unusable ones.

    Each decoded frame goes through the configured checkers (blur, black
    frame, similarity to the last kept frame); frames that pass are written
    to the output directory with EXIF metadata, optionally enriched with
    the telemetry of a matching SRT file.
    """

    def __init__(self, parameters: "Parameters"):
        self.parameters = parameters

        # A checker is only created when its threshold(s) are configured
        self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) if parameters.blur_threshold is not None else None
        self.similarity_checker = SimilarityChecker(parameters.distance_threshold) if parameters.distance_threshold is not None else None
        self.black_checker = BlackFrameChecker(parameters.black_ratio_threshold, parameters.pixel_black_threshold) if parameters.black_ratio_threshold is not None or parameters.pixel_black_threshold is not None else None

        self.frame_index = parameters.start
        self.f = None

    def ProcessVideo(self):
        """Process every input video and return the written frame paths.

        When a limit is configured and reached, the extracted frames are
        thinned out with limit_files(). A video that cannot be opened is
        reported and skipped. The statistics file, if any, is closed even
        when processing fails.

        :return list of paths of the frames kept on disk
        """
        self.date_now = None
        start = time.time()

        if self.parameters.stats_file is not None:
            self.f = open(self.parameters.stats_file, "w")
            self.f.write("global_idx;file_name;frame_index;blur_score;is_blurry;is_black;last_frame_index;similarity_score;is_similar;written\n")

        self.global_idx = 0
        output_file_paths = []
        previous_duration = 0

        try:
            # foreach input file
            for input_file in self.parameters.input:
                log.ODM_INFO("Processing video: {}".format(input_file))

                # get video info
                video_info = get_video_info(input_file)
                log.ODM_INFO(video_info)

                # Set pseudo start time: the first video starts at its
                # modification time, each following one right after the
                # end of the video processed before it.
                if self.date_now is None:
                    try:
                        self.date_now = datetime.datetime.fromtimestamp(os.path.getmtime(input_file))
                    except (OSError, OverflowError, ValueError):
                        self.date_now = datetime.datetime.now()
                else:
                    self.date_now += datetime.timedelta(seconds=previous_duration)

                # The frame rate is reported as 0 when it is unknown
                if video_info.frame_rate > 0:
                    previous_duration = video_info.total_frames / video_info.frame_rate
                else:
                    previous_duration = 0

                log.ODM_INFO("Use pseudo start time: %s" % self.date_now)

                srt_parser = self._load_srt(input_file) if self.parameters.use_srt else None

                if self.black_checker is not None and self.black_checker.NeedPreProcess():
                    start2 = time.time()
                    log.ODM_INFO("Preprocessing for black frame checker... this might take a bit")
                    self.black_checker.PreProcess(input_file, self.parameters.start, self.parameters.end)
                    end = time.time()
                    log.ODM_INFO("Preprocessing time: {:.2f}s".format(end - start2))
                    log.ODM_INFO("Calculated luminance_range_size is {}".format(self.black_checker.luminance_range_size))
                    log.ODM_INFO("Calculated luminance_minimum_value is {}".format(self.black_checker.luminance_minimum_value))
                    log.ODM_INFO("Calculated absolute_threshold is {}".format(self.black_checker.absolute_threshold))

                output_file_paths.extend(self._extract_frames(input_file, video_info, srt_parser))
        finally:
            if self.f is not None:
                self.f.close()
                self.f = None

        if self.parameters.limit is not None and self.parameters.limit > 0 and self.global_idx >= self.parameters.limit:
            log.ODM_INFO("Limit of {} frames reached, trimming dataset".format(self.parameters.limit))
            output_file_paths = limit_files(output_file_paths, self.parameters.limit)

        end = time.time()
        log.ODM_INFO("Total processing time: {:.2f}s".format(end - start))
        return output_file_paths

    def _load_srt(self, input_file):
        # Return a parsed SrtFileParser for the video's .srt/.SRT sibling, or None.
        name = os.path.splitext(input_file)[0]
        for srt_file in (name + ".srt", name + ".SRT"):
            if not os.path.exists(srt_file):
                continue
            log.ODM_INFO("Loading SRT file: {}".format(srt_file))
            try:
                srt_parser = SrtFileParser(srt_file)
                srt_parser.parse()
                return srt_parser
            except Exception as e:
                log.ODM_INFO("Error parsing SRT file: {}".format(e))
        return None

    def _extract_frames(self, input_file, video_info, srt_parser):
        # Decode one video and return the paths of the frames written for it.
        written_paths = []
        file_name = os.path.basename(input_file)

        # open video file
        cap = cv2.VideoCapture(input_file)
        try:
            if not cap.isOpened():
                log.ODM_INFO("Error opening video stream or file")
                return written_paths

            if self.parameters.start is not None:
                cap.set(cv2.CAP_PROP_POS_FRAMES, self.parameters.start)
                start_frame = self.parameters.start
            else:
                start_frame = 0
            # Frame numbering restarts for every video
            self.frame_index = start_frame

            if self.parameters.end is not None:
                frames_to_process = self.parameters.end - start_frame + 1
            else:
                frames_to_process = video_info.total_frames - start_frame

            progress = 0
            while cap.isOpened():
                ret, frame = cap.read()

                if not ret:
                    break

                if self.parameters.end is not None and self.frame_index > self.parameters.end:
                    break

                # Calculate progress percentage; the total can be 0 when
                # the container does not report a frame count
                prev_progress = progress
                if frames_to_process > 0:
                    progress = floor((self.frame_index - start_frame + 1) / frames_to_process * 100)
                if progress != prev_progress:
                    print("[{}][{:3d}%] Processing frame {}/{}: ".format(file_name, progress, self.frame_index - start_frame + 1, frames_to_process), end="\r")

                stats = self.ProcessFrame(frame, video_info, srt_parser)

                if stats is not None and self.parameters.stats_file is not None:
                    self.WriteStats(input_file, stats)

                # Add element to array
                if stats is not None and "written" in stats:
                    written_paths.append(stats["path"])
        finally:
            cap.release()

        return written_paths

    def ProcessFrame(self, frame, video_info, srt_parser):
        """Run the checkers on a frame and save it when it passes.

        :param frame decoded BGR frame
        :param video_info info tuple of the video the frame belongs to
        :param srt_parser parser providing telemetry, or None
        :return dict with "frame_index" and "global_idx", the results of the
            checkers that ran, and "written"/"path" when the frame was saved
        """
        res = {"frame_index": self.frame_index, "global_idx": self.global_idx}

        frame_bw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # The checkers work on a downscaled copy
        h, w = frame_bw.shape
        resolution = self.parameters.internal_resolution
        if resolution < w or resolution < h:
            factor = resolution / max(w, h)
            frame_bw = cv2.resize(frame_bw, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_NEAREST)

        if self._is_rejected(frame_bw, res):
            self.frame_index += 1
            return res

        path = self.SaveFrame(frame, video_info, srt_parser)
        res["written"] = True
        res["path"] = path
        self.frame_index += 1
        self.global_idx += 1

        return res

    def _is_rejected(self, frame_bw, res):
        # Run the checkers in order, recording their results in res; stop at the first rejection.
        if self.blur_checker is not None:
            blur_score, is_blurry = self.blur_checker.IsBlur(frame_bw, self.frame_index)
            res["blur_score"] = blur_score
            res["is_blurry"] = is_blurry
            if is_blurry:
                return True

        if self.black_checker is not None:
            is_black = self.black_checker.IsBlack(frame_bw, self.frame_index)
            res["is_black"] = is_black
            if is_black:
                return True

        if self.similarity_checker is not None:
            similarity_score, is_similar, last_frame_index = self.similarity_checker.IsSimilar(frame_bw, self.frame_index)
            res["similarity_score"] = similarity_score
            res["is_similar"] = is_similar
            res["last_frame_index"] = last_frame_index
            if is_similar:
                return True

        return False

    def SaveFrame(self, frame, video_info, srt_parser: "SrtFileParser"):
        """Write a frame to the output directory with EXIF metadata.

        The frame is downscaled to max_dimension when configured. The EXIF
        timestamps are the pseudo start time plus the frame's position in
        the video; camera settings and GPS position come from the SRT data
        when available.

        :return path of the written image
        """
        max_dim = self.parameters.max_dimension
        if max_dim is not None:
            h, w, _ = frame.shape
            if max_dim < w or max_dim < h:
                factor = max_dim / max(w, h)
                frame = cv2.resize(frame, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_AREA)

        path = os.path.join(self.parameters.output,
            "{}_{}_{}.{}".format(video_info.basename, self.global_idx, self.frame_index, self.parameters.frame_format))

        _, buf = cv2.imencode('.' + self.parameters.frame_format, frame)

        # Position of the frame in the video; 0 when the frame rate is unknown
        seconds = self.frame_index / video_info.frame_rate if video_info.frame_rate else 0
        delta = datetime.timedelta(seconds=seconds)
        # SRT cue times are parsed relative to 1900-01-01, use the same origin
        elapsed_time = datetime.datetime(1900, 1, 1) + delta

        img = Image.open(io.BytesIO(buf))

        entry = gps_coords = None
        if srt_parser is not None:
            entry = srt_parser.get_entry(elapsed_time)
            gps_coords = srt_parser.get_gps(elapsed_time)

        exif_time = (elapsed_time + (self.date_now - datetime.datetime(1900, 1, 1)))
        elapsed_time_str = exif_time.strftime("%Y:%m:%d %H:%M:%S")
        subsec_time_str = exif_time.strftime("%f")

        # Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail'
        # Set the EXIF metadata
        exif_dict = {
            "0th": {
                piexif.ImageIFD.Software: "ODM",
                piexif.ImageIFD.DateTime: elapsed_time_str,
                piexif.ImageIFD.XResolution: (frame.shape[1], 1),
                piexif.ImageIFD.YResolution: (frame.shape[0], 1),
                piexif.ImageIFD.Make: "DJI" if video_info.basename.lower().startswith("dji") else "Unknown",
                piexif.ImageIFD.Model: "Unknown"
            },
            "Exif": {
                piexif.ExifIFD.DateTimeOriginal: elapsed_time_str,
                piexif.ExifIFD.DateTimeDigitized: elapsed_time_str,
                piexif.ExifIFD.SubSecTime: subsec_time_str,
                piexif.ExifIFD.PixelXDimension: frame.shape[1],
                piexif.ExifIFD.PixelYDimension: frame.shape[0],
            }}

        if entry is not None:
            if entry["shutter"] is not None:
                shutter_denominator = int(entry["shutter"])
                # A zero denominator is not a valid rational
                if shutter_denominator > 0:
                    exif_dict["Exif"][piexif.ExifIFD.ExposureTime] = (1, shutter_denominator)
            if entry["focal_len"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.FocalLength] = (entry["focal_len"], 100)
            if entry["fnum"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.FNumber] = float_to_rational(entry["fnum"])
            if entry["iso"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = entry["iso"]

        if gps_coords is not None:
            # The date stamp is taken from the EXIF time; the elapsed time
            # would always yield a date in the year 1900.
            exif_dict["GPS"] = get_gps_location(exif_time, gps_coords[1], gps_coords[0], gps_coords[2])

        exif_bytes = piexif.dump(exif_dict)
        img.save(path, exif=exif_bytes, quality=95)

        return path

    def WriteStats(self, input_file, stats):
        """Append one semicolon-separated row to the statistics file.

        The scores are written with a decimal comma; the file name and the
        other columns are written as they are.
        """
        def decimal_comma(value):
            return str(value).replace(".", ",")

        row = [
            stats["global_idx"],
            input_file,
            stats["frame_index"],
            decimal_comma(stats["blur_score"]) if "blur_score" in stats else "",
            stats.get("is_blurry", ""),
            stats.get("is_black", ""),
            stats.get("last_frame_index", ""),
            decimal_comma(stats["similarity_score"]) if "similarity_score" in stats else "",
            stats.get("is_similar", ""),
            stats.get("written", ""),
        ]
        self.f.write(";".join(str(value) for value in row) + "\n")
def get_video_info(input_file):
    """Read basic information about a video file.

    :param input_file path of the video
    :return "VideoInfo" named tuple with total_frames (int), frame_rate and
        basename (file name without directory and extension)
    """
    VideoInfo = collections.namedtuple("VideoInfo", ["total_frames", "frame_rate", "basename"])

    capture = cv2.VideoCapture(input_file)
    stem, _ = os.path.splitext(os.path.basename(input_file))
    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = capture.get(cv2.CAP_PROP_FPS)
    capture.release()

    return VideoInfo(frame_count, fps, stem)
def float_to_rational(f):
    """Approximate a number as a (numerator, denominator) tuple.

    The approximation is the one given by Fraction.limit_denominator()
    with its default bound.
    """
    ratio = Fraction(f).limit_denominator()
    return ratio.numerator, ratio.denominator
def limit_files(paths, limit):
    """Keep `limit` evenly spread files and delete the others from disk.

    :param paths list of file paths
    :param limit number of files to keep
    :return `paths` itself when it already fits the limit, otherwise the
        list of kept paths (the files not kept are removed with os.remove)
    """
    total = len(paths)
    if total <= limit:
        return paths

    # Evenly spaced positions, always including the first and last file
    keep_idxes = np.linspace(0, total - 1, limit, dtype=int)
    survivors = list(np.array(paths)[keep_idxes])

    discarded = set(np.arange(0, total)) - set(keep_idxes)
    for position in discarded:
        os.remove(paths[position])

    return survivors
def to_deg(value, loc):
    """Convert decimal coordinates into a degrees, minutes and seconds tuple.

    Keyword arguments: value is the float gps-value, loc is the direction
    list ["S", "N"] or ["W", "E"] (first item for negative values, second
    for positive ones; an empty string is used for exactly 0).
    return: tuple like (25, 13, 48.343, 'N')
    """
    direction = loc[0] if value < 0 else (loc[1] if value > 0 else "")

    magnitude = abs(value)
    degrees = int(magnitude)
    minutes_float = (magnitude - degrees) * 60
    minutes = int(minutes_float)
    seconds = round((minutes_float - minutes) * 60, 5)

    return (degrees, minutes, seconds, direction)
def get_gps_location(elapsed_time, lat, lng, altitude):
    """Build the GPS IFD dictionary for piexif.

    :param elapsed_time datetime used for the GPS date stamp
    :param lat latitude in decimal degrees, or None
    :param lng longitude in decimal degrees, or None
    :param altitude altitude, or None; it is rounded to a whole number
    :return dict of piexif.GPSIFD tags; position and altitude tags are only
        present when both lat and lng are given
    """
    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d')
    }

    # Convert only once the coordinates are known to be present:
    # to_deg() cannot handle None.
    if lat is not None and lng is not None:
        lat_deg = to_deg(lat, ["S", "N"])
        lng_deg = to_deg(lng, ["W", "E"])

        gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = lat_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLatitude] = tuple(float_to_rational(part) for part in lat_deg[:3])
        gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = lng_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLongitude] = tuple(float_to_rational(part) for part in lng_deg[:3])

        if altitude is not None:
            rounded_altitude = round(altitude)
            # GPSAltitude is an unsigned rational: the sign is carried by
            # GPSAltitudeRef (0 = above sea level, 1 = below sea level).
            gps_ifd[piexif.GPSIFD.GPSAltitudeRef] = 1 if rounded_altitude < 0 else 0
            gps_ifd[piexif.GPSIFD.GPSAltitude] = float_to_rational(abs(rounded_altitude))

    return gps_ifd

Wyświetl plik

@ -34,3 +34,4 @@ onnxruntime==1.12.1
codem==0.24.0
trimesh==3.17.1
pandas==1.5.2
piexif==1.1.3

Wyświetl plik

@ -15,6 +15,7 @@ from opendm import ai
from opendm.skyremoval.skyfilter import SkyFilter
from opendm.bgfilter import BgFilter
from opendm.concurrency import parallel_map
from opendm.video.video2dataset import Parameters, Video2Dataset
def save_images_database(photos, database_file):
with open(database_file, 'w') as f:
@ -58,22 +59,25 @@ class ODMLoadDatasetStage(types.ODM_Stage):
except Exception as e:
log.ODM_WARNING("Cannot write benchmark file: %s" % str(e))
# check if the image filename is supported
def valid_image_filename(filename):
def valid_filename(filename, supported_extensions):
(pathfn, ext) = os.path.splitext(filename)
return ext.lower() in context.supported_extensions and pathfn[-5:] != "_mask"
return ext.lower() in supported_extensions and pathfn[-5:] != "_mask"
# Get supported images from dir
def get_images(in_dir):
entries = os.listdir(in_dir)
valid, rejects = [], []
for f in entries:
if valid_image_filename(f):
if valid_filename(f, context.supported_extensions):
valid.append(f)
else:
rejects.append(f)
return valid, rejects
def search_video_files(in_dir):
    """List the supported video files found directly in in_dir.

    :param in_dir directory to scan (not recursive)
    :return list of paths (in_dir joined with the file name) accepted by
        valid_filename() for context.supported_video_extensions
    """
    videos = []
    for name in os.listdir(in_dir):
        if valid_filename(name, context.supported_video_extensions):
            videos.append(os.path.join(in_dir, name))
    return videos
def find_mask(photo_path, masks):
(pathfn, ext) = os.path.splitext(os.path.basename(photo_path))
k = "{}_mask".format(pathfn)
@ -85,6 +89,8 @@ class ODMLoadDatasetStage(types.ODM_Stage):
return mask
else:
log.ODM_WARNING("Image mask {} has a space. Spaces are currently not supported for image masks.".format(mask))
# get images directory
images_dir = tree.dataset_raw
@ -100,6 +106,51 @@ class ODMLoadDatasetStage(types.ODM_Stage):
if not os.path.exists(images_dir):
raise system.ExitException("There are no images in %s! Make sure that your project path and dataset name is correct. The current is set to: %s" % (images_dir, args.project_path))
# Check if we need to extract video frames
frames_db_file = os.path.join(images_dir, 'frames.json')
if not os.path.exists(frames_db_file) or self.rerun():
video_files = search_video_files(images_dir)
# If we're re-running the pipeline, and frames have been extracted during a previous run
# we need to remove those before re-extracting them
if len(video_files) > 0 and os.path.exists(frames_db_file) and self.rerun():
log.ODM_INFO("Re-run, removing previously extracted video frames")
frames = []
try:
with open(frames_db_file, 'r') as f:
frames = json.loads(f.read())
except Exception as e:
log.ODM_WARNING("Cannot check previous video extraction: %s" % str(e))
for f in frames:
fp = os.path.join(images_dir, f)
if os.path.isfile(fp):
os.remove(fp)
if len(video_files) > 0:
log.ODM_INFO("Found video files (%s), extracting frames" % len(video_files))
try:
params = Parameters({
"input": video_files,
"output": images_dir,
"blur_threshold": 300,
"distance_threshold": 10,
"black_ratio_threshold": 0.98,
"pixel_black_threshold": 0.30,
"use_srt": True,
"max_dimension": args.video_resolution,
"limit": args.video_limit,
})
v2d = Video2Dataset(params)
frames = v2d.ProcessVideo()
with open(frames_db_file, 'w') as f:
f.write(json.dumps([os.path.basename(f) for f in frames]))
except Exception as e:
log.ODM_WARNING("Could not extract video frames: %s" % str(e))
files, rejects = get_images(images_dir)
if files:
# create ODMPhoto list