kopia lustrzana https://github.com/OpenDroneMap/ODM
commit
643f92a66d
|
@ -259,6 +259,10 @@ Experimental flags need to be enabled in Docker to use the ```--squash``` flag.
|
|||
|
||||
After this, you must restart docker.
|
||||
|
||||
## Video Support
|
||||
|
||||
Starting from version 3.0.4, ODM can automatically extract images from video files (.mp4 or .mov). Just place one or more video files into the `images` folder and run the program as usual. Subtitle files (.srt) with GPS information are also supported. Place .srt files in the `images` folder, making sure that each filename matches the name of its video file. For example, `my_video.mp4` ==> `my_video.srt` (case-sensitive).
|
||||
|
||||
## Developers
|
||||
|
||||
Help improve our software! We welcome contributions from everyone, whether to add new features, improve speed, fix existing bugs or add support for more cameras. Check our [code of conduct](https://github.com/OpenDroneMap/documents/blob/master/CONDUCT.md), the [contributing guidelines](https://github.com/OpenDroneMap/documents/blob/master/CONTRIBUTING.md) and [how decisions are made](https://github.com/OpenDroneMap/documents/blob/master/GOVERNANCE.md#how-decisions-are-made).
|
||||
|
|
|
@ -55,7 +55,7 @@ ExternalProject_Add(${_proj_name}
|
|||
-DBUILD_opencv_photo=ON
|
||||
-DBUILD_opencv_legacy=ON
|
||||
-DBUILD_opencv_python3=ON
|
||||
-DWITH_FFMPEG=OFF
|
||||
-DWITH_FFMPEG=ON
|
||||
-DWITH_CUDA=OFF
|
||||
-DWITH_GTK=OFF
|
||||
-DWITH_VTK=OFF
|
||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
|||
3.0.3
|
||||
3.0.4
|
||||
|
|
|
@ -653,6 +653,20 @@ def config(argv=None, parser=None):
|
|||
version='ODM {0}'.format(__version__),
|
||||
help='Displays version number and exits. ')
|
||||
|
||||
parser.add_argument('--video-limit',
|
||||
type=int,
|
||||
action=StoreValue,
|
||||
default=500,
|
||||
metavar='<positive integer>',
|
||||
help='Maximum number of frames to extract from video files for processing. Set to 0 for no limit. Default: %(default)s')
|
||||
|
||||
parser.add_argument('--video-resolution',
|
||||
type=int,
|
||||
action=StoreValue,
|
||||
default=4000,
|
||||
metavar='<positive integer>',
|
||||
help='The maximum output resolution of extracted video frames in pixels. Default: %(default)s')
|
||||
|
||||
parser.add_argument('--split',
|
||||
type=int,
|
||||
action=StoreValue,
|
||||
|
|
|
@ -41,6 +41,7 @@ settings_path = os.path.join(root_path, 'settings.yaml')
|
|||
|
||||
# Define supported image extensions
|
||||
supported_extensions = {'.jpg','.jpeg','.png', '.tif', '.tiff', '.bmp'}
|
||||
supported_video_extensions = {'.mp4', '.mov'}
|
||||
|
||||
# Define the number of cores
|
||||
num_cores = multiprocessing.cpu_count()
|
||||
|
|
|
@ -119,7 +119,6 @@ def parse_srs_header(header):
|
|||
:param header (str) line
|
||||
:return Proj object
|
||||
"""
|
||||
log.ODM_INFO('Parsing SRS header: %s' % header)
|
||||
header = header.strip()
|
||||
ref = header.split(' ')
|
||||
|
||||
|
@ -156,3 +155,14 @@ def parse_srs_header(header):
|
|||
raise RuntimeError(e)
|
||||
|
||||
return srs
|
||||
|
||||
def utm_srs_from_ll(lon, lat):
    """Return the SRS parsed from the "WGS84 UTM <zone><hemisphere>" header for a point.

    :param lon longitude of the point
    :param lat latitude of the point
    :return whatever parse_srs_header() returns for the generated header
    """
    zone, hemi = get_utm_zone_and_hemisphere_from(lon, lat)
    header = "WGS84 UTM %s%s" % (zone, hemi)
    return parse_srs_header(header)
|
||||
|
||||
def utm_transformers_from_ll(lon, lat):
    """Build the pair of transformers between EPSG:4326 and the UTM SRS of a point.

    :param lon longitude used to pick the UTM zone
    :param lat latitude used to pick the UTM zone
    :return tuple (EPSG:4326 -> UTM transformer, UTM -> EPSG:4326 transformer)
    """
    wgs84 = CRS.from_epsg(4326)
    utm = utm_srs_from_ll(lon, lat)

    forward = transformer(wgs84, utm)
    inverse = transformer(utm, wgs84)
    return forward, inverse
|
|
@ -0,0 +1,128 @@
|
|||
import cv2
|
||||
import numpy as np
|
||||
|
||||
class ThresholdBlurChecker:
    """Blur detector based on the variance of the Laplacian of a grayscale frame."""

    def __init__(self, threshold):
        # Scores strictly below this value are reported as blurry
        self.threshold = threshold

    def NeedPreProcess(self):
        """This checker does not need a preliminary pass over the video."""
        return False

    def PreProcess(self, video_path, start_frame, end_frame):
        """No-op; nothing is read from the video."""
        return None

    def IsBlur(self, image_bw, id):
        """Score a grayscale frame.

        :param image_bw grayscale image
        :param id frame identifier (not used by this checker)
        :return tuple (laplacian variance, True if the variance is below the threshold)
        """
        score = cv2.Laplacian(image_bw, cv2.CV_64F).var()
        is_blurry = score < self.threshold
        return score, is_blurry
|
||||
|
||||
class SimilarityChecker:
    """Detects frames that are too similar to the last accepted ("reference") frame.

    Corner features of the reference frame are tracked into the candidate frame
    with pyramidal Lucas-Kanade optical flow; the average absolute displacement
    of the successfully tracked features is the distance score.
    """

    def __init__(self, threshold, max_features=500):
        # Distances strictly below this value are reported as "similar"
        self.threshold = threshold
        # Maximum number of corners requested from goodFeaturesToTrack
        self.max_features = max_features
        self.last_image = None
        self.last_image_id = None
        self.last_image_features = None

    def _SetReference(self, image_bw, id):
        """Make image_bw the new reference frame and detect its features."""
        self.last_image = image_bw
        self.last_image_id = id
        self.last_image_features = cv2.goodFeaturesToTrack(image_bw, self.max_features, 0.01, 10)

    def IsSimilar(self, image_bw, id):
        """Compare a grayscale frame against the current reference frame.

        :param image_bw grayscale image
        :param id frame identifier stored when the frame becomes the reference
        :return tuple (distance, is_similar, id of the reference frame after this call);
                the first frame ever seen returns (0, False, None)
        """
        if self.last_image is None:
            self._SetReference(image_bw, id)
            return 0, False, None

        # goodFeaturesToTrack may find no corners at all in the reference frame
        # (e.g. a uniform image). There is nothing to track in that case, so the
        # frame cannot be called similar: accept it and use it as the new reference
        # instead of passing an empty point set to calcOpticalFlowPyrLK.
        if self.last_image_features is None or len(self.last_image_features) == 0:
            self._SetReference(image_bw, id)
            return 0, False, self.last_image_id

        # Track the reference features into the new frame
        features, status, _ = cv2.calcOpticalFlowPyrLK(self.last_image, image_bw, self.last_image_features, None)

        # Filter out the "bad" features (i.e. those that are not tracked successfully)
        tracked = status == 1
        good_features = features[tracked]
        good_features2 = self.last_image_features[tracked]

        # Calculate the difference between the locations of the good features in the two frames
        distance = np.average(np.abs(good_features2 - good_features))

        res = distance < self.threshold

        # A sufficiently different frame becomes the new reference
        if not res:
            self._SetReference(image_bw, id)

        return distance, res, self.last_image_id
|
||||
|
||||
|
||||
class NaiveBlackFrameChecker:
    """Black-frame detector that compares the average pixel value to a fixed threshold."""

    def __init__(self, threshold):
        # Frames whose average value is strictly below this are reported as black
        self.threshold = threshold

    def PreProcess(self, video_path, start_frame, end_frame, width=800, height=600):
        """No-op; this checker keeps no per-video statistics."""
        return None

    def NeedPreProcess(self):
        """This checker does not need a preliminary pass over the video."""
        return False

    def IsBlack(self, image_bw, id):
        """Return True when the average value of image_bw is below the threshold.

        :param image_bw grayscale image
        :param id frame identifier (not used by this checker)
        """
        mean_value = np.average(image_bw)
        return mean_value < self.threshold
|
||||
|
||||
|
||||
class BlackFrameChecker:
    """Black-frame detector with a threshold adapted to the luminance range of the video.

    PreProcess() scans the video to find the minimum luminance and the largest
    per-frame luminance range; IsBlack() then counts the pixels that fall below
    ``minimum + pixel_black_th * range`` and compares their ratio to
    ``picture_black_ratio_th``.
    """

    def __init__(self, picture_black_ratio_th=0.98, pixel_black_th=0.30):
        # None falls back to the default, so callers can pass through unset options
        self.picture_black_ratio_th = picture_black_ratio_th if picture_black_ratio_th is not None else 0.98
        self.pixel_black_th = pixel_black_th if pixel_black_th is not None else 0.30

        # Filled in by PreProcess()
        self.luminance_minimum_value = None
        self.luminance_range_size = None
        self.absolute_threshold = None

    def NeedPreProcess(self):
        """PreProcess() must run before IsBlack(): absolute_threshold is None until then."""
        return True

    def PreProcess(self, video_path, start_frame, end_frame):
        """Scan the video and compute the absolute "black pixel" threshold.

        :param video_path path of the video file
        :param start_frame index of the first frame to scan (None means 0)
        :param end_frame index of the last frame to scan, inclusive
                         (None or -1 means scan until the end of the video)
        """
        first_frame = start_frame if start_frame is not None else 0
        # None and -1 both mean "no upper bound"; reading simply stops when
        # the capture has no more frames.
        last_frame = None if end_frame is None or end_frame == -1 else end_frame

        # Open video file
        cap = cv2.VideoCapture(video_path)
        try:
            cap.set(cv2.CAP_PROP_POS_FRAMES, first_frame)

            # Initialize luminance range size and minimum value
            self.luminance_range_size = 0
            self.luminance_minimum_value = 255

            frame_index = first_frame

            # Read and process frames from video file
            while cap.isOpened() and (last_frame is None or frame_index <= last_frame):
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert frame to grayscale; plain ints keep the arithmetic
                # below out of fixed-width numpy integer types
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                gray_frame_min = int(gray_frame.min())
                gray_frame_max = int(gray_frame.max())

                # Update luminance range size and minimum value
                self.luminance_range_size = max(self.luminance_range_size, gray_frame_max - gray_frame_min)
                self.luminance_minimum_value = min(self.luminance_minimum_value, gray_frame_min)

                frame_index += 1

            # Calculate absolute threshold for considering a pixel "black"
            self.absolute_threshold = self.luminance_minimum_value + self.pixel_black_th * self.luminance_range_size
        finally:
            # Close video file even if reading/converting a frame failed
            cap.release()

    def IsBlack(self, image_bw, id):
        """Return True when enough pixels of image_bw are below the absolute threshold.

        :param image_bw grayscale image (two-dimensional array)
        :param id frame identifier (not used by this checker)
        """
        # Count number of pixels < self.absolute_threshold
        nb_black_pixels = np.sum(image_bw < self.absolute_threshold)

        # Calculate ratio of black pixels
        ratio_black_pixels = nb_black_pixels / (image_bw.shape[0] * image_bw.shape[1])

        # Check if ratio of black pixels is above threshold
        return ratio_black_pixels >= self.picture_black_ratio_th
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
import argparse
|
||||
import datetime
|
||||
import os
|
||||
|
||||
class Parameters:
    """Settings for the video frame extraction, read from a dictionary.

    Keys read from ``args``:

    - "input" (required): path to the input video file, or a list of paths
    - "output" (required): path to the output directory (created if missing)
    - "start": start frame index (default 0)
    - "end": end frame index (default None)
    - "limit": maximum number of output frames (default None)
    - "blur_threshold": blur measures that fall below this value will be
      considered 'blurry'. Good value is 300 (default None)
    - "distance_threshold": distance measures that fall below this value will be
      considered 'similar' (default None)
    - "black_ratio_threshold": minimum value of the ratio
      nb_black_pixels / nb_pixels for considering a frame 'black' (default None)
    - "pixel_black_threshold": threshold for considering a pixel 'black'.
      Good value is 0.30 (30%) (default None)
    - "use_srt": use SRT files for extracting metadata (same name as video file
      with .srt extension) (default False)
    - "frame_format": frame format (jpg, png, tiff, etc.) (default "jpg")
    - "max_dimension": override output resolution (ex. 1024) (default None)
    - "stats_file": save statistics to csv file (default None)
    """

    def __init__(self, args):
        # exist_ok avoids the race between checking for and creating the directory
        os.makedirs(args["output"], exist_ok=True)

        self.input = args["input"]
        if isinstance(self.input, str):
            # A single path is normalized to a one-element list
            self.input = [self.input]

        self.output = args["output"]
        self.start = args.get("start", 0)
        self.end = args.get("end", None)
        self.limit = args.get("limit", None)
        self.blur_threshold = args.get("blur_threshold", None)
        self.distance_threshold = args.get("distance_threshold", None)
        self.black_ratio_threshold = args.get("black_ratio_threshold", None)
        self.pixel_black_threshold = args.get("pixel_black_threshold", None)
        # Honour the value, not the mere presence of the key:
        # {"use_srt": False} must disable SRT handling.
        self.use_srt = bool(args.get("use_srt", False))
        self.frame_format = args.get("frame_format", "jpg")
        self.max_dimension = args.get("max_dimension", None)

        self.stats_file = args.get("stats_file", None)

        # We will resize the image to this size before processing
        self.internal_resolution = 800
|
|
@ -0,0 +1,206 @@
|
|||
from datetime import datetime
|
||||
from opendm import location, log
|
||||
import re
|
||||
|
||||
|
||||
def match_single(regexes, line, dtype=int):
    """Return the converted first capture group of the first pattern that matches line.

    :param regexes a pattern string, or a list whose items are either pattern
                   strings or (pattern, transform) tuples
    :param line text to search
    :param dtype callable used to convert the captured text for patterns that
                 do not come with their own transform
    :return the converted value; None if no pattern matches or if the
            conversion fails (a warning is logged in the latter case)
    """
    if isinstance(regexes, str):
        regexes = [regexes]

    # Pair bare patterns with dtype in a new list: the caller's list is left untouched
    candidates = [(r, dtype) if isinstance(r, str) else r for r in regexes]

    try:
        for pattern, transform in candidates:
            match = re.search(pattern, line)
            if match:
                return transform(match.group(1))
    except Exception as e:
        # The message must be formatted here: the logger is given a single string
        log.ODM_WARNING("Cannot parse SRT line \"%s\": %s" % (line, str(e)))

    return None
|
||||
|
||||
class SrtFileParser:
    """Parser for the telemetry that some cameras store in .srt subtitle files.

    Every subtitle cue becomes a dictionary in ``self.data`` with the keys
    "start", "end", "iso", "shutter", "fnum", "focal_len", "latitude",
    "longitude" and "altitude"; values that are not found are None.
    """

    def __init__(self, filename):
        self.filename = filename
        self.data = []
        # List of (cue start time, UTM coordinates), built lazily by get_gps()
        self.gps_data = []
        self.ll_to_utm = None
        self.utm_to_ll = None

    def get_entry(self, timestamp: datetime):
        """Return the cue whose [start, end] interval contains timestamp, or None."""
        if not self.data:
            self.parse()

        # Nothing could be parsed
        if not self.data:
            return None

        # check min and max
        if timestamp < self.data[0]["start"] or timestamp > self.data[-1]["end"]:
            return None

        for entry in self.data:
            if entry["start"] <= timestamp <= entry["end"]:
                return entry

        return None

    def get_gps(self, timestamp):
        """Return the position at timestamp, interpolated between the recorded fixes.

        Interpolation is linear, done in UTM coordinates, between the two
        recorded positions that surround the timestamp.

        :return the result of transforming the interpolated UTM point back to
                lon/lat, or None if no position is available (no GPS data,
                timestamp outside the recorded range, or a gap of 10 seconds
                or more between the surrounding fixes)
        """
        if not self.data:
            self.parse()

        # Initialize on first call
        if not self.gps_data:
            for d in self.data:
                lat, lon, alt = d.get('latitude'), d.get('longitude'), d.get('altitude')
                tm = d.get('start')

                if lat is not None and lon is not None:
                    if self.ll_to_utm is None:
                        # The UTM zone is chosen from the first valid position
                        self.ll_to_utm, self.utm_to_ll = location.utm_transformers_from_ll(lon, lat)

                    coords = self.ll_to_utm.TransformPoint(lon, lat, alt)

                    # First or new (in X/Y only)
                    add = (not self.gps_data) or (coords[0], coords[1]) != (self.gps_data[-1][1][0], self.gps_data[-1][1][1])
                    if add:
                        self.gps_data.append((tm, coords))

        # No data available
        if not self.gps_data or self.gps_data[0][0] > timestamp:
            return None

        # Interpolate
        for i, (tm, coords) in enumerate(self.gps_data):

            # Perfect match
            if timestamp == tm:
                return self.utm_to_ll.TransformPoint(*coords)

            elif tm > timestamp:
                end = i
                start = i - 1
                if start < 0:
                    return None

                gd_s = self.gps_data[start]
                gd_e = self.gps_data[end]
                sx, sy, sz = gd_s[1]
                ex, ey, ez = gd_e[1]

                dt = (gd_e[0] - gd_s[0]).total_seconds()
                if dt >= 10:
                    # Fixes are too far apart in time to interpolate between them
                    return None

                dx = (ex - sx) / dt
                dy = (ey - sy) / dt
                dz = (ez - sz) / dt
                t = (timestamp - gd_s[0]).total_seconds()

                return self.utm_to_ll.TransformPoint(
                    sx + dx * t,
                    sy + dy * t,
                    sz + dz * t
                )

        # timestamp is later than the last recorded position
        return None

    def parse(self):
        """Read self.filename and (re)build self.data, one dictionary per cue."""

        # SRT metadata is not standardized, we support the following formats:

        # DJI mavic air 2
        # 1
        # 00:00:00,000 --> 00:00:00,016
        # <font size="36">SrtCnt : 1, DiffTime : 16ms
        # 2023-01-06 18:56:48,380,821
        # [iso : 3200] [shutter : 1/60.0] [fnum : 280] [ev : 0] [ct : 3925] [color_md : default] [focal_len : 240] [latitude: 0.000000] [longitude: 0.000000] [altitude: 0.000000] </font>

        # DJI Mavic Mini
        # 1
        # 00:00:00,000 --> 00:00:01,000
        # F/2.8, SS 206.14, ISO 150, EV 0, GPS (-82.6669, 27.7716, 10), D 2.80m, H 0.00m, H.S 0.00m/s, V.S 0.00m/s

        def to_int(v):
            # Shutter values may carry decimals ("60.0", "206.14")
            return int(float(v))

        def nonzero_float(v):
            # A coordinate of exactly zero means "no GPS fix" and is discarded.
            # The comparison has to happen after the conversion: v is a string.
            value = float(v)
            return value if value != 0 else None

        # Every pattern carries its own transform, so the lists can be shared safely
        field_patterns = (
            ("iso", [
                (r"iso : (\d+)", int),
                (r"ISO (\d+)", int),
            ]),
            ("shutter", [
                (r"shutter : \d+/(\d+\.?\d*)", to_int),
                (r"SS (\d+\.?\d*)", to_int),
            ]),
            ("fnum", [
                (r"fnum : (\d+)", lambda v: float(v) / 100.0),
                (r"F/([\d\.]+)", float),
            ]),
            ("focal_len", [
                (r"focal_len : (\d+)", int),
            ]),
            ("latitude", [
                (r"latitude: ([\d\.\-]+)", nonzero_float),
                (r"GPS \([\d\.\-]+,? ([\d\.\-]+),? [\d\.\-]+\)", nonzero_float),
            ]),
            ("longitude", [
                (r"longitude: ([\d\.\-]+)", nonzero_float),
                (r"GPS \(([\d\.\-]+),? [\d\.\-]+,? [\d\.\-]+\)", nonzero_float),
            ]),
            # Altitude is kept as parsed (zero included): get_gps() passes it
            # straight to the coordinate transformation.
            ("altitude", [
                (r"altitude: ([\d\.\-]+)", float),
                (r"GPS \([\d\.\-]+,? [\d\.\-]+,? ([\d\.\-]+)\)", float),
            ]),
        )

        def new_entry():
            return {
                "start": None,
                "end": None,
                "iso": None,
                "shutter": None,
                "fnum": None,
                "focal_len": None,
                "latitude": None,
                "longitude": None,
                "altitude": None
            }

        # Start from a clean slate so that parsing twice does not duplicate cues
        self.data = []
        self.gps_data = []

        with open(self.filename, 'r') as f:
            entry = new_entry()

            for line in f:

                # An empty line terminates the current cue
                if not line.strip():
                    if entry["start"] is not None:
                        self.data.append(entry)
                    entry = new_entry()
                    continue

                # Remove html tags
                line = re.sub('<[^<]+?>', '', line)

                # Search this "00:00:00,000 --> 00:00:00,016"
                match = re.search(r"(\d{2}:\d{2}:\d{2},\d+) --> (\d{2}:\d{2}:\d{2},\d+)", line)
                if match:
                    entry["start"] = datetime.strptime(match.group(1), "%H:%M:%S,%f")
                    entry["end"] = datetime.strptime(match.group(2), "%H:%M:%S,%f")

                # Only overwrite a field when this line actually provides it, so
                # that metadata is not lost when it is followed by other lines
                # inside the same cue
                for key, patterns in field_patterns:
                    value = match_single(patterns, line)
                    if value is not None:
                        entry[key] = value

            # The last cue is not followed by an empty line in every file
            if entry["start"] is not None:
                self.data.append(entry)
|
|
@ -0,0 +1,351 @@
|
|||
import datetime
|
||||
from fractions import Fraction
|
||||
import io
|
||||
from math import ceil, floor
|
||||
import time
|
||||
import cv2
|
||||
import os
|
||||
import collections
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import piexif
|
||||
from opendm import log
|
||||
from opendm.video.srtparser import SrtFileParser
|
||||
from opendm.video.parameters import Parameters
|
||||
from opendm.video.checkers import BlackFrameChecker, SimilarityChecker, ThresholdBlurChecker
|
||||
|
||||
class Video2Dataset:
    """Extracts still frames from video files and saves them as images with EXIF data.

    Frames are read with OpenCV, filtered by the checkers enabled through the
    parameters (blur, black frame, similarity) and written to
    ``parameters.output``. Timestamps are derived from the frame position;
    camera settings and GPS position are added when an SRT file provides them.
    """

    def __init__(self, parameters : Parameters):
        self.parameters = parameters

        # A checker is only created when its threshold(s) have been provided
        self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) if parameters.blur_threshold is not None else None
        self.similarity_checker = SimilarityChecker(parameters.distance_threshold) if parameters.distance_threshold is not None else None
        self.black_checker = BlackFrameChecker(parameters.black_ratio_threshold, parameters.pixel_black_threshold) if parameters.black_ratio_threshold is not None or parameters.pixel_black_threshold is not None else None

        self.frame_index = parameters.start
        # Handle of the statistics file, opened by ProcessVideo()
        self.f = None

    def ProcessVideo(self):
        """Process every input video.

        :return list of the paths of the frames that were written (after
                trimming to parameters.limit, when a limit is set)
        """
        self.date_now = None
        start = time.time()

        if (self.parameters.stats_file is not None):
            self.f = open(self.parameters.stats_file, "w")

        self.global_idx = 0

        output_file_paths = []

        try:
            if self.f is not None:
                self.f.write("global_idx;file_name;frame_index;blur_score;is_blurry;is_black;last_frame_index;similarity_score;is_similar;written\n")

            # foreach input file
            for input_file in self.parameters.input:
                self._ProcessFile(input_file, output_file_paths)
        finally:
            # The statistics file is closed even if processing fails half-way
            if self.f is not None:
                self.f.close()

        if self.parameters.limit is not None and self.parameters.limit > 0 and self.global_idx >= self.parameters.limit:
            log.ODM_INFO("Limit of {} frames reached, trimming dataset".format(self.parameters.limit))
            output_file_paths = limit_files(output_file_paths, self.parameters.limit)

        end = time.time()
        log.ODM_INFO("Total processing time: {:.2f}s".format(end - start))
        return output_file_paths

    def _ProcessFile(self, input_file, output_file_paths):
        """Extract the frames of one video, appending the written paths to output_file_paths.

        A video that cannot be opened is skipped with a warning, so that the
        remaining inputs are still processed.
        """
        # get file name
        file_name = os.path.basename(input_file)
        log.ODM_INFO("Processing video: {}".format(input_file))

        # get video info
        video_info = get_video_info(input_file)
        log.ODM_INFO(video_info)

        # Set pseudo start time
        if self.date_now is None:
            try:
                self.date_now = datetime.datetime.fromtimestamp(os.path.getmtime(input_file))
            except Exception:
                # Best effort: fall back to the current time
                self.date_now = datetime.datetime.now()
        else:
            self.date_now += datetime.timedelta(seconds=video_info.total_frames / video_info.frame_rate)

        log.ODM_INFO("Use pseudo start time: %s" % self.date_now)

        srt_parser = None
        if self.parameters.use_srt:
            name = os.path.splitext(input_file)[0]

            # Same name as the video, with a lower or upper case extension
            for srt_file in [name + ".srt", name + ".SRT"]:
                if os.path.exists(srt_file):
                    log.ODM_INFO("Loading SRT file: {}".format(srt_file))
                    try:
                        srt_parser = SrtFileParser(srt_file)
                        srt_parser.parse()
                        break
                    except Exception as e:
                        log.ODM_INFO("Error parsing SRT file: {}".format(e))
                        srt_parser = None

        if (self.black_checker is not None and self.black_checker.NeedPreProcess()):
            start2 = time.time()
            log.ODM_INFO("Preprocessing for black frame checker... this might take a bit")
            self.black_checker.PreProcess(input_file, self.parameters.start, self.parameters.end)
            end = time.time()
            log.ODM_INFO("Preprocessing time: {:.2f}s".format(end - start2))
            log.ODM_INFO("Calculated luminance_range_size is {}".format(self.black_checker.luminance_range_size))
            log.ODM_INFO("Calculated luminance_minimum_value is {}".format(self.black_checker.luminance_minimum_value))
            log.ODM_INFO("Calculated absolute_threshold is {}".format(self.black_checker.absolute_threshold))

        # open video file
        cap = cv2.VideoCapture(input_file)
        try:
            if (not cap.isOpened()):
                log.ODM_WARNING("Error opening video stream or file: {}".format(input_file))
                return

            if (self.parameters.start is not None):
                cap.set(cv2.CAP_PROP_POS_FRAMES, self.parameters.start)
                start_frame = self.parameters.start
            else:
                start_frame = 0

            # Frame numbering restarts for every video, also when no start frame is set
            self.frame_index = start_frame

            frames_to_process = self.parameters.end - start_frame + 1 if (self.parameters.end is not None) else video_info.total_frames - start_frame

            progress = 0
            while (cap.isOpened()):
                ret, frame = cap.read()

                if not ret:
                    break

                if (self.parameters.end is not None and self.frame_index > self.parameters.end):
                    break

                # Calculate progress percentage (skipped when the frame count is
                # unknown or inconsistent, which would divide by zero)
                prev_progress = progress
                if frames_to_process > 0:
                    progress = floor((self.frame_index - start_frame + 1) / frames_to_process * 100)
                if progress != prev_progress:
                    print("[{}][{:3d}%] Processing frame {}/{}: ".format(file_name, progress, self.frame_index - start_frame + 1, frames_to_process), end="\r")

                stats = self.ProcessFrame(frame, video_info, srt_parser)

                if stats is not None and self.parameters.stats_file is not None:
                    self.WriteStats(input_file, stats)

                # Add element to array
                if stats is not None and "written" in stats.keys():
                    output_file_paths.append(stats["path"])
        finally:
            cap.release()

    def ProcessFrame(self, frame, video_info, srt_parser):
        """Run the enabled checkers on a frame and save it if none of them rejects it.

        :return dictionary with "frame_index" and "global_idx", the results of
                the checkers that ran and, when the frame was saved, "written"
                (True) and "path"
        """
        res = {"frame_index": self.frame_index, "global_idx": self.global_idx}

        frame_bw = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # The checkers work on a downscaled copy; the saved frame is not affected
        h, w = frame_bw.shape
        resolution = self.parameters.internal_resolution
        if resolution < w or resolution < h:
            m = max(w, h)
            factor = resolution / m
            frame_bw = cv2.resize(frame_bw, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_NEAREST)

        if (self.blur_checker is not None):
            blur_score, is_blurry = self.blur_checker.IsBlur(frame_bw, self.frame_index)
            res["blur_score"] = blur_score
            res["is_blurry"] = is_blurry

            if is_blurry:
                # print ("blurry, skipping")
                self.frame_index += 1
                return res

        if (self.black_checker is not None):
            is_black = self.black_checker.IsBlack(frame_bw, self.frame_index)
            res["is_black"] = is_black

            if is_black:
                # print ("black, skipping")
                self.frame_index += 1
                return res

        if (self.similarity_checker is not None):
            similarity_score, is_similar, last_frame_index = self.similarity_checker.IsSimilar(frame_bw, self.frame_index)
            res["similarity_score"] = similarity_score
            res["is_similar"] = is_similar
            res["last_frame_index"] = last_frame_index

            if is_similar:
                # print ("similar to {}, skipping".format(self.similarity_checker.last_image_id))
                self.frame_index += 1
                return res

        path = self.SaveFrame(frame, video_info, srt_parser)
        res["written"] = True
        res["path"] = path
        self.frame_index += 1
        self.global_idx += 1

        return res

    def SaveFrame(self, frame, video_info, srt_parser: SrtFileParser):
        """Write a frame to the output directory, with EXIF metadata.

        :return path of the written image
        """
        max_dim = self.parameters.max_dimension
        if max_dim is not None:
            h, w, _ = frame.shape
            if max_dim < w or max_dim < h:
                m = max(w, h)
                factor = max_dim / m
                frame = cv2.resize(frame, (int(ceil(w * factor)), int(ceil(h * factor))), interpolation=cv2.INTER_AREA)

        path = os.path.join(self.parameters.output,
            "{}_{}_{}.{}".format(video_info.basename, self.global_idx, self.frame_index, self.parameters.frame_format))

        _, buf = cv2.imencode('.' + self.parameters.frame_format, frame)

        # Position of the frame in the video, expressed on the same
        # 1900-01-01 based clock as the timestamps parsed from SRT files
        delta = datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate))
        elapsed_time = datetime.datetime(1900, 1, 1) + delta

        img = Image.open(io.BytesIO(buf))

        entry = gps_coords = None
        if srt_parser is not None:
            entry = srt_parser.get_entry(elapsed_time)
            gps_coords = srt_parser.get_gps(elapsed_time)

        # Pseudo start time of the video plus the position of the frame
        exif_time = (elapsed_time + (self.date_now - datetime.datetime(1900, 1, 1)))
        elapsed_time_str = exif_time.strftime("%Y:%m:%d %H:%M:%S")
        subsec_time_str = exif_time.strftime("%f")

        # Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail'
        # Set the EXIF metadata
        exif_dict = {
            "0th": {
                piexif.ImageIFD.Software: "ODM",
                piexif.ImageIFD.DateTime: elapsed_time_str,
                piexif.ImageIFD.XResolution: (frame.shape[1], 1),
                piexif.ImageIFD.YResolution: (frame.shape[0], 1),
                piexif.ImageIFD.Make: "DJI" if video_info.basename.lower().startswith("dji") else "Unknown",
                piexif.ImageIFD.Model: "Unknown"
            },
            "Exif": {
                piexif.ExifIFD.DateTimeOriginal: elapsed_time_str,
                piexif.ExifIFD.DateTimeDigitized: elapsed_time_str,
                piexif.ExifIFD.SubSecTime: subsec_time_str,
                piexif.ExifIFD.PixelXDimension: frame.shape[1],
                piexif.ExifIFD.PixelYDimension: frame.shape[0],
            }}

        if entry is not None:
            if entry["shutter"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.ExposureTime] = (1, int(entry["shutter"]))
            if entry["focal_len"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.FocalLength] = (entry["focal_len"], 100)
            if entry["fnum"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.FNumber] = float_to_rational(entry["fnum"])
            if entry["iso"] is not None:
                exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = entry["iso"]

        if gps_coords is not None:
            # The date stamp is taken from the same time as the other EXIF dates
            # (elapsed_time alone would always yield a date in the year 1900)
            exif_dict["GPS"] = get_gps_location(exif_time, gps_coords[1], gps_coords[0], gps_coords[2])

        exif_bytes = piexif.dump(exif_dict)
        img.save(path, exif=exif_bytes, quality=95)

        return path

    def WriteStats(self, input_file, stats):
        """Append one semicolon separated line for a frame to the statistics file.

        Values are written with a decimal comma; the file name is written unchanged.
        """
        optional_keys = ("blur_score", "is_blurry", "is_black", "last_frame_index",
                         "similarity_score", "is_similar", "written")

        values = [stats["global_idx"], stats["frame_index"]]
        values.extend(stats[key] if key in stats else "" for key in optional_keys)

        columns = [str(value).replace(".", ",") for value in values]
        # The file name goes in the second column and keeps its dots
        columns.insert(1, input_file)

        self.f.write(";".join(columns) + "\n")
|
||||
|
||||
|
||||
def get_video_info(input_file):
    """Read the frame count and frame rate of a video file with OpenCV.

    :param input_file path of the video file
    :return VideoInfo namedtuple with the fields total_frames (int),
            frame_rate (as reported by OpenCV) and basename (file name
            without directory and extension)
    """
    capture = cv2.VideoCapture(input_file)
    stem, _ext = os.path.splitext(os.path.basename(input_file))

    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = capture.get(cv2.CAP_PROP_FPS)

    capture.release()

    VideoInfo = collections.namedtuple("VideoInfo", ["total_frames", "frame_rate", "basename"])
    return VideoInfo(total_frames=frame_count, frame_rate=fps, basename=stem)
|
||||
|
||||
def float_to_rational(f):
    """Approximate a number as a (numerator, denominator) tuple.

    The fraction is the closest one with a denominator within the default
    bound of Fraction.limit_denominator().
    """
    ratio = Fraction(f).limit_denominator()
    return ratio.numerator, ratio.denominator
|
||||
|
||||
def limit_files(paths, limit):
    """Keep at most limit evenly spaced files and delete the others from disk.

    :param paths list of file paths, in order
    :param limit maximum number of files to keep
    :return paths itself if it already fits within the limit, otherwise a new
            list with the kept paths (in their original order)
    """
    if len(paths) <= limit:
        return paths

    # Evenly spaced positions, always including the first and the last file
    keep_idxes = [int(i) for i in np.linspace(0, len(paths) - 1, limit, dtype=int)]
    keep = set(keep_idxes)

    for idx, path in enumerate(paths):
        if idx not in keep:
            os.remove(path)

    # Index the original list so that plain str objects are returned
    return [paths[idx] for idx in keep_idxes]
|
||||
|
||||
def to_deg(value, loc):
    """convert decimal coordinates into degrees, minutes and seconds tuple
    Keyword arguments: value is float gps-value, loc is direction list ["S", "N"] or ["W", "E"]
    return: tuple like (25, 13, 48.343 ,'N')
    """
    # Zero is assigned to the positive direction: an empty reference is not a
    # valid value for the EXIF latitude/longitude reference tags
    loc_value = loc[0] if value < 0 else loc[1]

    abs_value = abs(value)
    deg = int(abs_value)
    t1 = (abs_value - deg) * 60
    minutes = int(t1)
    sec = round((t1 - minutes) * 60, 5)
    return (deg, minutes, sec, loc_value)
|
||||
|
||||
def get_gps_location(elapsed_time, lat, lng, altitude):
    """Build the GPS section of a piexif EXIF dictionary.

    :param elapsed_time datetime used for the GPS date stamp
    :param lat latitude in decimal degrees, or None
    :param lng longitude in decimal degrees, or None
    :param altitude altitude (rounded to an integer when written), or None
    :return dictionary of piexif.GPSIFD tags; position tags are only present
            when both lat and lng are given, altitude tags only when a
            position and an altitude are given
    """
    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d')
    }

    if lat is not None and lng is not None:
        # Converted only after the None check: to_deg() cannot handle None
        lat_deg = to_deg(lat, ["S", "N"])
        lng_deg = to_deg(lng, ["W", "E"])

        exiv_lat = (float_to_rational(lat_deg[0]), float_to_rational(lat_deg[1]), float_to_rational(lat_deg[2]))
        exiv_lng = (float_to_rational(lng_deg[0]), float_to_rational(lng_deg[1]), float_to_rational(lng_deg[2]))

        gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = lat_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLatitude] = exiv_lat
        gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = lng_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLongitude] = exiv_lng
        if altitude is not None:
            rounded_altitude = round(altitude)
            # The altitude tag stores a magnitude; the reference tag carries
            # the sign (0 = above sea level, 1 = below sea level)
            gps_ifd[piexif.GPSIFD.GPSAltitudeRef] = 0 if rounded_altitude >= 0 else 1
            gps_ifd[piexif.GPSIFD.GPSAltitude] = float_to_rational(abs(rounded_altitude))

    return gps_ifd
|
|
@ -34,3 +34,4 @@ onnxruntime==1.12.1
|
|||
codem==0.24.0
|
||||
trimesh==3.17.1
|
||||
pandas==1.5.2
|
||||
piexif==1.1.3
|
||||
|
|
|
@ -15,6 +15,7 @@ from opendm import ai
|
|||
from opendm.skyremoval.skyfilter import SkyFilter
|
||||
from opendm.bgfilter import BgFilter
|
||||
from opendm.concurrency import parallel_map
|
||||
from opendm.video.video2dataset import Parameters, Video2Dataset
|
||||
|
||||
def save_images_database(photos, database_file):
|
||||
with open(database_file, 'w') as f:
|
||||
|
@ -58,22 +59,25 @@ class ODMLoadDatasetStage(types.ODM_Stage):
|
|||
except Exception as e:
|
||||
log.ODM_WARNING("Cannot write benchmark file: %s" % str(e))
|
||||
|
||||
# check if the image filename is supported
|
||||
def valid_image_filename(filename):
|
||||
def valid_filename(filename, supported_extensions):
|
||||
(pathfn, ext) = os.path.splitext(filename)
|
||||
return ext.lower() in context.supported_extensions and pathfn[-5:] != "_mask"
|
||||
return ext.lower() in supported_extensions and pathfn[-5:] != "_mask"
|
||||
|
||||
# Get supported images from dir
|
||||
def get_images(in_dir):
|
||||
entries = os.listdir(in_dir)
|
||||
valid, rejects = [], []
|
||||
for f in entries:
|
||||
if valid_image_filename(f):
|
||||
if valid_filename(f, context.supported_extensions):
|
||||
valid.append(f)
|
||||
else:
|
||||
rejects.append(f)
|
||||
return valid, rejects
|
||||
|
||||
def search_video_files(in_dir):
|
||||
entries = os.listdir(in_dir)
|
||||
return [os.path.join(in_dir, f) for f in entries if valid_filename(f, context.supported_video_extensions)]
|
||||
|
||||
def find_mask(photo_path, masks):
|
||||
(pathfn, ext) = os.path.splitext(os.path.basename(photo_path))
|
||||
k = "{}_mask".format(pathfn)
|
||||
|
@ -86,6 +90,8 @@ class ODMLoadDatasetStage(types.ODM_Stage):
|
|||
else:
|
||||
log.ODM_WARNING("Image mask {} has a space. Spaces are currently not supported for image masks.".format(mask))
|
||||
|
||||
|
||||
|
||||
# get images directory
|
||||
images_dir = tree.dataset_raw
|
||||
|
||||
|
@ -100,6 +106,51 @@ class ODMLoadDatasetStage(types.ODM_Stage):
|
|||
if not os.path.exists(images_dir):
|
||||
raise system.ExitException("There are no images in %s! Make sure that your project path and dataset name is correct. The current is set to: %s" % (images_dir, args.project_path))
|
||||
|
||||
# Check if we need to extract video frames
|
||||
frames_db_file = os.path.join(images_dir, 'frames.json')
|
||||
if not os.path.exists(frames_db_file) or self.rerun():
|
||||
video_files = search_video_files(images_dir)
|
||||
|
||||
# If we're re-running the pipeline, and frames have been extracted during a previous run
|
||||
# we need to remove those before re-extracting them
|
||||
if len(video_files) > 0 and os.path.exists(frames_db_file) and self.rerun():
|
||||
log.ODM_INFO("Re-run, removing previously extracted video frames")
|
||||
frames = []
|
||||
try:
|
||||
with open(frames_db_file, 'r') as f:
|
||||
frames = json.loads(f.read())
|
||||
except Exception as e:
|
||||
log.ODM_WARNING("Cannot check previous video extraction: %s" % str(e))
|
||||
|
||||
for f in frames:
|
||||
fp = os.path.join(images_dir, f)
|
||||
if os.path.isfile(fp):
|
||||
os.remove(fp)
|
||||
|
||||
if len(video_files) > 0:
|
||||
log.ODM_INFO("Found video files (%s), extracting frames" % len(video_files))
|
||||
|
||||
try:
|
||||
params = Parameters({
|
||||
"input": video_files,
|
||||
"output": images_dir,
|
||||
|
||||
"blur_threshold": 300,
|
||||
"distance_threshold": 10,
|
||||
"black_ratio_threshold": 0.98,
|
||||
"pixel_black_threshold": 0.30,
|
||||
"use_srt": True,
|
||||
"max_dimension": args.video_resolution,
|
||||
"limit": args.video_limit,
|
||||
})
|
||||
v2d = Video2Dataset(params)
|
||||
frames = v2d.ProcessVideo()
|
||||
|
||||
with open(frames_db_file, 'w') as f:
|
||||
f.write(json.dumps([os.path.basename(f) for f in frames]))
|
||||
except Exception as e:
|
||||
log.ODM_WARNING("Could not extract video frames: %s" % str(e))
|
||||
|
||||
files, rejects = get_images(images_dir)
|
||||
if files:
|
||||
# create ODMPhoto list
|
||||
|
|
Ładowanie…
Reference in New Issue