Removed pymediainfo, added output limit, improved srt parser and fixed gps exif writing

pull/1567/head
Luca Di Leo 2023-01-13 12:08:16 +01:00
rodzic 02257a62cd
commit 60125010f2
4 zmienionych plików z 165 dodań i 179 usunięć

Wyświetl plik

@ -19,6 +19,7 @@ class Parameters:
timezone = None
frame_format = None
stats_file = None
limit = None
def __init__(self, args):
@ -27,13 +28,12 @@ class Parameters:
# "start" -> start frame index")
# "end" -> end frame index")
# "output-resolution" -> Override output resolution (ex. 640x480)")
# "blur-percentage" -> discard the lowest X percent of frames based on blur score (allowed values from 0.0 to 1.0)")
# "blur-threshold" -> blur measures that fall below this value will be considered 'blurry' (to be used in exclusion with --blur-percentage)")
# "blur-threshold" -> blur measures that fall below this value will be considered 'blurry'. Good value is 300
# "distance-threshold" -> distance measures that fall below this value will be considered 'similar'")
# "black-ratio-threshold" -> Set the threshold for considering a frame 'black'. Express the minimum value for the ratio: nb_black_pixels / nb_pixels. Default value is 0.98")
# "pixel-black-threshold" -> Set the threshold for considering a pixel 'black'. The threshold expresses the maximum pixel luminance value for which a pixel is considered 'black'. Good value is 0.30 (30%)")
# "use-srt" -> Use SRT files for extracting metadata (same name as video file with .srt extension)")
# "timezone" -> UTC timezone offset (ex. -5 for EST). Default to local timezone")
# "limit" -> Maximum number of output frames
# "frame-format" -> frame format (jpg, png, tiff, etc.)")
# "stats-file" -> Save statistics to csv file")
@ -50,7 +50,8 @@ class Parameters:
self.start = args["start"] if args["start"] else 0
self.end = args["end"] if args["end"] else None
self.blur_percentage = args["blur_percentage"] if args["blur_percentage"] else None
self.limit = args["limit"] if args["limit"] else None
self.blur_threshold = args["blur_threshold"] if args["blur_threshold"] else None
self.distance_threshold = args["distance_threshold"] if args["distance_threshold"] else None
@ -60,8 +61,6 @@ class Parameters:
self.use_srt = args["use_srt"]
self.utc_offset = datetime.timedelta(hours=int(args["timezone"])) if args["timezone"] != "local" else datetime.datetime.now() - datetime.datetime.utcnow()
self.frame_format = args["frame_format"]
self.output_resolution = tuple(map(int, args["output_resolution"].split("x"))) if args["output_resolution"] else None
@ -88,29 +87,19 @@ class Parameters:
print("Start frame index must be greater than 0")
return False
if args["limit"] and args["limit"] < 0:
print("Limit must be greater than 0")
return False
if args["end"]:
if args["end"] < 0:
print("End frame index must be greater than 0")
return False
if args["end"] < args["start"]:
if args["start"] is not None and args["end"] < args["start"]:
print("End frame index must be greater than start frame index")
return False
if args["timezone"] and args["timezone"] != "local":
try:
val = int(args["timezone"])
if val < -12 or val > 14:
print("Timezone must be in the range -12 to 14")
return False
except ValueError:
print("Timezone must be a valid integer")
return False
if args["blur_percentage"] and (args["blur_percentage"] < 0 or args["blur_percentage"] > 1):
print("Blur percentage must be in the range 0.0 to 1.0")
return False
if args["blur_threshold"] and args["blur_threshold"] < 0:
print("Blur threshold must be greater than 0")
return False

Wyświetl plik

@ -1,24 +1,23 @@
from datetime import datetime, timedelta
from datetime import datetime
import re
class SrtFileParser:
def __init__(self, filename, utc_offset):
def __init__(self, filename):
self.filename = filename
self.data = []
self.utc_offset = utc_offset
def get_entry(self, timestamp):
def get_entry(self, timestamp: datetime):
if not self.data:
self.parse()
# check min and max
if timestamp < self.min or timestamp > self.max:
if timestamp < self.data[0]["start"] or timestamp > self.data[len(self.data) - 1]["end"]:
return None
for entry in self.data:
if entry["timestamp"] <= timestamp:
if entry["start"] <= timestamp and entry["end"] >= timestamp:
return entry
return self.data[len(self.data) - 1]
return None
def parse(self):
@ -30,17 +29,32 @@ class SrtFileParser:
# 2023-01-06 18:56:48,380,821
# [iso : 3200] [shutter : 1/60.0] [fnum : 280] [ev : 0] [ct : 3925] [color_md : default] [focal_len : 240] [latitude: 0.000000] [longitude: 0.000000] [altitude: 0.000000] </font>
self.min = datetime.max
self.max = datetime.min
with open(self.filename, 'r') as f:
srtcnt = None
difftime = None
timestamp = None
iso = None
shutter = None
fnum = None
ev = None
ct = None
color_md = None
focal_len = None
latitude = None
longitude = None
altitude = None
start = None
end = None
for line in f:
# Check if line is empty
if not line.strip():
if srtcnt is not None:
self.data.append({
"start": start,
"end": end,
"srtcnt": srtcnt,
"difftime": difftime,
"timestamp": timestamp,
@ -55,9 +69,6 @@ class SrtFileParser:
"longitude": longitude,
"altitude": altitude
})
self.min = min(self.min, timestamp)
# account for the difftime milliseconds to get the actual max
self.max = max(self.max, timestamp + timedelta(milliseconds=difftime))
srtcnt = None
difftime = None
@ -72,11 +83,21 @@ class SrtFileParser:
latitude = None
longitude = None
altitude = None
start = None
end = None
continue
# Remove the html font tag
line = re.sub('<[^<]+?>', '', line)
# Search this "00:00:00,000 --> 00:00:00,016"
match = re.search("(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})", line)
if match:
start = datetime.strptime(match.group(1), "%H:%M:%S,%f")
end = datetime.strptime(match.group(2), "%H:%M:%S,%f")
match = re.search("SrtCnt : (\d+)", line)
if match:
srtcnt = int(match.group(1))
@ -89,8 +110,6 @@ class SrtFileParser:
if match:
timestamp = match.group(1)
timestamp = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S,%f")
# The timestamp is in local time, so we need to subtract the UTC offset
timestamp = timestamp - self.utc_offset
match = re.search("iso : (\d+)", line)
if match:
@ -134,9 +153,3 @@ class SrtFileParser:
if match:
altitude = float(match.group(1))
altitude = altitude if altitude != 0 else None
self.data.reverse()
self.max = self.max.replace(microsecond=0)
self.min = self.min.replace(microsecond=0)

Wyświetl plik

@ -2,31 +2,21 @@ from opendm.video.parameters import Parameters
import datetime
from fractions import Fraction
import io
from math import floor
from math import ceil, floor
import time
import cv2
import os
import pymediainfo
import collections
from PIL import Image
from checkers import BlackFrameChecker, PercentageBlurChecker, SimilarityChecker, ThresholdBlurChecker
from srtparser import SrtFileParser
import piexif
class Video2Dataset:
def __init__(self, parameters : Parameters):
self.parameters = parameters
# We prioritize blur threshold over blur percentage.
if parameters.blur_threshold is not None:
self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold)
else:
if parameters.blur_percentage is not None:
self.blur_checker = PercentageBlurChecker(parameters.blur_percentage)
else:
self.blur_checker = None
self.blur_checker = ThresholdBlurChecker(parameters.blur_threshold) if parameters.blur_threshold is not None else None
self.similarity_checker = SimilarityChecker(parameters.distance_threshold) if parameters.distance_threshold is not None else None
self.black_checker = BlackFrameChecker(parameters.black_ratio_threshold, parameters.pixel_black_threshold) if parameters.black_ratio_threshold is not None or parameters.pixel_black_threshold is not None else None
@ -52,26 +42,26 @@ class Video2Dataset:
print("Processing video: {}".format(input_file))
# get video info
video_info = self.GetVideoInfo(input_file)
video_info = get_video_info(input_file)
print(video_info)
utc_offset = self.parameters.utc_offset if self.parameters.utc_offset is not None else video_info.utc_offset
if self.parameters.use_srt:
srt_file = os.path.splitext(input_file)[0] + ".srt"
if os.path.exists(srt_file):
print("Loading SRT file: {}".format(srt_file))
srt_parser = SrtFileParser(srt_file, utc_offset)
srt_parser.parse()
else:
srt_file = os.path.splitext(input_file)[0] + ".SRT"
name = os.path.splitext(input_file)[0]
srt_files = [name + ".srt", name + ".SRT"]
srt_parser = None
for srt_file in srt_files:
if os.path.exists(srt_file):
print("Loading SRT file: {}".format(srt_file))
srt_parser = SrtFileParser(srt_file, utc_offset)
srt_parser.parse()
else:
print("SRT file not found: {}".format(srt_file))
srt_parser = None
try:
srt_parser = SrtFileParser(srt_file)
srt_parser.parse()
break
except Exception as e:
print("Error parsing SRT file: {}".format(e))
srt_parser = None
else:
srt_parser = None
@ -98,8 +88,6 @@ class Video2Dataset:
print("Error opening video stream or file")
return
frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if (self.parameters.start is not None):
cap.set(cv2.CAP_PROP_POS_FRAMES, self.parameters.start)
self.frame_index = self.parameters.start
@ -107,7 +95,9 @@ class Video2Dataset:
else:
start_frame = 0
frames_to_process = self.parameters.end - start_frame + 1 if (self.parameters.end is not None) else frames_count - start_frame
frames_to_process = self.parameters.end - start_frame + 1 if (self.parameters.end is not None) else video_info.total_frames - start_frame
output_file_paths = []
while (cap.isOpened()):
ret, frame = cap.read()
@ -127,11 +117,19 @@ class Video2Dataset:
if stats is not None and self.parameters.stats_file is not None:
self.WriteStats(input_file, stats)
# Add element to array
if stats is not None and "written" in stats.keys():
output_file_paths.append(stats["path"])
cap.release()
if self.f is not None:
self.f.close()
if self.parameters.limit is not None and self.global_idx >= self.parameters.limit:
print("Limit of {} frames reached, trimming dataset to {} frames".format(self.parameters.limit, self.global_idx))
limit_files(output_file_paths, self.parameters.limit)
end = time.time()
print("Total processing time: {:.2f}s".format(end - start))
@ -173,8 +171,9 @@ class Video2Dataset:
self.frame_index += 1
return res
self.SaveFrame(frame, video_info, srt_parser)
path = self.SaveFrame(frame, video_info, srt_parser)
res["written"] = True
res["path"] = path
self.frame_index += 1
self.global_idx += 1
@ -192,40 +191,38 @@ class Video2Dataset:
_, buf = cv2.imencode('.' + self.parameters.frame_format, frame)
start_time_utc = video_info.start_time_utc if video_info.start_time_utc is not None \
else srt_parser.data[0].timestamp if srt_parser is not None \
else datetime.datetime.now()
#start_time_utc = video_info.start_time_utc if video_info.start_time_utc is not None \
# else srt_parser.data[0].timestamp if srt_parser is not None \
# else datetime.datetime.now()
elapsed_time_utc = start_time_utc + datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate))
elapsed_time = elapsed_time_utc + srt_parser.utc_offset
#elapsed_time_utc = start_time_utc + datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate))
#elapsed_time = elapsed_time_utc + srt_parser.utc_offset if srt_parser is not None else elapsed_time_utc
delta = datetime.timedelta(seconds=(self.frame_index / video_info.frame_rate))
# convert to datetime
elapsed_time = datetime.datetime(1900, 1, 1) + delta
img = Image.open(io.BytesIO(buf))
# Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail'
entry = srt_parser.get_entry(elapsed_time) if srt_parser is not None else None
elapsed_time_str = (elapsed_time + (datetime.datetime.now() - datetime.datetime(1900, 1, 1))).strftime("%Y:%m:%d %H:%M:%S.%f")
# Exif dict contains the following keys: '0th', 'Exif', 'GPS', '1st', 'thumbnail'
# Set the EXIF metadata
exif_dict = {
"0th": {
piexif.ImageIFD.Software: "ODM",
piexif.ImageIFD.DateTime: elapsed_time.strftime('%Y:%m:%d %H:%M:%S'),
piexif.ImageIFD.DateTime: elapsed_time_str,
piexif.ImageIFD.XResolution: (frame.shape[1], 1),
piexif.ImageIFD.YResolution: (frame.shape[0], 1),
},
"Exif": {
piexif.ExifIFD.DateTimeOriginal: elapsed_time.strftime('%Y:%m:%d %H:%M:%S'),
piexif.ExifIFD.DateTimeDigitized: elapsed_time.strftime('%Y:%m:%d %H:%M:%S'),
piexif.ExifIFD.DateTimeOriginal: elapsed_time_str,
piexif.ExifIFD.DateTimeDigitized: elapsed_time_str,
piexif.ExifIFD.PixelXDimension: (frame.shape[1], 1),
piexif.ExifIFD.PixelYDimension: (frame.shape[0], 1),
}}
if video_info.orientation is not None:
exif_dict["0th"][piexif.ImageIFD.Orientation] = video_info.orientation
if video_info.model is not None:
exif_dict["Exif"][piexif.ImageIFD.Model] = video_info.model
entry = srt_parser.get_entry(elapsed_time_utc) if srt_parser is not None else None
if entry is not None:
segs = entry["shutter"].split("/")
exif_dict["Exif"][piexif.ExifIFD.ExposureTime] = (int(float(segs[0])), int(float(segs[1])))
@ -233,24 +230,14 @@ class Video2Dataset:
exif_dict["Exif"][piexif.ExifIFD.FNumber] = (entry["fnum"], 1)
exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = (entry["iso"], 1)
exif_dict["GPS"] = {
piexif.GPSIFD.GPSMapDatum: "WGS-84",
piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d %H:%M:%S')
}
if (entry["latitude"] is not None):
exif_dict["GPS"][piexif.GPSIFD.GPSLatitude] = FloatToRational(entry["latitude"])
if (entry["longitude"] is not None):
exif_dict["GPS"][piexif.GPSIFD.GPSLongitude] = FloatToRational(entry["longitude"])
if (entry["altitude"] is not None):
exif_dict["GPS"][piexif.GPSIFD.GPSAltitude] = FloatToRational(entry["altitude"])
exif_dict["GPS"] = get_gps_location(elapsed_time, entry["latitude"], entry["longitude"], entry["altitude"])
exif_bytes = piexif.dump(exif_dict)
img.save(path, exif=exif_bytes)
return path
def WriteStats(self, input_file, stats):
self.f.write("{};{};{};{};{};{};{};{};{};{}\n".format(
@ -266,84 +253,82 @@ class Video2Dataset:
stats["written"] if "written" in stats else "").replace(".", ","))
def GetVideoInfo(self, input_file):
def get_video_info(input_file):
video = cv2.VideoCapture(input_file)
video = cv2.VideoCapture(input_file)
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
frame_rate = video.get(cv2.CAP_PROP_FPS)
start_time_utc, utc_offset, orientation, model = self.GetVideoMetadata(input_file)
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
frame_rate = video.get(cv2.CAP_PROP_FPS)
video.release()
video.release()
return collections.namedtuple("VideoInfo", ["total_frames", "frame_rate", "start_time_utc", "utc_offset", "orientation", "model"])(total_frames, frame_rate, start_time_utc, utc_offset, orientation, model)
return collections.namedtuple("VideoInfo", ["total_frames", "frame_rate"])(total_frames, frame_rate)
def GetVideoMetadata(self, input_file):
try:
metadata = pymediainfo.MediaInfo.parse(input_file).to_data()
start_time_utc = None
orientation = 1
performer = None
utc_offset = None
if metadata is not None and 'tracks' in metadata:
# Check if it is safe to access the first element of the tracks list
if len(metadata['tracks']) > 0:
start_time_utc = metadata['tracks'][0].get('encoded_date') or \
metadata['tracks'][0].get('tagged_date')
start_time_utc = datetime.datetime.strptime(start_time_utc, '%Z %Y-%m-%d %H:%M:%S') if start_time_utc is not None else None
file_creation_date_utc = metadata['tracks'][0].get('file_creation_date')
file_creation_date_utc = datetime.datetime.strptime(file_creation_date_utc, '%Z %Y-%m-%d %H:%M:%S.%f') if file_creation_date_utc is not None else None
file_creation_date = metadata['tracks'][0].get('file_creation_date__local')
file_creation_date = datetime.datetime.strptime(file_creation_date, '%Y-%m-%d %H:%M:%S.%f') if file_creation_date is not None else None
if file_creation_date_utc is not None and file_creation_date is not None:
utc_offset = file_creation_date - file_creation_date_utc
performer = metadata['tracks'][0].get('performer')
# Check if it is safe to access the second element of the tracks list
if len(metadata['tracks']) > 1:
orientation = metadata['tracks'][1].get('rotation')
if orientation is not None:
orientation = int(float(orientation))
# The 8 EXIF orientation values are numbered 1 to 8.
# 1 = 0°: the correct orientation, no adjustment is required.
# 2 = 0°, mirrored: image has been flipped back-to-front.
# 3 = 180°: image is upside down.
# 4 = 180°, mirrored: image has been flipped back-to-front and is upside down.
# 5 = 90°: image has been flipped back-to-front and is on its side.
# 6 = 90°, mirrored: image is on its side.
# 7 = 270°: image has been flipped back-to-front and is on its far side.
# 8 = 270°, mirrored: image is on its far side.
if orientation == 0:
orientation = 1
elif orientation == 90:
orientation = 8
elif orientation == 180:
orientation = 3
elif orientation == 270:
orientation = 6
return start_time_utc, utc_offset, orientation, performer
except Exception as e:
return start_time_utc, utc_offset, orientation, performer
def FloatToRational(f):
def float_to_rational(f):
f = Fraction(f).limit_denominator()
return (f.numerator, f.denominator)
return (f.numerator, f.denominator)
def limit_files(paths, limit):
    """Delete evenly spaced files from *paths* until at most *limit* remain.

    The files are thinned out in passes: on each pass every ``skip``-th file
    (starting with the first one) is removed from disk, where ``skip`` is
    chosen so that a single pass never removes more files than still need to
    go. Passes repeat until the number of surviving files is <= *limit*.

    Args:
        paths: Ordered list of file paths that currently exist on disk.
            The list itself is not modified.
        limit: Maximum number of files to keep. Must be >= 0.

    Raises:
        ValueError: If *limit* is negative (nothing is deleted in that case).
        OSError: Propagated from ``os.remove`` if a file cannot be deleted.
    """
    if limit < 0:
        # Reject before touching the disk: a negative limit can never be
        # satisfied and would otherwise wipe every file.
        raise ValueError("limit must be greater than or equal to 0")

    remaining = list(paths)

    while True:
        num_to_delete = len(remaining) - limit
        if num_to_delete <= 0:
            return

        # ceil() guarantees len(remaining) / skip <= num_to_delete, so one
        # pass cannot overshoot the limit.
        skip = ceil(len(remaining) / num_to_delete)

        kept = []
        for index, path in enumerate(remaining):
            if index % skip == 0:
                os.remove(path)
            else:
                kept.append(path)

        remaining = kept
def to_deg(value, loc):
    """Convert a decimal coordinate into a degrees, minutes and seconds tuple.

    Args:
        value: Decimal GPS coordinate as a float.
        loc: Direction pair, ``["S", "N"]`` for latitude or ``["W", "E"]``
            for longitude. Index 0 is used for negative values, index 1 for
            positive values.

    Returns:
        Tuple ``(degrees, minutes, seconds, direction)``, for example
        ``(25, 13, 48.343, "N")``. Seconds are rounded to 5 decimals and the
        direction is an empty string when *value* is exactly 0.
    """
    if value < 0:
        loc_value = loc[0]
    elif value > 0:
        loc_value = loc[1]
    else:
        loc_value = ""

    abs_value = abs(value)
    deg = int(abs_value)
    fractional_minutes = (abs_value - deg) * 60
    minutes = int(fractional_minutes)
    sec = round((fractional_minutes - minutes) * 60, 5)

    # Rounding can push the seconds up to 60.0; carry the overflow so the
    # result always satisfies 0 <= seconds < 60 and 0 <= minutes < 60.
    if sec >= 60:
        sec -= 60
        minutes += 1
    if minutes >= 60:
        minutes -= 60
        deg += 1

    return (deg, minutes, sec, loc_value)
def get_gps_location(elapsed_time, lat, lng, altitude):
    """Build the piexif "GPS" IFD dictionary for a frame.

    Args:
        elapsed_time: datetime used to fill the GPSDateStamp tag.
        lat: Latitude in decimal degrees, or None when unknown.
        lng: Longitude in decimal degrees, or None when unknown.
        altitude: Altitude (rounded to an integer before writing), or None
            when unknown.

    Returns:
        Dict keyed by ``piexif.GPSIFD`` tags. Latitude, longitude and
        altitude tags are only present for the values that are not None.
    """
    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        # NOTE(review): EXIF defines GPSDateStamp as a date-only "YYYY:MM:DD"
        # field - confirm whether the time part is intended here.
        piexif.GPSIFD.GPSDateStamp: elapsed_time.strftime('%Y:%m:%d %H:%M:%S.%f')
    }

    if altitude is not None:
        rounded_altitude = round(altitude)
        # GPSAltitude is an unsigned rational: store the magnitude and use
        # GPSAltitudeRef to flag values below sea level (1) vs. above (0).
        gps_ifd[piexif.GPSIFD.GPSAltitudeRef] = 1 if rounded_altitude < 0 else 0
        gps_ifd[piexif.GPSIFD.GPSAltitude] = float_to_rational(abs(rounded_altitude))

    # Convert each coordinate only after the None check; to_deg() compares
    # its argument with 0 and would raise TypeError on None.
    if lat is not None:
        lat_deg = to_deg(lat, ["S", "N"])
        gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = lat_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLatitude] = (
            float_to_rational(lat_deg[0]),
            float_to_rational(lat_deg[1]),
            float_to_rational(lat_deg[2]),
        )

    if lng is not None:
        lng_deg = to_deg(lng, ["W", "E"])
        gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = lng_deg[3]
        gps_ifd[piexif.GPSIFD.GPSLongitude] = (
            float_to_rational(lng_deg[0]),
            float_to_rational(lng_deg[1]),
            float_to_rational(lng_deg[2]),
        )

    return gps_ifd

Wyświetl plik

@ -36,5 +36,4 @@ trimesh==3.17.1
pandas==1.5.2
# for video support
pymediainfo==6.0.1
piexif==1.1.3