From 9beb0cc2e5fc977c5d8de99dd544dbab70fef239 Mon Sep 17 00:00:00 2001
From: Seth Fitzsimmons
Date: Fri, 30 Sep 2016 02:42:10 -0700
Subject: [PATCH] Use multiprocessing to parallelize early tasks

---
 scripts/dataset.py |  20 +++++----
 scripts/resize.py  | 101 +++++++++++++++++++++++++--------------------
 2 files changed, 70 insertions(+), 51 deletions(-)

Notes (ignored by `git am`; not part of the commit message):
- This copy was rebuilt from a whitespace-collapsed version of the patch.
  Blank context lines were placed so each hunk matches its @@ line counts;
  indentation widths are inferred from Python block structure. If a hunk
  fails to apply, retry with `git apply --ignore-whitespace`.
- NOTE(review): both `Pool()` instances are created inline and never
  closed or joined in this patch.
- NOTE(review): `img_r.shape[0]` feeds `photo.width` and PixelXDimension;
  presumably shape is (rows, cols, ...) for OpenCV images, which would make
  index 0 the height -- confirm. The removed loop did the same, so this
  patch does not introduce it.

diff --git a/scripts/dataset.py b/scripts/dataset.py
index c24d83d8..c2b3b592 100644
--- a/scripts/dataset.py
+++ b/scripts/dataset.py
@@ -1,12 +1,20 @@
 import os
 import ecto
 
+from functools import partial
+from multiprocessing import Pool
 from opendm import context
 from opendm import io
 from opendm import types
 from opendm import log
 
 
+def make_odm_photo(force_focal, force_ccd, path_file):
+    return types.ODM_Photo(path_file,
+                           force_focal,
+                           force_ccd)
+
+
 class ODMLoadDatasetCell(ecto.Cell):
 
     def declare_params(self, params):
@@ -49,13 +57,11 @@ class ODMLoadDatasetCell(ecto.Cell):
 
         if files:
             # create ODMPhoto list
-            photos = []
-            for f in files:
-                path_file = io.join_paths(images_dir, f)
-                photo = types.ODM_Photo(path_file,
-                                        self.params.force_focal,
-                                        self.params.force_ccd)
-                photos.append(photo)
+            path_files = [io.join_paths(images_dir, f) for f in files]
+            photos = Pool().map(
+                partial(make_odm_photo, self.params.force_focal, self.params.force_ccd),
+                path_files
+            )
 
             log.ODM_INFO('Found %s usable images' % len(photos))
         else:
diff --git a/scripts/resize.py b/scripts/resize.py
index 34dfaa63..b67b4c25 100644
--- a/scripts/resize.py
+++ b/scripts/resize.py
@@ -2,12 +2,61 @@ import ecto
 import cv2
 import pyexiv2
 
+from functools import partial
+from multiprocessing import Pool
 from opendm import log
 from opendm import system
 from opendm import io
 from opendm import types
 
 
+def resize(src_dir, target_dir, resize_to, rerun_cell, photo):
+    # define image paths
+    path_file = photo.path_file
+    new_path_file = io.join_paths(target_dir, photo.filename)
+    # set raw image path in case we want to rerun cell
+    if io.file_exists(new_path_file) and rerun_cell:
+        path_file = io.join_paths(src_dir, photo.filename)
+
+    if not io.file_exists(new_path_file) or rerun_cell:
+        # open and resize image with opencv
+        img = cv2.imread(path_file)
+        # compute new size
+        max_side = max(img.shape[0], img.shape[1])
+        if max_side <= resize_to:
+            log.ODM_WARNING('Resize Parameter is greater than the largest side of the image')
+        ratio = float(resize_to) / float(max_side)
+        img_r = cv2.resize(img, None, fx=ratio, fy=ratio)
+        # write image with opencv
+        cv2.imwrite(new_path_file, img_r)
+        # read metadata with pyexiv2
+        old_meta = pyexiv2.ImageMetadata(path_file)
+        new_meta = pyexiv2.ImageMetadata(new_path_file)
+        old_meta.read()
+        new_meta.read()
+        # copy metadata
+        old_meta.copy(new_meta)
+        # update metadata size
+        new_meta['Exif.Photo.PixelXDimension'] = img_r.shape[0]
+        new_meta['Exif.Photo.PixelYDimension'] = img_r.shape[1]
+        new_meta.write()
+        # update photos array with new values
+        photo.path_file = new_path_file
+        photo.width = img_r.shape[0]
+        photo.height = img_r.shape[1]
+        photo.update_focal()
+
+        # log message
+        log.ODM_DEBUG('Resized %s | dimensions: %s' %
+                      (photo.filename, img_r.shape))
+    else:
+        # log message
+        log.ODM_WARNING('Already resized %s | dimensions: %s x %s' %
+                        (photo.filename, photo.width, photo.height))
+
+    return photo
+
+
 class ODMResizeCell(ecto.Cell):
     def declare_params(self, params):
         params.declare("resize_to", "resizes images by the largest side", 2400)
@@ -51,49 +100,14 @@ class ODMResizeCell(ecto.Cell):
                       'resize' in args.rerun_from)
 
         # loop over photos
-        for photo in photos:
-            # define image paths
-            path_file = photo.path_file
-            new_path_file = io.join_paths(tree.dataset_resize, photo.filename)
-            # set raw image path in case we want to rerun cell
-            if io.file_exists(new_path_file) and rerun_cell:
-                path_file = io.join_paths(tree.dataset_raw, photo.filename)
-
-            if not io.file_exists(new_path_file) or rerun_cell:
-                # open and resize image with opencv
-                img = cv2.imread(path_file)
-                # compute new size
-                max_side = max(img.shape[0], img.shape[1])
-                if max_side <= self.params.resize_to:
-                    log.ODM_WARNING('Resize Parameter is greater than the largest side of the image')
-                ratio = float(self.params.resize_to) / float(max_side)
-                img_r = cv2.resize(img, None, fx=ratio, fy=ratio)
-                # write image with opencv
-                cv2.imwrite(new_path_file, img_r)
-                # read metadata with pyexiv2
-                old_meta = pyexiv2.ImageMetadata(path_file)
-                new_meta = pyexiv2.ImageMetadata(new_path_file)
-                old_meta.read()
-                new_meta.read()
-                # copy metadata
-                old_meta.copy(new_meta)
-                # update metadata size
-                new_meta['Exif.Photo.PixelXDimension'] = img_r.shape[0]
-                new_meta['Exif.Photo.PixelYDimension'] = img_r.shape[1]
-                new_meta.write()
-                # update photos array with new values
-                photo.path_file = new_path_file
-                photo.width = img_r.shape[0]
-                photo.height = img_r.shape[1]
-                photo.update_focal()
-
-                # log message
-                log.ODM_DEBUG('Resized %s | dimensions: %s' %
-                              (photo.filename, img_r.shape))
-            else:
-                # log message
-                log.ODM_WARNING('Already resized %s | dimensions: %s x %s' %
-                                (photo.filename, photo.width, photo.height))
+        photos = Pool().map(
+            partial(resize,
+                    tree.dataset_raw,
+                    tree.dataset_resize,
+                    self.params.resize_to,
+                    rerun_cell),
+            photos
+        )
 
         log.ODM_INFO('Resized %s images' % len(photos))
 
@@ -105,4 +119,3 @@ class ODMResizeCell(ecto.Cell):
 
         log.ODM_INFO('Running ODM Resize Cell - Finished')
         return ecto.OK if args.end_with != 'resize' else ecto.QUIT
-