From b1a36e840d22ba707d7d6b506faa0724faa8dc32 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Tue, 21 May 2019 12:38:30 -0400 Subject: [PATCH] Retry mve on error code 134, progress fix Former-commit-id: 9f72f3c0cd3b80527cc037dd820a6683bb671948 --- opendm/progress.py | 2 +- opendm/types.py | 10 ++++------ run.py | 2 +- stages/mve.py | 19 +++++++++++++++++-- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/opendm/progress.py b/opendm/progress.py index cc030461..3114b4f9 100644 --- a/opendm/progress.py +++ b/opendm/progress.py @@ -28,7 +28,7 @@ class Broadcaster: UDP_IP = "127.0.0.1" if global_progress > 100: - log.ODM_WARNING("Global progress is > 100, please contact the developers.") + log.ODM_WARNING("Global progress is > 100 (%s), please contact the developers." % global_progress) global_progress = 100 try: diff --git a/opendm/types.py b/opendm/types.py index e169db60..619a2f2b 100644 --- a/opendm/types.py +++ b/opendm/types.py @@ -381,12 +381,10 @@ class ODM_Stage: return max(0.0, self.progress) def previous_stages_progress(self): - sum = 0 - stage = self.prev_stage - while stage: - sum += stage.delta_progress() - stage = stage.prev_stage - return sum + if self.prev_stage: + return max(0.0, self.prev_stage.progress) + else: + return 0.0 def update_progress_end(self): self.update_progress(100.0) diff --git a/run.py b/run.py index 0327ef9d..cc4ab0c2 100755 --- a/run.py +++ b/run.py @@ -22,7 +22,7 @@ if __name__ == '__main__': for k in sorted(args_dict.keys()): # Don't leak token if k == 'sm_cluster' and args_dict[k] is not None: - log.ODM_DEBUG('%s: True') + log.ODM_DEBUG('%s: True' % k) else: log.ODM_DEBUG('%s: %s' % (k, args_dict[k])) log.ODM_DEBUG('==============') diff --git a/stages/mve.py b/stages/mve.py index 68263eef..7a7dbc6d 100644 --- a/stages/mve.py +++ b/stages/mve.py @@ -110,8 +110,23 @@ class ODMMveStage(types.ODM_Stage): log.ODM_INFO("Running dense reconstruction. This might take a while. Please be patient, the process is not dead or hung.") log.ODM_INFO(" Process is running") - system.run('%s %s %s' % (context.dmrecon_path, ' '.join(dmrecon_config), tree.mve), env_vars={'OMP_NUM_THREADS': args.max_concurrency}) - + # TODO: find out why MVE is crashing at random + # MVE *seems* to have a race condition, triggered randomly, regardless of dataset + # https://gist.github.com/pierotofy/6c9ce93194ba510b61e42e3698cfbb89 + # Temporary workaround is to retry the reconstruction until we get it right + # (up to a certain number of retries). + retry_count = 1 + while retry_count < 10: + try: + system.run('%s %s %s' % (context.dmrecon_path, ' '.join(dmrecon_config), tree.mve), env_vars={'OMP_NUM_THREADS': args.max_concurrency}) + break + except Exception as e: + if str(e) == "Child returned 134": + retry_count += 1 + log.ODM_WARNING("Caught error code, retrying attempt #%s" % retry_count) + else: + raise e + self.update_progress(90) scene2pset_config = [