Merge pull request #460 from pierotofy/hangfix

Fixed hanging error due to low memory on processing nodes, preset def…
pull/465/head
Piero Toffanin 2018-06-01 09:04:12 -04:00 zatwierdzone przez GitHub
commit 1ec47d27f2
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
4 zmienionych plików z 41 dodań i 18 usunięć

Wyświetl plik

@ -98,22 +98,16 @@ def add_default_presets():
try:
Preset.objects.update_or_create(name='DSM + DTM', system=True,
defaults={
'options': [{'name': 'dsm', 'value': True}, {'name': 'dtm', 'value': True},
{'name': 'mesh-octree-depth', 'value': 6},
{'name': 'mesh-solver-divide', 'value': 6}]})
'options': [{'name': 'dsm', 'value': True}, {'name': 'dtm', 'value': True}]})
Preset.objects.update_or_create(name='Fast Orthophoto', system=True,
defaults={'options': [{'name': 'fast-orthophoto', 'value': True}]})
Preset.objects.update_or_create(name='High Quality', system=True,
Preset.objects.update_or_create(name='High Resolution', system=True,
defaults={'options': [{'name': 'dsm', 'value': True},
{'name': 'mesh-octree-depth', 'value': 6},
{'name': 'mesh-solver-divide', 'value': 6},
{'name': 'dem-resolution', 'value': "0.04"},
{'name': 'orthophoto-resolution', 'value': "40"},
]})
Preset.objects.update_or_create(name='Default', system=True,
defaults={'options': [{'name': 'dsm', 'value': True},
{'name': 'mesh-octree-depth', 'value': 6},
{'name': 'mesh-solver-divide', 'value': 6}]})
defaults={'options': [{'name': 'dsm', 'value': True}]})
except MultipleObjectsReturned:
# Mostly to handle a legacy code problem where
# multiple system presets with the same name were

Wyświetl plik

@ -305,11 +305,24 @@ class Task(models.Model):
# Processing node assigned, but is offline and no errors
if self.processing_node and not self.processing_node.is_online():
# Detach processing node, will be processed at the next tick
logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
self.uuid = ''
self.processing_node = None
self.save()
# If we are queued up, detach the processing node;
# the reassignment will be processed
# at the next tick
if self.status == status_codes.QUEUED:
logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
self.uuid = ''
self.processing_node = None
self.status = None
self.save()
elif self.status == status_codes.RUNNING:
# Task was running and processing node went offline
# It could have crashed due to low memory
# or perhaps it went offline due to network errors.
# We can't easily differentiate between the two, so we need
# to notify the user because if it crashed due to low memory
# the user might need to take action (or be stuck in an infinite loop)
raise ProcessingError("Processing node went offline. This could be due to insufficient memory or a network error.")
if self.processing_node:
# Need to process some images (UUID not yet set and task doesn't have pending actions)?

Wyświetl plik

@ -23,7 +23,7 @@ class TestApiPreset(BootTestCase):
def check_default_presets(self):
self.assertTrue(Preset.objects.filter(name="Default", system=True).exists())
self.assertTrue(Preset.objects.filter(name="DSM + DTM", system=True).exists())
self.assertTrue(Preset.objects.filter(name="High Quality", system=True).exists())
self.assertTrue(Preset.objects.filter(name="High Resolution", system=True).exists())
def test_preset(self):
client = APIClient()
@ -55,7 +55,7 @@ class TestApiPreset(BootTestCase):
# Only ours and global presets are available
self.assertTrue(len(res.data) == 7)
self.assertTrue('My Local Preset' in [preset['name'] for preset in res.data])
self.assertTrue('High Quality' in [preset['name'] for preset in res.data])
self.assertTrue('High Resolution' in [preset['name'] for preset in res.data])
self.assertTrue('Global Preset #1' in [preset['name'] for preset in res.data])
self.assertTrue('Global Preset #2' in [preset['name'] for preset in res.data])
self.assertFalse('Local Preset #1' in [preset['name'] for preset in res.data])

Wyświetl plik

@ -570,9 +570,9 @@ class TestApiTask(BootTransactionTestCase):
another_pnode.last_refreshed = timezone.now()
another_pnode.save()
# Remove error
# Remove error, set status to queued
task.last_error = None
task.status = None
task.status = status_codes.QUEUED
task.save()
worker.tasks.process_pending_tasks()
@ -580,12 +580,28 @@ class TestApiTask(BootTransactionTestCase):
# Processing node is now cleared and a new one will be assigned on the next tick
task.refresh_from_db()
self.assertTrue(task.processing_node is None)
self.assertTrue(task.status is None)
worker.tasks.process_pending_tasks()
task.refresh_from_db()
self.assertTrue(task.processing_node.id == another_pnode.id)
# Set task to running, bring node offline
task.last_error = None
task.status = status_codes.RUNNING
task.save()
another_pnode.last_refreshed = timezone.now() - timedelta(minutes=OFFLINE_MINUTES)
another_pnode.save()
worker.tasks.process_pending_tasks()
task.refresh_from_db()
# Processing node is still there, but task should have failed
self.assertTrue(task.status == status_codes.FAILED)
self.assertTrue("Processing node went offline." in task.last_error)
def test_task_manual_processing_node(self):
user = User.objects.get(username="testuser")
project = Project.objects.create(name="User Test Project", owner=user)