Merge pull request #892 from nchamo/add-github-import

Adding GitHub integration to Cloud Import plugin
pull/893/head
Nicolas Chamo 2020-07-26 18:26:41 -03:00 zatwierdzone przez GitHub
commit b56c3556a7
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
8 zmienionych plików z 118 dodań i 54 usunięć

Wyświetl plik

@ -1,5 +1,29 @@
import inspect
from worker.celery import app from worker.celery import app
# noinspection PyUnresolvedReferences # noinspection PyUnresolvedReferences
from worker.tasks import execute_grass_script from worker.tasks import execute_grass_script
task = app.task task = app.task
def run_function_async(func, *args, **kwargs):
    """
    Run a function asynchronously using Celery.

    Plugins should use this function so that they don't
    have to register new Celery tasks at startup. Only the function's
    source text is shipped to the worker, so the function should import
    any required library at the top of its own body.

    :param func: the function to execute on a worker
    :param args: positional arguments forwarded to func
    :param kwargs: keyword arguments forwarded to func
    :return: the Celery AsyncResult for the queued task
    """
    # Ship the function by source; the worker re-compiles and runs it.
    source = inspect.getsource(func)
    return eval_async.delay(source, func.__name__, *args, **kwargs)
@app.task
def eval_async(source, funcname, *args, **kwargs):
    """
    Compile and run Python source code asynchronously on a Celery worker.

    It's recommended to use run_function_async instead of calling this
    task directly. The source is executed in a fresh namespace and the
    function named *funcname* is then invoked with the given arguments.

    NOTE(review): this executes arbitrary Python source — safe only as
    long as tasks are queued exclusively by trusted plugin code.
    """
    namespace = {}
    exec(compile(source, 'file', 'exec'), namespace)
    return namespace[funcname](*args, **kwargs)

Wyświetl plik

@ -1,6 +1,6 @@
{ {
"name": "WebODM", "name": "WebODM",
"version": "1.4.1", "version": "1.4.2",
"description": "User-friendly, extendable application and API for processing aerial imagery.", "description": "User-friendly, extendable application and API for processing aerial imagery.",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {

Wyświetl plik

@ -11,7 +11,7 @@ Currently, we support these kinds of sources:
A **cloud platform** is an online platform that can store files, like [Dropbox](https://www.dropbox.com/ "Dropbox") or [Google Drive](https://www.google.com/drive/ "Google Drive"). Platforms have the concept of a folder or album, where files are stored. By entering the folder's URL, we will use each platform's API to retrieve all the images in those folders, and import them into WebODM. A **cloud platform** is an online platform that can store files, like [Dropbox](https://www.dropbox.com/ "Dropbox") or [Google Drive](https://www.google.com/drive/ "Google Drive"). Platforms have the concept of a folder or album, where files are stored. By entering the folder's URL, we will use each platform's API to retrieve all the images in those folders, and import them into WebODM.
Current platforms supported: Current platforms supported:
*None so far* * [GitHub](https://github.com/ "GitHub")
#### Cloud Libraries #### Cloud Libraries
A **cloud library** is an extension of a cloud platform that has images organized in folders or albums. It differs from a cloud platform, in the way that it can also list all folders it contains, so that a user can choose to import a specific folder from a list, instead of a URL. A **cloud library** is an extension of a cloud platform that has images organized in folders or albums. It differs from a cloud platform, in the way that it can also list all folders it contains, so that a user can choose to import a specific folder from a list, instead of a URL.
@ -39,5 +39,5 @@ Now, there are a few known gaps to the system that you might encounter or that y
Currently, when importing a folder, image resizing is not allowed. This might be a problem for users without a lot of disk space, so it might make sense to fix this. Currently, when importing a folder, image resizing is not allowed. This might be a problem for users without a lot of disk space, so it might make sense to fix this.
1. **Allow potential pagination when calling APIs** 1. **Allow potential pagination when calling APIs**
Currently, the workflow doesn't support calling APIs that require pagination. Currently, the workflow doesn't support calling APIs that require pagination.
1. **Make platform extension have their own js, like WebODM plugins** 1. **Make platform extensions have their own js, like WebODM plugins**
Currently, when a platform extension requires their own Javascript code, you will need to add this code manually to the already existing code. It would be much easier if this was handled automatically, like the other parts of the add-on. Currently, when a platform extension requires their own Javascript code, you will need to add this code manually to the already existing code. It would be much easier if this was handled automatically, like the other parts of the add-on.

Wyświetl plik

@ -5,8 +5,8 @@ from os import path
from app import models, pending_actions from app import models, pending_actions
from app.plugins.views import TaskView from app.plugins.views import TaskView
from app.plugins.worker import task from app.plugins.worker import run_function_async
from app.plugins import logger, get_current_plugin from app.plugins import get_current_plugin
from worker.celery import app from worker.celery import app
from rest_framework.response import Response from rest_framework.response import Response
@ -52,7 +52,7 @@ class ImportFolderTaskView(TaskView):
# Start importing the files in the background # Start importing the files in the background
serialized = [file.serialize() for file in files] serialized = [file.serialize() for file in files]
import_files.delay(task.id, serialized) run_function_async(import_files, task.id, serialized)
return Response({}, status=status.HTTP_200_OK) return Response({}, status=status.HTTP_200_OK)
@ -99,11 +99,21 @@ class PlatformsTaskView(TaskView):
return Response({'platforms': [platform.serialize(user = request.user) for platform in platforms]}, status=status.HTTP_200_OK) return Response({'platforms': [platform.serialize(user = request.user) for platform in platforms]}, status=status.HTTP_200_OK)
### ###
# CELERY TASK(S) #
### ###
@task
def import_files(task_id, files): def import_files(task_id, files):
import requests
from app import models
from app.plugins import logger
def download_file(task, file):
    # Destination path inside the task's working directory,
    # keyed by the file's original name.
    path = task.task_path(file['name'])
    # Stream the remote file to disk in 4 KiB chunks to bound memory use;
    # time out after 60 seconds of inactivity.
    download_stream = requests.get(file['url'], stream=True, timeout=60)
    with open(path, 'wb') as fd:
        for chunk in download_stream.iter_content(4096):
            fd.write(chunk)
    # Register the downloaded image with the task.
    models.ImageUpload.objects.create(task=task, image=path)
logger.info("Will import {} files".format(len(files))) logger.info("Will import {} files".format(len(files)))
task = models.Task.objects.get(pk=task_id) task = models.Task.objects.get(pk=task_id)
task.create_task_directories() task.create_task_directories()
@ -125,13 +135,3 @@ def import_files(task_id, files):
task.processing_time = 0 task.processing_time = 0
task.partial = False task.partial = False
task.save() task.save()
def download_file(task, file):
path = task.task_path(file['name'])
download_stream = requests.get(file['url'], stream=True, timeout=60)
with open(path, 'wb') as fd:
for chunk in download_stream.iter_content(4096):
fd.write(chunk)
models.ImageUpload.objects.create(task=task, image=path)

Wyświetl plik

@ -16,14 +16,14 @@ class CloudPlatform(ABC):
def verify_folder_url(self, folder_url): def verify_folder_url(self, folder_url):
try: try:
# Parse the url and get the id of the folder, and the server # Parse the url and get all necessary information
server_url, folder_id = self.get_server_and_folder_id_from_url(folder_url) information = self.parse_url(folder_url)
# Define the API url we will call to assert that the folder exists and is valid # Define the API url we will call to assert that the folder exists and is valid
folder_api_url = self.build_folder_api_url(server_url, folder_id) folder_api_url = self.build_folder_api_url(information)
# Call the API # Call the API
payload = self.call_api(folder_api_url) payload = self.call_api(folder_api_url)
# Parse payload into a Folder instance # Parse payload into a Folder instance
return self.parse_payload_into_folder(payload) return self.parse_payload_into_folder(folder_url, payload)
except Exception as e: except Exception as e:
logger.error(str(e)) logger.error(str(e))
return None return None
@ -34,10 +34,10 @@ class CloudPlatform(ABC):
if self.verify_folder_url(folder_url) == None: if self.verify_folder_url(folder_url) == None:
raise Exception('Invalid URL') raise Exception('Invalid URL')
# Parse the url and get the id of the folder, and the server # Parse the url and get all necessary information
server_url, folder_id = self.get_server_and_folder_id_from_url(folder_url) information = self.parse_url(folder_url)
# Define the API url we will call to get all the files in the folder # Define the API url we will call to get all the files in the folder
folder_api_url = self.build_list_files_in_folder_api_url(server_url, folder_id) folder_api_url = self.build_list_files_in_folder_api_url(information)
# Call the API # Call the API
payload = self.call_api(folder_api_url) payload = self.call_api(folder_api_url)
# Parse the payload into File instances # Parse the payload into File instances
@ -48,7 +48,9 @@ class CloudPlatform(ABC):
return [file for file in files if file.is_valid()] return [file for file in files if file.is_valid()]
def call_api(self, api_url): def call_api(self, api_url):
return requests.get(api_url, timeout=10).json() response = requests.get(api_url, timeout=10)
response.raise_for_status()
return response.json()
def platform_file_processing(self, files): def platform_file_processing(self, files):
"""This method does nothing, but each platform might want to do some processing of the files and they can, by overriding this method""" """This method does nothing, but each platform might want to do some processing of the files and they can, by overriding this method"""
@ -58,19 +60,19 @@ class CloudPlatform(ABC):
return {'name': self.name, 'folder_url_example': self.folder_url_example, 'type': 'platform'} return {'name': self.name, 'folder_url_example': self.folder_url_example, 'type': 'platform'}
@abstractmethod @abstractmethod
def get_server_and_folder_id_from_url(self, url): def parse_url(self, url):
"""Parse the given url and return the folder id, and the server url. Will throw an exception if the url is invalid""" """Parse the given url and return necessary information to prepare the next requests"""
@abstractmethod @abstractmethod
def build_list_files_in_folder_api_url(self, server_url, folder_id): def build_list_files_in_folder_api_url(self, information):
"""Build the api url from the folder id and the server url. This API should list all the files in the folder""" """Build the api url from the parsed information. This API should list all the files in the folder"""
@abstractmethod @abstractmethod
def build_folder_api_url(self, server_url, folder_id): def build_folder_api_url(self, information):
"""Build the api url from the folder id and the server url. This API should return the name (and maybe amount of files) for the folder""" """Build the api url from the parsed information. This API should return the name (and maybe amount of files) for the folder"""
@abstractmethod @abstractmethod
def parse_payload_into_folder(self, payload): def parse_payload_into_folder(self, original_url, payload):
"""Parse the api payload and return a Folder instance""" """Parse the api payload and return a Folder instance"""
@abstractmethod @abstractmethod
@ -96,7 +98,7 @@ class File:
def is_valid(self): def is_valid(self):
"""Only keep files that are images, or that are named 'gcp_list.txt'""" """Only keep files that are images, or that are named 'gcp_list.txt'"""
_, file_extension = path.splitext(self.name) _, file_extension = path.splitext(self.name)
return file_extension.lower() in VALID_IMAGE_EXTENSIONS or file_name == 'gcp_list.txt' return file_extension.lower() in VALID_IMAGE_EXTENSIONS or self.name == 'gcp_list.txt'
def serialize(self): def serialize(self):
return {'name': self.name, 'url': self.url} return {'name': self.name, 'url': self.url}

Wyświetl plik

@ -0,0 +1,36 @@
# Check https://github.com/
from urllib.parse import urlparse
from os import path
from plugins.cloudimport.cloud_platform import File, Folder, CloudPlatform
from app.plugins import logger
class Platform(CloudPlatform):
    """Cloud Import platform extension for public GitHub repositories.

    Folder URLs look like:
    https://github.com/{owner}/{repo}/tree/{commit/branch/tag}/{path to folder}
    """

    def __init__(self):
        super().__init__('GitHub', 'https://github.com/{owner}/{repo}/tree/{commit/branch/tag}/{path to folder}')

    # Cloud Platform
    def parse_url(self, url):
        """Extract [owner, repo, ref, folder path] from a GitHub tree URL.

        Raises an Exception when the URL path doesn't contain at least
        /{owner}/{repo}/tree/{ref}.
        """
        parse_result = urlparse(url)
        path_split = parse_result.path.split('/')
        if len(path_split) < 5:
            raise Exception('Wrong URL format')
        # Leading '' comes from the path's initial '/'; the 4th component
        # is the literal 'tree' segment, which we discard.
        _, owner, repo, _, ref, *path_segments = path_split
        # Named folder_path (not 'path') to avoid shadowing os.path
        # imported at the top of this module.
        folder_path = '/'.join(path_segments)
        return [owner, repo, ref, folder_path]

    def build_folder_api_url(self, information):
        """API URL used to verify the folder exists and count its entries."""
        return self._build_contents_api_url(information)

    def parse_payload_into_folder(self, original_url, payload):
        """Build a Folder from the contents API payload.

        Assumes *payload* is the JSON list the contents API returns for a
        directory (a dict would mean the URL pointed at a single file) —
        TODO confirm upstream validation guarantees this.
        """
        # Use the last URL segment as a display name; strip a trailing
        # slash so URLs like .../tree/main/images/ don't yield an empty name.
        name = original_url.rstrip('/').split('/')[-1].title()
        return Folder(name, original_url, len(payload))

    def build_list_files_in_folder_api_url(self, information):
        """API URL that lists all the files in the folder."""
        # ToDo: add pagination
        return self._build_contents_api_url(information)

    def parse_payload_into_files(self, payload):
        """Map each contents entry to a File(name, download_url)."""
        return [File(file['name'], file['download_url']) for file in payload]

    # Helpers
    def _build_contents_api_url(self, information):
        """GitHub 'contents' API URL for [owner, repo, ref, folder path]."""
        [owner, repo, ref, folder_path] = information
        return 'https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}'.format(owner = owner, repo = repo, ref = ref, path = folder_path)

Wyświetl plik

@ -15,7 +15,7 @@ class Platform(CloudLibrary):
# So basically we are taking any file that contains the string 'gcp_list' and has the extension '.txt' and rename it to 'gcp_list.txt' # So basically we are taking any file that contains the string 'gcp_list' and has the extension '.txt' and rename it to 'gcp_list.txt'
return [self._map_gcp_file_if_necessary(file) for file in files] return [self._map_gcp_file_if_necessary(file) for file in files]
def get_server_and_folder_id_from_url(self, url): def parse_url(self, url):
parse_result = urlparse(url) parse_result = urlparse(url)
paths = parse_result.query.split('/') paths = parse_result.query.split('/')
if not 'category' in paths or paths.index('category') >= len(paths) - 1: if not 'category' in paths or paths.index('category') >= len(paths) - 1:
@ -28,17 +28,19 @@ class Platform(CloudLibrary):
path = path[0:path.index('index.php')] path = path[0:path.index('index.php')]
server = parse_result.scheme + '://' + parse_result.netloc + '/' + path server = parse_result.scheme + '://' + parse_result.netloc + '/' + path
return server, category_id return [server, category_id]
def build_folder_api_url(self, server_url, folder_id): def build_folder_api_url(self, information):
[server_url, folder_id] = information
return '{server_url}/ws.php?format=json&method=pwg.categories.getList&cat_id={folder_id}&recursive=false'.format(server_url = server_url, folder_id = folder_id) return '{server_url}/ws.php?format=json&method=pwg.categories.getList&cat_id={folder_id}&recursive=false'.format(server_url = server_url, folder_id = folder_id)
def parse_payload_into_folder(self, payload): def parse_payload_into_folder(self, original_url, payload):
result = payload['result']['categories'][0] result = payload['result']['categories'][0]
return Folder(result['name'], result['url'], result['nb_images']) return Folder(result['name'], result['url'], result['nb_images'])
def build_list_files_in_folder_api_url(self, server_url, folder_id): def build_list_files_in_folder_api_url(self, information):
# ToDo: add pagination # ToDo: add pagination
[server_url, folder_id] = information
return '{server_url}/ws.php?format=json&method=pwg.categories.getImages&cat_id={folder_id}&recursive=false&per_page=500'.format(server_url = server_url, folder_id = folder_id) return '{server_url}/ws.php?format=json&method=pwg.categories.getImages&cat_id={folder_id}&recursive=false&per_page=500'.format(server_url = server_url, folder_id = folder_id)
def parse_payload_into_files(self, payload): def parse_payload_into_files(self, payload):

Wyświetl plik

@ -353,7 +353,7 @@ CELERY_RESULT_BACKEND = os.environ.get('WO_BROKER', 'redis://localhost')
CELERY_TASK_SERIALIZER = 'json' CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json' CELERY_RESULT_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ['json'] CELERY_ACCEPT_CONTENT = ['json']
CELERY_INCLUDE=['worker.tasks'] CELERY_INCLUDE=['worker.tasks', 'app.plugins.worker']
CELERY_WORKER_REDIRECT_STDOUTS = False CELERY_WORKER_REDIRECT_STDOUTS = False
CELERY_WORKER_HIJACK_ROOT_LOGGER = False CELERY_WORKER_HIJACK_ROOT_LOGGER = False