Adding GitHub support

pull/892/head
nchamo 2020-07-25 19:21:15 -03:00
parent e45f834edd
commit 75e6a03a59
5 changed files with 74 additions and 34 deletions

View file

@@ -11,7 +11,7 @@ Currently, we support these kinds of sources:
A **cloud platform** is an online platform that can store files, like [Dropbox](https://www.dropbox.com/ "Dropbox") or [Google Drive](https://www.google.com/drive/ "Google Drive"). Platforms have the concept of a folder or album, where files are stored. By entering the folder's URL, we will use each platform's API to retrieve all the images in that folder and import them into WebODM.
Current platforms supported:
*None so far*
* [GitHub](https://github.com/ "GitHub")
#### Cloud Libraries
A **cloud library** is an extension of a cloud platform that has images organized in folders or albums. It differs from a cloud platform in that it can also list all the folders it contains, so a user can choose a specific folder to import from a list instead of entering a URL.
@@ -38,6 +38,6 @@ Now, there are a few known gaps to the system that you might encounter or that y
1. **Allow image resizing**:
Currently, when importing a folder, image resizing is not allowed. This might be a problem for users without a lot of disk space, so it might make sense to fix this.
1. **Support pagination when calling APIs**:
Currently, the workflow doesn't support calling APIs that require pagination (a rough sketch of one possible approach follows this list).
1. **Make platform extensions have their own js, like WebODM plugins**:
Currently, when a platform extension requires its own JavaScript code, you will need to add this code manually to the already existing code. It would be much easier if this was handled automatically, like the other parts of the add-on.
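
For illustration only, here is a minimal sketch of how paginated APIs could be followed, assuming Link-header pagination in the style of GitHub's API; `call_api_paginated` is a hypothetical helper and not part of the current add-on:

```python
import requests

def call_api_paginated(api_url, timeout=10):
    """Hypothetical helper: follow 'Link: <...>; rel="next"' headers (as GitHub's API
    provides) and return the concatenation of every page's JSON payload."""
    results = []
    next_url = api_url
    while next_url:
        response = requests.get(next_url, timeout=timeout)
        response.raise_for_status()
        results.extend(response.json())
        # requests exposes parsed Link headers via response.links
        next_url = response.links.get('next', {}).get('url')
    return results
```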

View file

@@ -16,14 +16,14 @@ class CloudPlatform(ABC):

    def verify_folder_url(self, folder_url):
        try:
            # Parse the url and get the id of the folder, and the server
            server_url, folder_id = self.get_server_and_folder_id_from_url(folder_url)
            # Parse the url and get all necessary information
            information = self.parse_url(folder_url)
            # Define the API url we will call to assert that the folder exists and is valid
            folder_api_url = self.build_folder_api_url(server_url, folder_id)
            folder_api_url = self.build_folder_api_url(information)
            # Call the API
            payload = self.call_api(folder_api_url)
            # Parse payload into a Folder instance
            return self.parse_payload_into_folder(payload)
            return self.parse_payload_into_folder(folder_url, payload)
        except Exception as e:
            logger.error(str(e))
            return None
@@ -33,11 +33,11 @@ class CloudPlatform(ABC):
        # Verify the url
        if self.verify_folder_url(folder_url) == None:
            raise Exception('Invalid URL')
        # Parse the url and get the id of the folder, and the server
        server_url, folder_id = self.get_server_and_folder_id_from_url(folder_url)
        # Parse the url and get all necessary information
        information = self.parse_url(folder_url)
        # Define the API url we will call to get all the files in the folder
        folder_api_url = self.build_list_files_in_folder_api_url(server_url, folder_id)
        folder_api_url = self.build_list_files_in_folder_api_url(information)
        # Call the API
        payload = self.call_api(folder_api_url)
        # Parse the payload into File instances
@@ -48,8 +48,10 @@ class CloudPlatform(ABC):
        return [file for file in files if file.is_valid()]

    def call_api(self, api_url):
        return requests.get(api_url, timeout=10).json()
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()
        return response.json()

    def platform_file_processing(self, files):
        """This method does nothing, but each platform might want to do some processing of the files and they can, by overriding this method"""
        return files
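
For context on the `raise_for_status()` change above: many APIs (GitHub's included) answer errors with a JSON body and a non-2xx status, so `.json()` alone would hand an error document to the payload parsers. A small illustration (the repository name is made up):

```python
import requests

response = requests.get('https://api.github.com/repos/some-user/missing-repo/contents/images', timeout=10)
print(response.status_code)   # e.g. 404
print(response.json())        # still valid JSON, but not a folder payload
response.raise_for_status()   # raises requests.exceptions.HTTPError instead,
                              # which verify_folder_url() catches, logs and maps to None
```
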
@ -58,19 +60,19 @@ class CloudPlatform(ABC):
return {'name': self.name, 'folder_url_example': self.folder_url_example, 'type': 'platform'}
@abstractmethod
def get_server_and_folder_id_from_url(self, url):
"""Parse the given url and return the folder id, and the server url. Will throw an exception is the url is invalid"""
def parse_url(self, url):
"""Parse the given url and return necessary information to prepare the next requests"""
@abstractmethod
def build_list_files_in_folder_api_url(self, server_url, folder_id):
"""Build the api url from the folder id and the server url. This API should list all the files in the folder"""
def build_list_files_in_folder_api_url(self, information):
"""Build the api url from the parsed information. This API should list all the files in the folder"""
@abstractmethod
def build_folder_api_url(self, server_url, folder_id):
"""Build the api url from the folder id and the server url. This API should return the name (and maybe amount of files) for the folder"""
def build_folder_api_url(self, information):
"""Build the api url from the parsed information. This API should return the name (and maybe amount of files) for the folder"""
@abstractmethod
def parse_payload_into_folder(self, payload):
def parse_payload_into_folder(self, original_url, payload):
"""Parse the api payload and return a Folder instance"""
@abstractmethod
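
To show how the refactored hooks fit together, here is a minimal sketch of a platform written against this interface. Everything about "Example Cloud" (URLs, payload keys) is made up; only the method names, constructor arguments and the `File`/`Folder` imports come from the code shown in this diff:

```python
from plugins.cloudimport.cloud_platform import File, Folder, CloudPlatform

class Platform(CloudPlatform):
    def __init__(self):
        super().__init__('Example Cloud', 'https://cloud.example.com/folders/{folder id}')

    def parse_url(self, url):
        # Return whatever shape is convenient; the base class just threads it through
        return [url.rstrip('/').split('/')[-1]]

    def build_folder_api_url(self, information):
        [folder_id] = information
        return 'https://api.cloud.example.com/folders/{}'.format(folder_id)

    def build_list_files_in_folder_api_url(self, information):
        [folder_id] = information
        return 'https://api.cloud.example.com/folders/{}/files'.format(folder_id)

    def parse_payload_into_folder(self, original_url, payload):
        return Folder(payload['name'], original_url, payload['images_count'])

    def parse_payload_into_files(self, payload):
        return [File(f['name'], f['url']) for f in payload]
```

With this in place, `verify_folder_url(...)` runs `parse_url` -> `build_folder_api_url` -> `call_api` -> `parse_payload_into_folder` and returns a `Folder`, or logs the error and returns `None` on any failure.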

View file

@@ -0,0 +1,36 @@
# Check https://github.com/
from urllib.parse import urlparse
from os import path
from plugins.cloudimport.cloud_platform import File, Folder, CloudPlatform
from app.plugins import logger

class Platform(CloudPlatform):
    def __init__(self):
        super().__init__('GitHub', 'https://github.com/{owner}/{repo}/tree/{commit/branch/tag}/{path to folder}')

    # Cloud Platform
    def parse_url(self, url):
        parse_result = urlparse(url)
        path_split = parse_result.path.split('/')
        if len(path_split) < 5:
            raise Exception('Wrong URL format')
        _, owner, repo, _, ref, *paths = path_split
        path = '/'.join(paths)
        return [owner, repo, ref, path]

    def build_folder_api_url(self, information):
        [owner, repo, ref, path] = information
        return 'https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}'.format(owner = owner, repo = repo, ref = ref, path = path)

    def parse_payload_into_folder(self, original_url, payload):
        name = original_url.split('/')[-1].title()
        return Folder(name, original_url, len(payload))

    def build_list_files_in_folder_api_url(self, information):
        # ToDo: add pagination
        [owner, repo, ref, path] = information
        return 'https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}'.format(owner = owner, repo = repo, ref = ref, path = path)

    def parse_payload_into_files(self, payload):
        return [File(file['name'], file['download_url']) for file in payload]
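
To make the URL handling concrete, here is what `parse_url` yields for a sample folder URL and the contents-API URL that gets built from it (owner, repo and folder names are made up):

```python
from urllib.parse import urlparse

url = 'https://github.com/some-user/some-repo/tree/master/images/flight-1'
path_split = urlparse(url).path.split('/')
# ['', 'some-user', 'some-repo', 'tree', 'master', 'images', 'flight-1']
_, owner, repo, _, ref, *paths = path_split
print([owner, repo, ref, '/'.join(paths)])
# ['some-user', 'some-repo', 'master', 'images/flight-1']
# build_folder_api_url / build_list_files_in_folder_api_url then produce:
# https://api.github.com/repos/some-user/some-repo/contents/images/flight-1?ref=master
```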

View file

@@ -14,8 +14,8 @@ class Platform(CloudLibrary):
        # So it might happen that if the File Uploader plugin is used for GCP files, that the files will need to be renamed to store multiple GCP files.
        # So basically we are taking any file that contains the string 'gcp_list' and has the extension '.txt' and rename it to 'gcp_list.txt'
        return [self._map_gcp_file_if_necessary(file) for file in files]

    def get_server_and_folder_id_from_url(self, url):
    def parse_url(self, url):
        parse_result = urlparse(url)
        paths = parse_result.query.split('/')
        if not 'category' in paths or paths.index('category') >= len(paths) - 1:
@@ -28,17 +28,19 @@ class Platform(CloudLibrary):
            path = path[0:path.index('index.php')]
        server = parse_result.scheme + '://' + parse_result.netloc + '/' + path
        return server, category_id
        return [server, category_id]

    def build_folder_api_url(self, server_url, folder_id):
    def build_folder_api_url(self, information):
        [server_url, folder_id] = information
        return '{server_url}/ws.php?format=json&method=pwg.categories.getList&cat_id={folder_id}&recursive=false'.format(server_url = server_url, folder_id = folder_id)

    def parse_payload_into_folder(self, payload):
    def parse_payload_into_folder(self, original_url, payload):
        result = payload['result']['categories'][0]
        return Folder(result['name'], result['url'], result['nb_images'])

    def build_list_files_in_folder_api_url(self, server_url, folder_id):
    def build_list_files_in_folder_api_url(self, information):
        # ToDo: add pagination
        [server_url, folder_id] = information
        return '{server_url}/ws.php?format=json&method=pwg.categories.getImages&cat_id={folder_id}&recursive=false&per_page=500'.format(server_url = server_url, folder_id = folder_id)

    def parse_payload_into_files(self, payload):

View file

@@ -353,7 +353,7 @@ CELERY_RESULT_BACKEND = os.environ.get('WO_BROKER', 'redis://localhost')
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_INCLUDE=['worker.tasks']
CELERY_INCLUDE=['worker.tasks', 'plugins.cloudimport.api_views']
CELERY_WORKER_REDIRECT_STDOUTS = False
CELERY_WORKER_HIJACK_ROOT_LOGGER = False