Works with multiple sheets (hardcoded)

2021-01-19 10:04:30 +00:00 · 2021-01-19 10:04:30 +00:00 · e55a4d55dc
commit e55a4d55dc
--- a/update.py
+++ b/update.py
@ -11,95 +11,101 @@ from botocore.errorfactory import ClientError
 load_dotenv()

 gc = gspread.service_account()
-sh = gc.open("Bellingcat media archiver")
-wks = sh.sheet1
-values = wks.get_all_values()
+sh = gc.open("Media Sheet (January 16-20 + People)")

-ydl_opts = {'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False}
-ydl = youtube_dl.YoutubeDL(ydl_opts)
+for ii in range(5):
+    wks = sh.get_worksheet(ii)
+    values = wks.get_all_values()

-s3_client = boto3.client('s3',
-        region_name=os.getenv('DO_SPACES_REGION'),
-        endpoint_url='https://{}.digitaloceanspaces.com'.format(os.getenv('DO_SPACES_REGION')),
-        aws_access_key_id=os.getenv('DO_SPACES_KEY'),
-        aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))
+    ydl_opts = {'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False}
+    ydl = youtube_dl.YoutubeDL(ydl_opts)

-for i in range(2, len(values)+1):
-    v = values[i-1]
+    s3_client = boto3.client('s3',
+            region_name=os.getenv('DO_SPACES_REGION'),
+            endpoint_url='https://{}.digitaloceanspaces.com'.format(os.getenv('DO_SPACES_REGION')),
+            aws_access_key_id=os.getenv('DO_SPACES_KEY'),
+            aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))

-    if v[2] == "":
-        print(v[0])
+    for i in range(2, len(values)+1):
+        v = values[i-1]

-        try:
-            info = ydl.extract_info(v[0], download=False)
-
-            if 'entries' in info:
-                if len(info['entries']) > 1:
-                    raise Exception('ERROR: Cannot archive channels or pages with multiple videos')
-
-                filename = ydl.prepare_filename(info['entries'][0])
-            else:
-                filename = ydl.prepare_filename(info)
-            
-            print(filename)
-            key = filename.split('/')[1]
-            cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
+        if v[1] != "" and v[10] == "":
+            print(v[1])

            try:
-                s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)
+                info = ydl.extract_info(v[1], download=False)

-                # file exists
-
-                update = [{
-                    'range': 'C' + str(i),
-                    'values': [['already archived']]
-                }, {
-                    'range': 'D' + str(i),
-                    'values': [[cdn_url]]
-                }]
-
-                wks.batch_update(update)
-
-            except ClientError:
-                # Not found
-
-                # sometimes this results in a different filename, so do this again
-                info = ydl.extract_info(v[0], download=True)
                if 'entries' in info:
+                    if len(info['entries']) > 1:
+                        raise Exception('ERROR: Cannot archive channels or pages with multiple videos')
+
                    filename = ydl.prepare_filename(info['entries'][0])
                else:
                    filename = ydl.prepare_filename(info)
-
+                
                print(filename)
                key = filename.split('/')[1]
                cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)

-                # with open(filename, 'rb') as f:
-                #     s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
+                try:
+                    s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)

-                os.remove(filename)
+                    # file exists
+
+                    update = [{
+                        'range': 'K' + str(i),
+                        'values': [['already archived']]
+                    }, {
+                        'range': 'M' + str(i),
+                        'values': [[cdn_url]]
+                    }]
+
+                    wks.batch_update(update)
+
+                except ClientError:
+                    # Not found
+
+                    # sometimes this results in a different filename, so do this again
+                    info = ydl.extract_info(v[1], download=True)
+                    if 'entries' in info:
+                        filename = ydl.prepare_filename(info['entries'][0])
+                    else:
+                        filename = ydl.prepare_filename(info)
+
+
+                    if not os.path.exists(filename):
+                        filename = filename.split('.')[0] + '.mkv'
+
+                    print(filename)
+                    key = filename.split('/')[1]
+                    cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
+
+                    with open(filename, 'rb') as f:
+                        s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
+
+                    os.remove(filename)
+
+                    update = [{
+                        'range': 'K' + str(i),
+                        'values': [['successful']]
+                    }, {
+                        'range': 'L' + str(i),
+                        'values': [[datetime.datetime.now().isoformat()]]
+                    }, {
+                        'range': 'M' + str(i),
+                        'values': [[cdn_url]]
+                    }]
+
+                    wks.batch_update(update)
+            except:
+                t, value, traceback = sys.exc_info()

                update = [{
-                    'range': 'C' + str(i),
-                    'values': [['successful-desktop']]
+                    'range': 'K' + str(i),
+                    'values': [[str(value)]]
                }, {
-                    'range': 'B' + str(i),
+                    'range': 'L' + str(i),
                    'values': [[datetime.datetime.now().isoformat()]]
-                }, {
-                    'range': 'D' + str(i),
-                    'values': [[cdn_url]]
                }]

                wks.batch_update(update)
-        except:
-            t, value, traceback = sys.exc_info()
-
-            update = [{
-                'range': 'C' + str(i),
-                'values': [[str(value)]]
-            }, {
-                'range': 'B' + str(i),
-                'values': [[datetime.datetime.now().isoformat()]]
-            }]
-
-            wks.batch_update(update)