diff --git a/scraper/management/commands/scrape.py b/scraper/management/commands/scrape.py index 3f52fa6..e5b1c20 100644 --- a/scraper/management/commands/scrape.py +++ b/scraper/management/commands/scrape.py @@ -27,7 +27,7 @@ from scraper.management.commands._util import require_lock, InvalidResponseError # TODO: use the /api/v1/server/followers and /api/v1/server/following endpoints in peertube instances SEED = 'mastodon.social' -TIMEOUT = 20 +TIMEOUT = 1 class Command(BaseCommand): @@ -51,9 +51,10 @@ class Command(BaseCommand): """Collect connected instances""" url = 'https://' + instance_name + '/api/v1/instance/peers' response = requests.get(url, timeout=TIMEOUT) - if response.status_code != 200: + json = response.json() + if response.status_code != 200 or not isinstance(json, list): raise InvalidResponseError("Could not get peers for {}".format(instance_name)) - return response.json() + return json def process_instance(self, instance_name: str): """Given an instance, get all the data we're interested in""" @@ -63,6 +64,9 @@ class Command(BaseCommand): data['instance'] = instance_name data['info'] = self.get_instance_info(instance_name) data['peers'] = [peer for peer in self.get_instance_peers(instance_name) if peer] # get rid of null peers + if not data['info'] and not data['peers']: + # We got a response from the instance, but it didn't have any of the information we were expecting. + raise InvalidResponseError data['status'] = 'success' return data except (InvalidResponseError, diff --git a/scraper/models.py b/scraper/models.py index 42b393c..e06120f 100644 --- a/scraper/models.py +++ b/scraper/models.py @@ -18,5 +18,5 @@ class InstanceStats(models.Model): domain_count = models.IntegerField(blank=True, null=True) status_count = models.IntegerField(blank=True, null=True) user_count = models.IntegerField(blank=True, null=True) - version = models.CharField(max_length=1000, blank=True) + version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db status = models.CharField(max_length=100)