diff --git a/apiv1/admin.py b/apiv1/admin.py deleted file mode 100644 index 8c38f3f..0000000 --- a/apiv1/admin.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.contrib import admin - -# Register your models here. diff --git a/apiv1/models.py b/apiv1/models.py deleted file mode 100644 index 71a8362..0000000 --- a/apiv1/models.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.db import models - -# Create your models here. diff --git a/apiv1/serializers.py b/apiv1/serializers.py index 86ed8ad..5986250 100644 --- a/apiv1/serializers.py +++ b/apiv1/serializers.py @@ -1,12 +1,6 @@ from rest_framework import serializers from collections import OrderedDict -from scraper.models import Instance, InstanceStats - - -class InstanceStatsSerializer(serializers.ModelSerializer): - class Meta: - model = InstanceStats - exclude = ('id', 'instance', 'status') +from scraper.models import Instance class InstanceListSerializer(serializers.ModelSerializer): @@ -25,8 +19,7 @@ class InstanceListSerializer(serializers.ModelSerializer): class InstanceDetailSerializer(serializers.ModelSerializer): peers = InstanceListSerializer(many=True, read_only=True) - stats = InstanceStatsSerializer(many=True, read_only=True) class Meta: model = Instance - fields = ('name', 'stats', 'peers') + fields = '__all__' diff --git a/apiv1/tests.py b/apiv1/tests.py deleted file mode 100644 index 7ce503c..0000000 --- a/apiv1/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/scraper/management/commands/scrape.py b/scraper/management/commands/scrape.py index e5b1c20..d7fdb60 100644 --- a/scraper/management/commands/scrape.py +++ b/scraper/management/commands/scrape.py @@ -10,7 +10,7 @@ import time from datetime import datetime from django.core.management.base import BaseCommand from django.db import transaction -from scraper.models import Instance, InstanceStats +from scraper.models import Instance from scraper.management.commands._util import require_lock, InvalidResponseError, get_key # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -80,29 +80,19 @@ class Command(BaseCommand): @require_lock(Instance, 'ACCESS EXCLUSIVE') def save_data(self, data): """Save data""" - user_count = get_key(data, ['info', 'stats', 'user_count']) - if user_count: - instance, _ = Instance.objects.update_or_create( - name=get_key(data, ['instance']), - defaults={'user_count': user_count}, - ) - else: - instance, _ = Instance.objects.get_or_create(name=get_key(data, ['instance'])) - if data['status'] == 'success': - # Save stats - stats = InstanceStats( - instance=instance, - domain_count=get_key(data, ['info', 'stats', 'domain_count']), - status_count=get_key(data, ['info', 'stats', 'status_count']), - user_count=get_key(data, ['info', 'stats', 'user_count']), - version=get_key(data, ['info', 'version']), - status=get_key(data, ['status']), - ) - stats.save() + defaults = dict() + defaults['domain_count'] = get_key(data, ['info', 'stats', 'domain_count']) or None + defaults['status_count'] = get_key(data, ['info', 'stats', 'status_count']) or None + defaults['user_count'] = get_key(data, ['info', 'stats', 'user_count']) or None + defaults['version'] = get_key(data, ['info', 'version']) + defaults['status'] = get_key(data, ['status']) + instance, _ = Instance.objects.update_or_create( + name=get_key(data, ['instance']), + defaults=defaults, + ) + if defaults['status'] == 'success' and data['peers']: # Save peers # TODO: make this shared amongst threads so the database only needs to be queried once - if not data['peers']: - return existing_instance_ids = Instance.objects.values_list('name', flat=True) existing_peers = Instance.objects.filter(name__in=existing_instance_ids) new_peer_ids = [peer for peer in data['peers'] if peer not in existing_instance_ids] @@ -110,12 +100,6 @@ class Command(BaseCommand): new_peers = Instance.objects.bulk_create([Instance(name=peer) for peer in new_peer_ids]) instance.peers.set(new_peers) instance.peers.set(existing_peers) - else: - stats = InstanceStats( - instance=instance, - status=get_key(data, ['status']) - ) - stats.save() self.stdout.write("{} - Saved {}".format(datetime.now().isoformat(), data['instance'])) def worker(self, queue: multiprocessing.JoinableQueue): diff --git a/scraper/migrations/0001_initial.py b/scraper/migrations/0001_initial.py index ced10ac..035f56a 100644 --- a/scraper/migrations/0001_initial.py +++ b/scraper/migrations/0001_initial.py @@ -1,7 +1,6 @@ -# Generated by Django 2.1 on 2018-08-26 17:26 +# Generated by Django 2.1 on 2018-08-29 17:37 from django.db import migrations, models -import django.db.models.deletion class Migration(migrations.Migration): @@ -16,20 +15,14 @@ class Migration(migrations.Migration): name='Instance', fields=[ ('name', models.CharField(max_length=200, primary_key=True, serialize=False)), - ('peers', models.ManyToManyField(to='scraper.Instance')), - ], - ), - migrations.CreateModel( - name='InstanceStats', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('timestamp', models.DateTimeField(auto_now_add=True)), - ('num_peers', models.IntegerField(blank=True, null=True)), - ('num_statuses', models.IntegerField(blank=True, null=True)), - ('num_users', models.IntegerField(blank=True, null=True)), + ('domain_count', models.IntegerField(blank=True, null=True)), + ('status_count', models.IntegerField(blank=True, null=True)), + ('user_count', models.IntegerField(blank=True, null=True)), ('version', models.CharField(blank=True, max_length=1000)), ('status', models.CharField(max_length=100)), - ('instance', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scraper.Instance')), + ('first_seen', models.DateTimeField(auto_now_add=True)), + ('last_updated', models.DateTimeField(auto_now=True)), + ('peers', models.ManyToManyField(related_name='_instance_peers_+', to='scraper.Instance')), ], ), ] diff --git a/scraper/migrations/0002_auto_20180826_2201.py b/scraper/migrations/0002_auto_20180826_2201.py deleted file mode 100644 index a84f4b7..0000000 --- a/scraper/migrations/0002_auto_20180826_2201.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 2.1 on 2018-08-26 22:01 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('scraper', '0001_initial'), - ] - - operations = [ - migrations.AlterField( - model_name='instancestats', - name='instance', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='stats', to='scraper.Instance'), - ), - ] diff --git a/scraper/migrations/0003_auto_20180828_2206.py b/scraper/migrations/0003_auto_20180828_2206.py deleted file mode 100644 index d8c9369..0000000 --- a/scraper/migrations/0003_auto_20180828_2206.py +++ /dev/null @@ -1,33 +0,0 @@ -# Generated by Django 2.1 on 2018-08-28 22:06 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('scraper', '0002_auto_20180826_2201'), - ] - - operations = [ - migrations.RenameField( - model_name='instancestats', - old_name='num_peers', - new_name='domain_count', - ), - migrations.RenameField( - model_name='instancestats', - old_name='num_statuses', - new_name='status_count', - ), - migrations.RenameField( - model_name='instancestats', - old_name='num_users', - new_name='user_count', - ), - migrations.AddField( - model_name='instance', - name='user_count', - field=models.IntegerField(blank=True, null=True), - ), - ] diff --git a/scraper/models.py b/scraper/models.py index e06120f..e19f1c7 100644 --- a/scraper/models.py +++ b/scraper/models.py @@ -2,21 +2,24 @@ from django.db import models class Instance(models.Model): + # Primary key name = models.CharField(max_length=200, primary_key=True) - peers = models.ManyToManyField('self', symmetrical=False) - user_count = models.IntegerField(blank=True, null=True) - -class InstanceStats(models.Model): - # TODO: collect everything the API exposes - timestamp = models.DateTimeField(auto_now_add=True) - instance = models.ForeignKey( - Instance, - on_delete=models.CASCADE, - related_name='stats', - ) + # Details domain_count = models.IntegerField(blank=True, null=True) status_count = models.IntegerField(blank=True, null=True) user_count = models.IntegerField(blank=True, null=True) version = models.CharField(max_length=1000, blank=True) # In Django CharField is never stored as NULL in the db status = models.CharField(max_length=100) + + # Foreign keys + # The peers endpoint returns a "list of all domain names known to this instance" + # (https://github.com/tootsuite/mastodon/pull/6125) + # In other words, an asymmetrical relationship here doesn't make much sense. If we one day can get a list of + # instances that the instance actively follows (i.e. knows and not suspended), it's worth adding an + # asymmetrical relation. + peers = models.ManyToManyField('self', symmetrical=True) + + # Automatic fields + first_seen = models.DateTimeField(auto_now_add=True) + last_updated = models.DateTimeField(auto_now=True)