From 8afabf9f0fb801307a30e940853477528edeae8e Mon Sep 17 00:00:00 2001
From: Andrey
Date: Wed, 14 Sep 2022 20:46:28 +0300
Subject: [PATCH] Add update logic.

---
Review notes (free text in this position is skipped by patch tools):

* This copy was rebuilt from a version whose newlines and indentation had
  been collapsed. Hunk line counts and the diffstat were re-checked and are
  consistent, but the leading whitespace of context lines (the loop nesting
  depth in cli.py, the SQL text in db.py) is reconstructed. Verify it against
  the target tree, or apply with whitespace-tolerant options, before relying
  on it. The sender address is missing from the From: header.
* The added line `writed_labels += 1` inside the `metadata_id is None` branch
  had lost its `+` marker; it is restored here (required by the +122,26
  hunk count).
* update_metadata(): the `id` parameter is annotated Dict[str, Any], but the
  only caller passes TokenURIs.metadata_id, declared as Optional[UUID] in
  data.py. The annotation should be UUID (needs an import in db.py). The
  name also shadows the builtin `id`.
* update_metadata(): `label` is annotated Any although label_data,
  block_number and block_timestamp are read from it.
* cli.py: both branches increment `writed_labels`; an if/else with a single
  increment after it would be equivalent and drop the `continue`.
* update_metadata() uses synchronize_session=False, so label objects already
  loaded in the session are not refreshed by the UPDATE.

 crawlers/mooncrawl/mooncrawl/data.py          |  4 ++-
 .../mooncrawl/metadata_crawler/cli.py         | 27 ++++++++++++----
 .../mooncrawl/metadata_crawler/db.py          | 32 +++++++++++++++++--
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/crawlers/mooncrawl/mooncrawl/data.py b/crawlers/mooncrawl/mooncrawl/data.py
index 0e3da95d..9757033a 100644
--- a/crawlers/mooncrawl/mooncrawl/data.py
+++ b/crawlers/mooncrawl/mooncrawl/data.py
@@ -1,7 +1,8 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
+from uuid import UUID
 
 from pydantic import BaseModel, Field
 
@@ -57,3 +58,4 @@ class TokenURIs(BaseModel):
     block_number: int
     block_timestamp: str
     address: str
+    metadata_id: Optional[UUID] = None
diff --git a/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py b/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py
index 75f3767d..b51a130b 100644
--- a/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py
+++ b/crawlers/mooncrawl/mooncrawl/metadata_crawler/cli.py
@@ -20,6 +20,7 @@ from .db import (
     get_uri_addresses,
     get_not_updated_metadata_for_address,
     metadata_to_label,
+    update_metadata,
 )
 from ..settings import (
     MOONSTREAM_METADATA_CRAWLER_THREADS,
@@ -121,12 +122,26 @@ def parse_metadata(blockchain_type: AvailableBlockchainType, batch_size: int):
                     for result in executor.map(
                         crawl_uri, [request for request in requests_chunk]
                     ):
-                        db_session.add(
-                            metadata_to_label(
-                                metadata=result[0],
-                                blockchain_type=blockchain_type,
-                                token_uri_data=result[1],
-                            )
+
+                        metadata = result[0]
+                        token_uri_data = result[1]
+                        label = metadata_to_label(
+                            metadata=metadata,
+                            blockchain_type=blockchain_type,
+                            token_uri_data=token_uri_data,
+                        )
+
+                        if token_uri_data.metadata_id is None:
+
+                            db_session.add(label)
+                            writed_labels += 1
+                            continue
+
+                        update_metadata(
+                            db_session,
+                            blockchain_type,
+                            token_uri_data.metadata_id,
+                            label,
                         )
                         writed_labels += 1
 
diff --git a/crawlers/mooncrawl/mooncrawl/metadata_crawler/db.py b/crawlers/mooncrawl/mooncrawl/metadata_crawler/db.py
index d21ed374..9aaf18d9 100644
--- a/crawlers/mooncrawl/mooncrawl/metadata_crawler/db.py
+++ b/crawlers/mooncrawl/mooncrawl/metadata_crawler/db.py
@@ -117,7 +117,8 @@ def get_not_updated_metadata_for_address(
                     SELECT
                         DISTINCT ON((label_data ->> 'token_id') :: int) (label_data ->> 'token_id') :: text as token_id,
                         label_data ->>'token_uri' as token_uri,
-                        block_number
+                        block_number,
+                        id
                     from
                         {}
                     where
@@ -135,7 +136,8 @@ def get_not_updated_metadata_for_address(
                         current_tokens_uri.block_timestamp as block_timestamp,
                         current_tokens_uri.address as address,
                         tokens_metadata.block_number as metadata_block_number,
-                        tokens_metadata.token_uri as metadata_token_uri
+                        tokens_metadata.token_uri as metadata_token_uri,
+                        tokens_metadata.id as metadata_id
                     from
                         current_tokens_uri
                         left JOIN tokens_metadata ON current_tokens_uri.token_id = tokens_metadata.token_id
@@ -145,7 +147,8 @@ def get_not_updated_metadata_for_address(
                     state_token_uri,
                     view_state_block_number,
                     block_timestamp,
-                    address
+                    address,
+                    metadata_id
                 from
                     tokens_state
                 where
@@ -167,8 +170,31 @@ def get_not_updated_metadata_for_address(
             block_number=data[2],
             block_timestamp=data[3],
             address=data[4],
+            metadata_id=data[5],
         )
         for data in current_metadata
     ]
 
     return results
+
+
+def update_metadata(
+    db_session: Session,
+    blockchain_type: AvailableBlockchainType,
+    id: Dict[str, Any],
+    label: Any,
+) -> None:
+    """
+    Update metadata.
+    """
+
+    label_model = get_label_model(blockchain_type)
+
+    db_session.query(label_model).filter(label_model.id == id).update(
+        {
+            "label_data": label.label_data,
+            "block_number": label.block_number,
+            "block_timestamp": label.block_timestamp,
+        },
+        synchronize_session=False,
+    )