kopia lustrzana https://github.com/bugout-dev/moonstream
Added ethereum_whalewatch provider to API
rodzic
117cfa881e
commit
2886aed32e
|
@ -32,7 +32,7 @@ from bugout.app import Bugout
|
||||||
from bugout.data import BugoutResource
|
from bugout.data import BugoutResource
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from . import ethereum_blockchain
|
from . import bugout, ethereum_blockchain
|
||||||
from .. import data
|
from .. import data
|
||||||
from ..stream_queries import StreamQuery
|
from ..stream_queries import StreamQuery
|
||||||
from moonstream import stream_queries
|
from moonstream import stream_queries
|
||||||
|
@ -40,7 +40,10 @@ from moonstream import stream_queries
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.setLevel(logging.WARN)
|
logger.setLevel(logging.WARN)
|
||||||
|
|
||||||
event_providers: Dict[str, Any] = {ethereum_blockchain.event_type: ethereum_blockchain}
|
event_providers: Dict[str, Any] = {
|
||||||
|
ethereum_blockchain.event_type: ethereum_blockchain,
|
||||||
|
bugout.whalewatch_provider.event_type: bugout.whalewatch_provider,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_events(
|
def get_events(
|
||||||
|
|
|
@ -0,0 +1,266 @@
|
||||||
|
"""
|
||||||
|
Event providers powered by Bugout journals.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from bugout.app import Bugout
|
||||||
|
from bugout.data import BugoutResource, BugoutSearchResult
|
||||||
|
from bugout.journal import SearchOrder
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from .. import data
|
||||||
|
from ..stream_queries import StreamQuery
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logger.setLevel(logging.WARN)
|
||||||
|
|
||||||
|
|
||||||
|
class BugoutEventProviderError(Exception):
|
||||||
|
"""
|
||||||
|
Catch-all error for BugoutEventProvider instances.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class BugoutEventProvider:
|
||||||
|
"""
|
||||||
|
Provides events (specified by a conjunction of tags) from a Bugout journal.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
event_type: str,
|
||||||
|
tags: Optional[List[str]] = None,
|
||||||
|
batch_size: int = 100,
|
||||||
|
timeout: float = 30.0,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
- event_type: Name of event this instance provides
|
||||||
|
- tags: Tags which define events that this provider works with
|
||||||
|
- batch_size: Number of events to read from journal at a time
|
||||||
|
- timeout: Request timeout for Bugout requests
|
||||||
|
"""
|
||||||
|
self.event_type = event_type
|
||||||
|
self.batch_size = batch_size
|
||||||
|
self.timeout = timeout
|
||||||
|
if tags is None:
|
||||||
|
tags = []
|
||||||
|
self.tags: List[str] = tags
|
||||||
|
self.query = [f"#{tag}" for tag in self.tags]
|
||||||
|
|
||||||
|
def validate_subscription(
|
||||||
|
self, subscription_resource_data: data.SubscriptionResourceData
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
This implementation is maximally permissive and returns True for all subscriptions as long as
|
||||||
|
their subscription_type_id is the configured event_type.
|
||||||
|
Subclasses of this provider can impose stricter criteria on submissions to the relevant event types.
|
||||||
|
"""
|
||||||
|
return subscription_resource_data.subscription_type_id == self.event_type
|
||||||
|
|
||||||
|
def entry_event(self, entry: BugoutSearchResult) -> data.Event:
|
||||||
|
"""
|
||||||
|
Load an event from a Bugout journal entry. Assumes that the entry content is a JSON string
|
||||||
|
with no additional markdown formatting.
|
||||||
|
"""
|
||||||
|
event_data = {}
|
||||||
|
if entry.content is not None:
|
||||||
|
event_data = json.loads(entry.content)
|
||||||
|
created_at = int(time.time())
|
||||||
|
return data.Event(
|
||||||
|
event_type=self.event_type,
|
||||||
|
event_timestamp=created_at,
|
||||||
|
event_data=event_data,
|
||||||
|
)
|
||||||
|
|
||||||
|
def parse_filters(
|
||||||
|
self, query: StreamQuery, user_subscriptions: Dict[str, List[BugoutResource]]
|
||||||
|
) -> Optional[List[str]]:
|
||||||
|
"""
|
||||||
|
Subclasses can provide additional constraints to apply to the journal search.
|
||||||
|
|
||||||
|
If None is returned, signals that no data should be returned from the provider at all.
|
||||||
|
"""
|
||||||
|
relevant_subscriptions = user_subscriptions.get(self.event_type)
|
||||||
|
if not relevant_subscriptions:
|
||||||
|
return None
|
||||||
|
return []
|
||||||
|
|
||||||
|
def get_events(
|
||||||
|
self,
|
||||||
|
db_session: Session,
|
||||||
|
bugout_client: Bugout,
|
||||||
|
data_journal_id: str,
|
||||||
|
data_access_token: str,
|
||||||
|
stream_boundary: data.StreamBoundary,
|
||||||
|
query: StreamQuery,
|
||||||
|
user_subscriptions: Dict[str, List[BugoutResource]],
|
||||||
|
) -> Optional[Tuple[data.StreamBoundary, List[data.Event]]]:
|
||||||
|
"""
|
||||||
|
Uses journal search endpoint to retrieve events for the given stream boundary and query constraints
|
||||||
|
from the connected journal.
|
||||||
|
"""
|
||||||
|
additional_constraints = self.parse_filters(query, user_subscriptions)
|
||||||
|
if additional_constraints is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
time_constraints: List[str] = []
|
||||||
|
if stream_boundary.start_time > 0:
|
||||||
|
operator = ">"
|
||||||
|
if stream_boundary.include_start:
|
||||||
|
operator = ">="
|
||||||
|
time_constraints.append(
|
||||||
|
f"created_at:{operator}{stream_boundary.start_time}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if stream_boundary.end_time is not None:
|
||||||
|
operator = "<"
|
||||||
|
if stream_boundary.include_end:
|
||||||
|
operator = "<="
|
||||||
|
time_constraints.append(f"created_at:{operator}{stream_boundary.end_time}")
|
||||||
|
|
||||||
|
final_query = " ".join(self.query + time_constraints + additional_constraints)
|
||||||
|
events: List[data.Event] = []
|
||||||
|
offset: Optional[int] = 0
|
||||||
|
while offset is not None:
|
||||||
|
search_results = bugout_client.search(
|
||||||
|
data_access_token,
|
||||||
|
data_journal_id,
|
||||||
|
final_query,
|
||||||
|
limit=self.batch_size,
|
||||||
|
offset=offset,
|
||||||
|
content=True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
order=SearchOrder.DESCENDING,
|
||||||
|
)
|
||||||
|
events.extend([self.entry_event(entry) for entry in search_results.results])
|
||||||
|
offset = search_results.next_offset
|
||||||
|
|
||||||
|
return stream_boundary, events
|
||||||
|
|
||||||
|
def latest_events(
|
||||||
|
self,
|
||||||
|
db_session: Session,
|
||||||
|
bugout_client: Bugout,
|
||||||
|
data_journal_id: str,
|
||||||
|
data_access_token: str,
|
||||||
|
query: StreamQuery,
|
||||||
|
num_events: int,
|
||||||
|
user_subscriptions: Dict[str, List[BugoutResource]],
|
||||||
|
) -> Optional[List[data.Event]]:
|
||||||
|
"""
|
||||||
|
Gets the latest events corresponding to this provider from the given journal.
|
||||||
|
"""
|
||||||
|
additional_constraints = self.parse_filters(query, user_subscriptions)
|
||||||
|
if additional_constraints is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if num_events > self.batch_size:
|
||||||
|
raise BugoutEventProviderError(
|
||||||
|
f"You requested too many events: event_type={self.event_type}, num_events={num_events}, limit={self.batch_size}"
|
||||||
|
)
|
||||||
|
|
||||||
|
final_query = " ".join(self.query + additional_constraints)
|
||||||
|
search_results = bugout_client.search(
|
||||||
|
data_access_token,
|
||||||
|
data_journal_id,
|
||||||
|
final_query,
|
||||||
|
limit=num_events,
|
||||||
|
content=True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
order=SearchOrder.DESCENDING,
|
||||||
|
)
|
||||||
|
return [self.entry_event(entry) for entry in search_results.results]
|
||||||
|
|
||||||
|
def next_event(
|
||||||
|
self,
|
||||||
|
db_session: Session,
|
||||||
|
bugout_client: Bugout,
|
||||||
|
data_journal_id: str,
|
||||||
|
data_access_token: str,
|
||||||
|
stream_boundary: data.StreamBoundary,
|
||||||
|
query: StreamQuery,
|
||||||
|
user_subscriptions: Dict[str, List[BugoutResource]],
|
||||||
|
) -> Optional[data.Event]:
|
||||||
|
"""
|
||||||
|
Get the earliest event that occurred after the time window represented by the given stream boundary.
|
||||||
|
"""
|
||||||
|
additional_constraints = self.parse_filters(query, user_subscriptions)
|
||||||
|
if additional_constraints is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if stream_boundary.end_time is None:
|
||||||
|
raise BugoutEventProviderError(
|
||||||
|
"Cannot return next event for a stream boundary which is current."
|
||||||
|
)
|
||||||
|
operator = ">="
|
||||||
|
if stream_boundary.include_end:
|
||||||
|
operator = ">"
|
||||||
|
additional_constraints.append(
|
||||||
|
f"created_at:{operator}{stream_boundary.end_time}"
|
||||||
|
)
|
||||||
|
|
||||||
|
final_query = " ".join(self.query + additional_constraints)
|
||||||
|
search_results = bugout_client.search(
|
||||||
|
data_access_token,
|
||||||
|
data_journal_id,
|
||||||
|
final_query,
|
||||||
|
limit=1,
|
||||||
|
content=True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
order=SearchOrder.ASCENDING,
|
||||||
|
)
|
||||||
|
if not search_results.results:
|
||||||
|
return None
|
||||||
|
return search_results.results[0]
|
||||||
|
|
||||||
|
def previous_event(
|
||||||
|
self,
|
||||||
|
db_session: Session,
|
||||||
|
bugout_client: Bugout,
|
||||||
|
data_journal_id: str,
|
||||||
|
data_access_token: str,
|
||||||
|
stream_boundary: data.StreamBoundary,
|
||||||
|
query: StreamQuery,
|
||||||
|
user_subscriptions: Dict[str, List[BugoutResource]],
|
||||||
|
) -> Optional[data.Event]:
|
||||||
|
"""
|
||||||
|
Get the latest event that occurred before the time window represented by the given stream boundary.
|
||||||
|
"""
|
||||||
|
additional_constraints = self.parse_filters(query, user_subscriptions)
|
||||||
|
if additional_constraints is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if stream_boundary.start_time == 0:
|
||||||
|
raise BugoutEventProviderError(
|
||||||
|
"Cannot return previous event for a stream boundary starting at the beginning of time."
|
||||||
|
)
|
||||||
|
operator = "<="
|
||||||
|
if stream_boundary.include_start:
|
||||||
|
operator = "<"
|
||||||
|
additional_constraints.append(
|
||||||
|
f"created_at:{operator}{stream_boundary.start_time}"
|
||||||
|
)
|
||||||
|
|
||||||
|
final_query = " ".join(self.query + additional_constraints)
|
||||||
|
search_results = bugout_client.search(
|
||||||
|
data_access_token,
|
||||||
|
data_journal_id,
|
||||||
|
final_query,
|
||||||
|
limit=1,
|
||||||
|
content=True,
|
||||||
|
timeout=self.timeout,
|
||||||
|
order=SearchOrder.DESCENDING,
|
||||||
|
)
|
||||||
|
if not search_results.results:
|
||||||
|
return None
|
||||||
|
return search_results.results[0]
|
||||||
|
|
||||||
|
|
||||||
|
whalewatch_provider = BugoutEventProvider(
|
||||||
|
event_type="ethereum_whalewatch", tags=["crawl_type:ethereum_trending"]
|
||||||
|
)
|
|
@ -133,6 +133,7 @@ def parse_filters(
|
||||||
|
|
||||||
if not requires_ethereum_blockchain_data:
|
if not requires_ethereum_blockchain_data:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return parsed_filters
|
return parsed_filters
|
||||||
|
|
||||||
|
|
||||||
|
@ -232,7 +233,6 @@ def get_events(
|
||||||
|
|
||||||
If the query does not require any data from this provider, returns None.
|
If the query does not require any data from this provider, returns None.
|
||||||
"""
|
"""
|
||||||
logger.warn("WHAT THE HELL PARAKEET")
|
|
||||||
parsed_filters = parse_filters(query, user_subscriptions)
|
parsed_filters = parse_filters(query, user_subscriptions)
|
||||||
if parsed_filters is None:
|
if parsed_filters is None:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -164,7 +164,7 @@ async def latest_events_handler(
|
||||||
query,
|
query,
|
||||||
1,
|
1,
|
||||||
user_subscriptions,
|
user_subscriptions,
|
||||||
result_timeout=600.0,
|
result_timeout=6.0,
|
||||||
raise_on_error=True,
|
raise_on_error=True,
|
||||||
sort_events=True,
|
sort_events=True,
|
||||||
)
|
)
|
||||||
|
@ -212,7 +212,7 @@ async def next_event_handler(
|
||||||
stream_boundary,
|
stream_boundary,
|
||||||
query,
|
query,
|
||||||
user_subscriptions,
|
user_subscriptions,
|
||||||
result_timeout=600.0,
|
result_timeout=6.0,
|
||||||
raise_on_error=True,
|
raise_on_error=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -260,7 +260,7 @@ async def previous_event_handler(
|
||||||
stream_boundary,
|
stream_boundary,
|
||||||
query,
|
query,
|
||||||
user_subscriptions,
|
user_subscriptions,
|
||||||
result_timeout=600.0,
|
result_timeout=6.0,
|
||||||
raise_on_error=True,
|
raise_on_error=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ asgiref==3.4.1
|
||||||
black==21.7b0
|
black==21.7b0
|
||||||
boto3==1.18.1
|
boto3==1.18.1
|
||||||
botocore==1.21.1
|
botocore==1.21.1
|
||||||
bugout==0.1.16
|
bugout==0.1.17
|
||||||
certifi==2021.5.30
|
certifi==2021.5.30
|
||||||
charset-normalizer==2.0.3
|
charset-normalizer==2.0.3
|
||||||
click==8.0.1
|
click==8.0.1
|
||||||
|
|
|
@ -10,7 +10,7 @@ setup(
|
||||||
name="moonstream",
|
name="moonstream",
|
||||||
version=MOONSTREAM_VERSION,
|
version=MOONSTREAM_VERSION,
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=["boto3", "bugout >= 0.1.16", "fastapi", "uvicorn"],
|
install_requires=["boto3", "bugout >= 0.1.17", "fastapi", "uvicorn"],
|
||||||
extras_require={
|
extras_require={
|
||||||
"dev": ["black", "mypy"],
|
"dev": ["black", "mypy"],
|
||||||
"distribute": ["setuptools", "twine", "wheel"],
|
"distribute": ["setuptools", "twine", "wheel"],
|
||||||
|
|
Ładowanie…
Reference in New Issue