moonstream/moonstreamapi/moonstreamapi/providers/__init__.py

320 wiersze
11 KiB
Python

"""
Maps provider interface over all available providers.
Available providers are exposed through the `event_providers` dictionary.
Exposes all the standard event provider methods:
- get_events
- latest_events
- next_event
- previous_event
In addition to their standard arguments, adds the following arguments to each of these methods:
- `max_threads` - Since the mapper retrieves events from each providers in the background, this allows
the caller to specify how many threads they want to make available to the background executor. This
can be set to None, in which case it uses the Python default behaviour for the max_workers=None to
concurrent.futures.ThreadPoolExecutor. (Default: None)
- result_timeout - A float representing the number of seconds to wait for the background workers to get
events from the individual providers. (Default: 30.0)
- raise_on_error - Set this to True to raise an error if any of the individual event providers experiences an
error fulfilling its method. If set to False, ignores event providers which failed and still tries to
return data to the caller. (Default: False)
- sort_events - Set this to True to sort the events that come in from different providers. Set it to False
if the order does not matter and you would rather emphasize speed. Only available for method which involve
lists of events. (Default: True)
"""
import logging
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any, Dict, List, Optional, Tuple
from bugout.app import Bugout
from bugout.data import BugoutResource
from sqlalchemy.orm import Session
from .. import data
from ..stream_queries import StreamQuery
from . import bugout, moonworm_provider, transactions
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARN)
class ReceivingEventsException(Exception):
"""
Raised when error occurs during receiving events from provider.
"""
event_providers: Dict[str, Any] = {
moonworm_provider.EthereumMoonwormProvider.event_type: moonworm_provider.EthereumMoonwormProvider,
moonworm_provider.PolygonMoonwormProvider.event_type: moonworm_provider.PolygonMoonwormProvider,
moonworm_provider.MumbaiMoonwormProvider.event_type: moonworm_provider.MumbaiMoonwormProvider,
moonworm_provider.XDaiMoonwormProvider.event_type: moonworm_provider.XDaiMoonwormProvider,
transactions.EthereumTransactions.event_type: transactions.EthereumTransactions,
transactions.PolygonTransactions.event_type: transactions.PolygonTransactions,
transactions.MumbaiTransactions.event_type: transactions.MumbaiTransactions,
transactions.XDaiTransactions.event_type: transactions.XDaiTransactions,
bugout.polygon_whalewatch_provider.event_type: bugout.polygon_whalewatch_provider,
bugout.ethereum_txpool_provider.event_type: bugout.ethereum_txpool_provider,
bugout.ethereum_whalewatch_provider.event_type: bugout.ethereum_whalewatch_provider,
bugout.ethereum_txpool_provider.event_type: bugout.ethereum_txpool_provider,
}
def get_events(
db_session: Session,
bugout_client: Bugout,
data_journal_id: str,
data_access_token: str,
stream_boundary: data.StreamBoundary,
query: StreamQuery,
user_subscriptions: Dict[str, List[BugoutResource]],
max_threads: Optional[int] = None,
result_timeout: float = 30.0,
raise_on_error: bool = False,
sort_events: bool = True,
) -> Tuple[data.StreamBoundary, List[data.Event]]:
"""
Gets events from all providers and sends them back with the stream boundary.
"""
futures: Dict[str, Future] = {}
with ThreadPoolExecutor(
max_workers=max_threads, thread_name_prefix="event_providers_"
) as executor:
# Filter our not queried event_types
event_providers_filtered = {
key: value
for (key, value) in event_providers.items()
if value.event_type in query.subscription_types
}
for provider_name, provider in event_providers_filtered.items():
futures[provider_name] = executor.submit(
provider.get_events,
db_session,
bugout_client,
data_journal_id,
data_access_token,
stream_boundary,
query,
user_subscriptions,
)
results: Dict[str, Tuple[data.StreamBoundary, List[data.Event]]] = {}
for provider_name, future in futures.items():
try:
result = future.result(timeout=result_timeout)
if result is not None:
results[provider_name] = result
except Exception as e:
if not raise_on_error:
logger.warn(
f"Error receiving events from provider: {provider_name}:\n{repr(e)}"
)
else:
raise ReceivingEventsException(e)
stream_boundary = [boundary for boundary, _ in results.values()][0]
events = [event for _, event_list in results.values() for event in event_list]
if sort_events:
# If stream_boundary time was reversed, so do not reverse by timestamp,
# it is already in correct oreder
events.sort(
key=lambda event: event.event_timestamp,
reverse=not stream_boundary.reversed_time,
)
return (stream_boundary, events)
def latest_events(
db_session: Session,
bugout_client: Bugout,
data_journal_id: str,
data_access_token: str,
query: StreamQuery,
num_events: int,
user_subscriptions: Dict[str, List[BugoutResource]],
max_threads: Optional[int] = None,
result_timeout: float = 30.0,
raise_on_error: bool = False,
sort_events: bool = True,
) -> List[data.Event]:
"""
Gets num_events most recent events from all providers, compiles them into a single list, and
returns them to the caller.
NOTE: Unlike simple event providers, the interpretation of num_events here is that we return num_event
events per individual event provider!
"""
futures: Dict[str, Future] = {}
with ThreadPoolExecutor(
max_workers=max_threads, thread_name_prefix="event_providers_"
) as executor:
# Filter our not queried event_types
event_providers_filtered = {
key: value
for (key, value) in event_providers.items()
if value.event_type in query.subscription_types
}
for provider_name, provider in event_providers_filtered.items():
futures[provider_name] = executor.submit(
provider.latest_events,
db_session,
bugout_client,
data_journal_id,
data_access_token,
query,
num_events,
user_subscriptions,
)
results: Dict[str, List[data.Event]] = {}
for provider_name, future in futures.items():
try:
result = future.result(timeout=result_timeout)
if result is not None:
results[provider_name] = result
except Exception as e:
if not raise_on_error:
logger.warn(
f"Error receiving events from provider: {provider_name}:\n{repr(e)}"
)
else:
raise ReceivingEventsException(e)
events = [event for event_list in results.values() for event in event_list]
if sort_events:
events.sort(key=lambda event: event.event_timestamp, reverse=True)
return events
def next_event(
db_session: Session,
bugout_client: Bugout,
data_journal_id: str,
data_access_token: str,
stream_boundary: data.StreamBoundary,
query: StreamQuery,
user_subscriptions: Dict[str, List[BugoutResource]],
max_threads: Optional[int] = None,
result_timeout: float = 30.0,
raise_on_error: bool = False,
) -> Optional[data.Event]:
"""
Get earliest event after stream boundary across all available providers.
"""
futures: Dict[str, Future] = {}
with ThreadPoolExecutor(
max_workers=max_threads, thread_name_prefix="event_providers_"
) as executor:
# Filter our not queried event_types
event_providers_filtered = {
key: value
for (key, value) in event_providers.items()
if value.event_type in query.subscription_types
}
for provider_name, provider in event_providers_filtered.items():
futures[provider_name] = executor.submit(
provider.next_event,
db_session,
bugout_client,
data_journal_id,
data_access_token,
stream_boundary,
query,
user_subscriptions,
)
results: Dict[str, data.Event] = {}
for provider_name, future in futures.items():
try:
result = future.result(timeout=result_timeout)
if result is not None:
results[provider_name] = result
except Exception as e:
if not raise_on_error:
logger.warn(
f"Error receiving events from provider: {provider_name}:\n{repr(e)}"
)
else:
raise ReceivingEventsException(e)
event: Optional[data.Event] = None
for candidate in results.values():
if event is None:
event = candidate
elif event.event_timestamp > candidate.event_timestamp:
event = candidate
return event
def previous_event(
db_session: Session,
bugout_client: Bugout,
data_journal_id: str,
data_access_token: str,
stream_boundary: data.StreamBoundary,
query: StreamQuery,
user_subscriptions: Dict[str, List[BugoutResource]],
max_threads: Optional[int] = None,
result_timeout: float = 30.0,
raise_on_error: bool = False,
) -> Optional[data.Event]:
"""
Get latest event before stream boundary across all available providers.
"""
futures: Dict[str, Future] = {}
with ThreadPoolExecutor(
max_workers=max_threads, thread_name_prefix="event_providers_"
) as executor:
# Filter our not queried event_types
event_providers_filtered = {
key: value
for (key, value) in event_providers.items()
if value.event_type in query.subscription_types
}
for provider_name, provider in event_providers_filtered.items():
futures[provider_name] = executor.submit(
provider.previous_event,
db_session,
bugout_client,
data_journal_id,
data_access_token,
stream_boundary,
query,
user_subscriptions,
)
results: Dict[str, data.Event] = {}
for provider_name, future in futures.items():
try:
result = future.result(timeout=result_timeout)
if result is not None:
results[provider_name] = result
except Exception as e:
if not raise_on_error:
logger.warn(
f"Error receiving events from provider: {provider_name}:\n{repr(e)}"
)
else:
raise ReceivingEventsException(e)
event: Optional[data.Event] = None
for candidate in results.values():
if event is None:
event = candidate
elif event.event_timestamp < candidate.event_timestamp:
event = candidate
return event