kopia lustrzana https://github.com/saubury/mastodon-stream
further cleanup
rodzic
6d80c5897e
commit
32ad2e6ea6
|
@ -1,5 +1,5 @@
|
||||||
---
|
---
|
||||||
version: '3'
|
version: '3.9'
|
||||||
services:
|
services:
|
||||||
zookeeper:
|
zookeeper:
|
||||||
image: confluentinc/cp-zookeeper:${CONF_VER}
|
image: confluentinc/cp-zookeeper:${CONF_VER}
|
||||||
|
@ -13,7 +13,7 @@ services:
|
||||||
start_period: 15s
|
start_period: 15s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 10
|
retries: 30
|
||||||
environment:
|
environment:
|
||||||
ZOOKEEPER_CLIENT_PORT: 2181
|
ZOOKEEPER_CLIENT_PORT: 2181
|
||||||
ZOOKEEPER_TICK_TIME: 2000
|
ZOOKEEPER_TICK_TIME: 2000
|
||||||
|
@ -34,7 +34,7 @@ services:
|
||||||
start_period: 15s
|
start_period: 15s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 20
|
retries: 30
|
||||||
environment:
|
environment:
|
||||||
KAFKA_BROKER_ID: 1
|
KAFKA_BROKER_ID: 1
|
||||||
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
|
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
|
||||||
|
@ -67,18 +67,16 @@ services:
|
||||||
- "8081:8081"
|
- "8081:8081"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: nc -z localhost 8081 || exit -1
|
test: nc -z localhost 8081 || exit -1
|
||||||
start_period: 15s
|
start_period: 30s
|
||||||
interval: 5s
|
interval: 20s
|
||||||
timeout: 10s
|
timeout: 60s
|
||||||
retries: 10
|
retries: 300
|
||||||
environment:
|
environment:
|
||||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
||||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092'
|
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092'
|
||||||
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
|
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
|
||||||
|
|
||||||
connect:
|
connect:
|
||||||
# image: cnfldemos/cp-server-connect-datagen:0.5.0-6.2.0
|
|
||||||
# image: confluentinc/cp-server-connect:${CONF_VER}
|
|
||||||
build:
|
build:
|
||||||
context: ./kafka-connect
|
context: ./kafka-connect
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
|
@ -95,7 +93,7 @@ services:
|
||||||
start_period: 15s
|
start_period: 15s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 10
|
retries: 30
|
||||||
environment:
|
environment:
|
||||||
CONNECT_BOOTSTRAP_SERVERS: 'broker:29092'
|
CONNECT_BOOTSTRAP_SERVERS: 'broker:29092'
|
||||||
CONNECT_REST_ADVERTISED_HOST_NAME: connect
|
CONNECT_REST_ADVERTISED_HOST_NAME: connect
|
||||||
|
@ -130,7 +128,6 @@ services:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
connect:
|
connect:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
# - ksqldb-server
|
|
||||||
ports:
|
ports:
|
||||||
- "9021:9021"
|
- "9021:9021"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
@ -138,7 +135,7 @@ services:
|
||||||
start_period: 15s
|
start_period: 15s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 10
|
retries: 30
|
||||||
environment:
|
environment:
|
||||||
CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092'
|
CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092'
|
||||||
CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'http://connect:8083'
|
CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'http://connect:8083'
|
||||||
|
@ -181,76 +178,23 @@ services:
|
||||||
exit 0;
|
exit 0;
|
||||||
"
|
"
|
||||||
|
|
||||||
# jupyter:
|
jupyter:
|
||||||
# image: jupyter/scipy-notebook
|
image: jupyter/scipy-notebook
|
||||||
# ports:
|
ports:
|
||||||
# - "8888:8888"
|
- "8888:8888"
|
||||||
# volumes:
|
healthcheck:
|
||||||
# - ./notebooks:/home/jovyan/
|
test: nc -z localhost 8888 || exit -1
|
||||||
# environment:
|
start_period: 15s
|
||||||
# JUPYTER_ENABLE_LAB: "yes"
|
interval: 5s
|
||||||
# JUPYTER_RUNTIME_DIR: "/tmp"
|
timeout: 10s
|
||||||
# command: "start-notebook.sh --NotebookApp.token='' --NotebookApp.password=''"
|
retries: 10
|
||||||
|
volumes:
|
||||||
|
- ./notebooks:/home/jovyan/
|
||||||
|
user: root
|
||||||
# ksqldb-server:
|
environment:
|
||||||
# image: confluentinc/cp-ksqldb-server:${CONF_VER}
|
JUPYTER_ENABLE_LAB: "yes"
|
||||||
# platform: linux/amd64
|
JUPYTER_RUNTIME_DIR: "/tmp"
|
||||||
# hostname: ksqldb-server
|
NB_USER: simonaubury
|
||||||
# container_name: ksqldb-server
|
CHOWN_HOME: 'yes'
|
||||||
# depends_on:
|
CHOWN_HOME_OPTS: '-R'
|
||||||
# - broker
|
command: "start-notebook.sh --allow-root --ip=0.0.0.0 --NotebookApp.token='' --NotebookApp.password=''"
|
||||||
# - connect
|
|
||||||
# ports:
|
|
||||||
# - "8088:8088"
|
|
||||||
# environment:
|
|
||||||
# KSQL_CONFIG_DIR: "/etc/ksql"
|
|
||||||
# KSQL_BOOTSTRAP_SERVERS: "broker:29092"
|
|
||||||
# KSQL_HOST_NAME: ksqldb-server
|
|
||||||
# KSQL_LISTENERS: "http://0.0.0.0:8088"
|
|
||||||
# KSQL_CACHE_MAX_BYTES_BUFFERING: 0
|
|
||||||
# KSQL_KSQL_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
|
|
||||||
# KSQL_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor"
|
|
||||||
# KSQL_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor"
|
|
||||||
# KSQL_KSQL_CONNECT_URL: "http://connect:8083"
|
|
||||||
# KSQL_KSQL_LOGGING_PROCESSING_TOPIC_REPLICATION_FACTOR: 1
|
|
||||||
# KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: 'true'
|
|
||||||
# KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: 'true'
|
|
||||||
|
|
||||||
# ksqldb-cli:
|
|
||||||
# image: confluentinc/cp-ksqldb-cli:${CONF_VER}
|
|
||||||
# platform: linux/amd64
|
|
||||||
# container_name: ksqldb-cli
|
|
||||||
# depends_on:
|
|
||||||
# - broker
|
|
||||||
# - connect
|
|
||||||
# - ksqldb-server
|
|
||||||
# entrypoint: /bin/sh
|
|
||||||
# tty: true
|
|
||||||
|
|
||||||
|
|
||||||
# elasticsearch:
|
|
||||||
# image: docker.elastic.co/elasticsearch/elasticsearch:${ELST_VER}
|
|
||||||
# platform: linux/amd64
|
|
||||||
# container_name: elasticsearch
|
|
||||||
# ports:
|
|
||||||
# - 9200:9200
|
|
||||||
# environment:
|
|
||||||
# xpack.security.enabled: "false"
|
|
||||||
# ES_JAVA_OPTS: "-Xms1g -Xmx1g"
|
|
||||||
# discovery.type: "single-node"
|
|
||||||
|
|
||||||
|
|
||||||
# kibana:
|
|
||||||
# image: docker.elastic.co/kibana/kibana:${ELST_VER}
|
|
||||||
# platform: linux/amd64
|
|
||||||
# container_name: kibana
|
|
||||||
# hostname: kibana
|
|
||||||
# depends_on:
|
|
||||||
# - elasticsearch
|
|
||||||
# ports:
|
|
||||||
# - 5601:5601
|
|
||||||
# environment:
|
|
||||||
# xpack.security.enabled: "false"
|
|
||||||
# discovery.type: "single-node"
|
|
||||||
|
|
Plik binarny nie jest wyświetlany.
Przed Szerokość: | Wysokość: | Rozmiar: 21 KiB Po Szerokość: | Wysokość: | Rozmiar: 20 KiB |
Plik binarny nie jest wyświetlany.
Przed Szerokość: | Wysokość: | Rozmiar: 61 KiB Po Szerokość: | Wysokość: | Rozmiar: 72 KiB |
|
@ -87,6 +87,22 @@ select
|
||||||
, mastodon_text
|
, mastodon_text
|
||||||
from read_parquet('20230213/mastodon-topic/partition=0/*.parquet');
|
from read_parquet('20230213/mastodon-topic/partition=0/*.parquet');
|
||||||
|
|
||||||
|
insert into toots
|
||||||
|
select
|
||||||
|
m_id
|
||||||
|
, created_at
|
||||||
|
, created_at_str
|
||||||
|
, app
|
||||||
|
, url
|
||||||
|
, base_url
|
||||||
|
, language
|
||||||
|
, favourites
|
||||||
|
, username
|
||||||
|
, bot
|
||||||
|
, tags
|
||||||
|
, characters
|
||||||
|
, mastodon_text
|
||||||
|
from read_parquet('20230216/mastodon-topic/partition=0/*.parquet');
|
||||||
|
|
||||||
|
|
||||||
create table all_toots
|
create table all_toots
|
||||||
|
|
|
@ -1,6 +1,3 @@
|
||||||
# FROM confluentinc/cp-server-connect-base:7.3.1
|
|
||||||
FROM confluentinc/cp-server-connect:7.1.0
|
FROM confluentinc/cp-server-connect:7.1.0
|
||||||
|
|
||||||
RUN confluent-hub install --no-prompt confluentinc/kafka-connect-s3:10.3.0
|
RUN confluent-hub install --no-prompt confluentinc/kafka-connect-s3:10.3.0
|
||||||
|
|
||||||
# ENTRYPOINT ["tail", "-f", "/dev/null"]
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
Ładowanie…
Reference in New Issue