From f8dc638b2ca65fbe2437538cf57fa280e3ba7794 Mon Sep 17 00:00:00 2001 From: Simon Aubury Date: Tue, 21 Feb 2023 18:04:56 +1100 Subject: [PATCH] cleanup folders --- duckdb/go.sql | 138 ----------------------------- duckdb/init.sql | 9 -- {duckdb => notebooks}/language.csv | 0 notebooks/mastodon-analysis.ipynb | 2 +- 4 files changed, 1 insertion(+), 148 deletions(-) delete mode 100644 duckdb/go.sql delete mode 100644 duckdb/init.sql rename {duckdb => notebooks}/language.csv (100%) diff --git a/duckdb/go.sql b/duckdb/go.sql deleted file mode 100644 index 053075b..0000000 --- a/duckdb/go.sql +++ /dev/null @@ -1,138 +0,0 @@ --- .read duckdb/go.sql - -/* -drop table if exists xx; - -create table xx as -select m_id -, created_at_str -, created_at, ('EPOCH'::TIMESTAMP + INTERVAL (created_at::INT) seconds)::TIMESTAMPTZ as created_tz -, app -, url -, regexp_replace(regexp_replace(url, '^http[s]://', ''), '/.*$', '') as new_url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text -FROM read_parquet('s3://mastodon/topics/mastodon-topic/partition=0/*'); -*/ - -select date_part('day', created_tz) as created_day -, date_part('hour', created_tz) as created_hour -, count(*) -from yy -group by 1,2 -order by 1,2 -; - --- select username, bot, count(*) from xx group by 1,2 order by 3 desc; - - as select * - - - --- old backup -create table toots -as -select m_id -, created_at -, created_at_str -, app -, url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text -FROM read_parquet('../xx.parquet'); - -insert into toots -select - m_id -, created_at -, created_at_str -, app -, url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text -from read_parquet('mastodo*.parquet'); - -insert into toots -select - m_id -, created_at -, created_at_str -, app -, url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text -from read_parquet('20230213/mastodon-topic/partition=0/*.parquet'); - -insert into toots -select - m_id -, created_at -, created_at_str -, app -, url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text -from read_parquet('20230220/mastodon-topic/partition=0/*.parquet'); - - -create table all_toots -as -select - m_id -, created_at -, app -, url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text -from toots -group by - m_id -, created_at -, app -, url -, base_url -, language -, favourites -, username -, bot -, tags -, characters -, mastodon_text; - -COPY all_toots TO 'all_toots.parquet' (FORMAT PARQUET); \ No newline at end of file diff --git a/duckdb/init.sql b/duckdb/init.sql deleted file mode 100644 index 1d4867a..0000000 --- a/duckdb/init.sql +++ /dev/null @@ -1,9 +0,0 @@ -install 'httpfs'; -load 'httpfs'; - -set s3_endpoint='localhost:9000'; -set s3_access_key_id='minio'; -set s3_secret_access_key='minio123'; -set s3_use_ssl=false; -set s3_region='us-east-1'; -set s3_url_style='path'; diff --git a/duckdb/language.csv b/notebooks/language.csv similarity index 100% rename from duckdb/language.csv rename to notebooks/language.csv diff --git a/notebooks/mastodon-analysis.ipynb b/notebooks/mastodon-analysis.ipynb index 7305230..39761aa 100644 --- a/notebooks/mastodon-analysis.ipynb +++ b/notebooks/mastodon-analysis.ipynb @@ -117,7 +117,7 @@ "\n", "insert into language\n", "select *\n", - "from read_csv('../duckdb/language.csv', AUTO_DETECT=TRUE, header=True);\n", + "from read_csv('./language.csv', AUTO_DETECT=TRUE, header=True);\n", "\n", "create table mastodon_toot_raw as\n", "select m_id\n",