From db5025a31c38046e03c2251c148f469c9a79eaa0 Mon Sep 17 00:00:00 2001 From: Nilesh Date: Sat, 24 Dec 2022 19:54:00 +0000 Subject: [PATCH] Ruby script to insert data in postgres --- .gitignore | 3 -- README.md | 7 +-- db/insert_in_pg.rb | 106 +++++++++++++++++++++++++++++++++++++++++++++ db/schema.sql | 73 ++++++++++++++++++++----------- rollup.config.js | 97 ----------------------------------------- 5 files changed, 155 insertions(+), 131 deletions(-) create mode 100644 db/insert_in_pg.rb delete mode 100644 rollup.config.js diff --git a/.gitignore b/.gitignore index 48018d6..08b0245 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,6 @@ .DS_Store node_modules -public/bundle.css -public/bundle.js -public/bundle.js.map public/alltopics.json # Developer tools' files diff --git a/README.md b/README.md index bdb2854..9c940e6 100644 --- a/README.md +++ b/README.md @@ -23,10 +23,6 @@ In conjunction with this, we're also building an online game where this is prese Your bookmarks are saved in localStorage so be assured that no personal data is being tracked or saved on this site. -But if you'd like faster performance or to self-host this, you need a general-purpose computer (that means Linux/Windows/Mac) with Datasette (which is an exploratory tool for SQLite databases) installed. You can find [installation instructions specific to your operating system here](https://docs.datasette.io/en/stable/installation.html). - -After cloning this git repository on your local machine, run `npm run start` in the top-level directory to start the datasette server and open the app in your browser. 
- ## To contribute: This is a Wikipedia-scale project and we could use all kind of help: @@ -34,8 +30,7 @@ This is a Wikipedia-scale project and we could use all kind of help: - Spread word about this project among your friends, family, colleagues and online followers - To donate funds, [visit our OpenCollective](https://opencollective.com/learnawesome) - To report bugs, [create an issue](https://github.com/learn-awesome/learndb/issues) -- To improve our topic taxonomy (improve sub-topics / prerequisites etc), [raise a PR on our Github with changes in `db/topics.js` file](https://github.com/learn-awesome/learndb/tree/main/db) -- To improve the data about learning resources, first read [db/README.md](db/README.md) and [raise a PR on our Github with changes in `db/items.js` file](https://github.com/learn-awesome/learndb/tree/main/db) +- To improve the topic taxonomy or to add/modify the dataset, wait till we put together a public contribution workflow. You can always fork and create your own collection at any time. 
- To improve design and suggest features, [start a discussion](https://github.com/learn-awesome/learndb/discussions) - To fix technical bugs, [propose solutions on the issues](https://github.com/learn-awesome/learndb/issues) - For anything else, [start a discussion](https://github.com/learn-awesome/learndb/discussions) diff --git a/db/insert_in_pg.rb b/db/insert_in_pg.rb new file mode 100644 index 0000000..085855e --- /dev/null +++ b/db/insert_in_pg.rb @@ -0,0 +1,106 @@ +require 'pg' +require 'active_record' +require 'json' + +class MyDB < ActiveRecord::Base + self.abstract_class = true +end + +class Topic < MyDB; end +class Creator < MyDB; end +class Item < MyDB; end +class Review < MyDB; end + +ActiveRecord::Base.logger = Logger.new(STDERR) + +ActiveRecord::Base.establish_connection( + { adapter: 'postgresql', + database: 'postgres', + host: 'localhost', + username: 'postgres', + password: "", + port: 6543 + } +) + +class String + def slugify + self.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/,'') + end +end + +topics = File.readlines('topics.js')[1..-2].map{ |l| + JSON.parse(l.chomp, symbolize_names: true, object_class: OpenStruct) +}; 0 + +items = File.readlines('items.js')[1..-2].map{ |l| + JSON.parse(l.chomp.gsub("\\\\","\\"), symbolize_names: true, object_class: OpenStruct) +}; 0 + +reviews = File.readlines('reviews.js')[1..-2].map{ |l| + JSON.parse(l.chomp.gsub("\\\\","\\"), symbolize_names: true, object_class: OpenStruct) +}; 0 + +creators = reviews.map(&:by).uniq; 0 + + +def insert_topic_with_parents(all_topics, topic) + if topic.parent_id && !Topic.where(name: topic.parent_id).exists? + # insert parent topic before children to satisfy foreign key constraint + insert_topic_with_parents(all_topics, all_topics.find {|t| t.name == topic.parent_id}) + end + + return if Topic.where(name: topic.name).exists? 
+ + Topic.create!( + name: topic.name, + hname: topic.display_name, + parent_name: topic.parent_id, + sort_index: topic.sort_index + ) +end + +MyDB.transaction do + creators.each do |c| + Creator.create!( + name: c.slugify, + hname: c, + description: nil, + image_url: nil, + tags: [], + links: [] + ) + end; 0 + + topics.each do |t| + insert_topic_with_parents(topics, t) + end; 0 + + # skip items without links + items.select { |i| i.links.present? }.each do |i| + Item.create!( + id: i.iid, + hname: i.name, + description: i.description, + image_url: i.image, + tags: i.tags.to_s.split(";"), + links: i.links.split(";"), + topics: i.topics.split(";"), + creators: i.creators.to_s.split(";"), + year: i.year, + level: i.level, + cost: i.cost, + rating: (i.rating ? (i.rating.to_f * 10).round : nil) + ) + end; 0 + + reviews.each do |r| + Review.create!( + item_id: r.item_id, + by_creator: r.by.slugify, + rating: (r.rating ? (r.rating.to_f * 10).round : nil), + blurb: r.blurb, + url: r.url + ) + end +end \ No newline at end of file diff --git a/db/schema.sql b/db/schema.sql index bf960da..36109dd 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -1,47 +1,70 @@ -PRAGMA foreign_keys=OFF; +DROP TABLE IF EXISTS reviews; +DROP TABLE IF EXISTS items; +DROP TABLE IF EXISTS topics; +DROP TABLE IF EXISTS creators; CREATE TABLE topics ( - name VARCHAR(255) PRIMARY KEY, -- url-friendly slug - display_name VARCHAR(255), -- allow null, use name + name VARCHAR(255) PRIMARY KEY, -- url-friendly unique slug + hname VARCHAR(255), -- human-readable name, allow null, use name parent_name VARCHAR(255), -- create a hierarchy with self-reference sort_index INTEGER, -- used for sorting children of a parent FOREIGN KEY (parent_name) REFERENCES topics(name) ); CREATE TABLE creators ( - id VARCHAR(255) PRIMARY KEY, - name VARCHAR(1024) NOT NULL, + name VARCHAR(255) PRIMARY KEY, -- readable, but url-friendly and unique name eg: bill_gates_1 + hname VARCHAR(255) NOT NULL, + description TEXT, image_url 
VARCHAR(1024), - bio TEXT, - occupation VARCHAR(255), - links TEXT + tags TEXT[] NOT NULL, + links TEXT[] NOT NULL ); + CREATE TABLE items ( - id VARCHAR(255) PRIMARY KEY, -- uuid - name VARCHAR(1024) NOT NULL, - description VARCHAR(4096), -- in markdown + id uuid DEFAULT gen_random_uuid() PRIMARY KEY, + hname VARCHAR(1024) NOT NULL, + description TEXT, -- in markdown image_url VARCHAR(1024), - links TEXT NOT NULL, -- json array of {item_type, url, misc} - topics TEXT NOT NULL, -- json array of topic names - creators TEXT, -- json array of creator_ids - year varchar(32), - difficulty INTEGER, + tags TEXT[] NOT NULL, -- eg: is_oer, is_nsfw etc + links TEXT[] NOT NULL, -- array of {item_type|url|tags} + topics TEXT[] NOT NULL, -- array of topic names + creators TEXT[] NOT NULL, -- array of creator_ids + year VARCHAR(32), + level INTEGER, cost TEXT, - rating INTEGER, -- scale of 1 to 100, divide by 10 if needed - tags TEXT + rating INTEGER -- scale of 1 to 100, divide by 10 if needed ); CREATE TABLE reviews ( - item_id VARCHAR(255) NOT NULL, - by_item VARCHAR(255), - by_creator VARCHAR(255), + item_id uuid NOT NULL, + by_item uuid REFERENCES items(id), + by_creator VARCHAR(255) REFERENCES creators(name), rating INTEGER, -- scale of 1 to 100, divide by 10 if needed blurb TEXT, url VARCHAR(1024), - PRIMARY KEY (item_id, by_item, by_creator), + UNIQUE (item_id, by_item, by_creator), FOREIGN KEY (item_id) REFERENCES items(id), - FOREIGN KEY (by_item) REFERENCES items(id), - FOREIGN KEY (by_creator) REFERENCES creators(id), CHECK(by_item IS NOT NULL OR by_creator IS NOT NULL) -- one of the two must be present. Ideally both. 
-) \ No newline at end of file +); + +-- Dump from database to JSON files +-- COPY ( +-- SELECT json_agg(row_to_json(topics)) :: text +-- FROM topics +-- ) to '/Users/eshnil/code/learndb/db/topics.json'; + +-- COPY ( +-- SELECT json_agg(row_to_json(creators)) :: text +-- FROM creators +-- ) to '/Users/eshnil/code/learndb/db/creators.json'; + +-- COPY ( +-- SELECT json_agg(row_to_json(items)) :: text +-- FROM items +-- ) to '/Users/eshnil/code/learndb/db/items.json'; + +-- COPY ( +-- SELECT json_agg(row_to_json(reviews)) :: text +-- FROM reviews +-- ) to '/Users/eshnil/code/learndb/db/reviews.json'; \ No newline at end of file diff --git a/rollup.config.js b/rollup.config.js deleted file mode 100644 index 5cf2ccb..0000000 --- a/rollup.config.js +++ /dev/null @@ -1,97 +0,0 @@ -import svelte from 'rollup-plugin-svelte'; -import commonjs from '@rollup/plugin-commonjs'; -import resolve from '@rollup/plugin-node-resolve'; -import livereload from 'rollup-plugin-livereload'; -import { terser } from 'rollup-plugin-terser'; -import css from 'rollup-plugin-css-only'; - -import autoPreprocess from 'svelte-preprocess'; -import typescript from 'rollup-plugin-typescript2'; - -import { topics } from './db/topics.js' -import fs from 'fs'; - -function generateTopicJSON(){ - const topics_db = topics.trimStart().trimEnd().split('\n').map(j => JSON.parse(j)); - try { - fs.writeFileSync('static/alltopics.json', JSON.stringify(topics_db)); - } catch (error) { - console.error(error); - } - -} - -const production = !process.env.ROLLUP_WATCH; - -function serve() { - let server; - - function toExit() { - if (server) server.kill(0); - } - - return { - writeBundle() { - if (server) return; - server = require('child_process').spawn('npm', ['run', 'start'], { - stdio: ['ignore', 'inherit', 'inherit'], - shell: true - }); - - process.on('SIGTERM', toExit); - process.on('exit', toExit); - } - }; -} - -export default { - input: 'src/main.js', - output: { - sourcemap: true, - format: 'iife', - name: 
'app', - file: 'static/bundle.js' - }, - plugins: [ - svelte({ - preprocess: autoPreprocess(), - compilerOptions: { - // enable run-time checks when not in production - dev: !production - } - }), - typescript({ sourceMap: !production }), - // we'll extract any component CSS out into - // a separate file - better for performance - css({ output: 'bundle.css' }), - - // If you have external dependencies installed from - // npm, you'll most likely need these plugins. In - // some cases you'll need additional configuration - - // consult the documentation for details: - // https://github.com/rollup/plugins/tree/master/packages/commonjs - resolve({ - browser: true, - dedupe: ['svelte'] - }), - commonjs(), - - // In dev mode, call `npm run start` once - // the bundle has been generated - !production && serve(), - - // generate alltopics.json - production && generateTopicJSON(), - - // Watch the `public` directory and refresh the - // browser on changes when not in production - !production && livereload('static'), - - // If we're building for production (npm run build - // instead of npm run dev), minify - production && terser() - ], - watch: { - clearScreen: false - } -};