Ruby script to insert data in postgres

pull/58/head
Nilesh 2022-12-24 19:54:00 +00:00
rodzic 70f721be7b
commit db5025a31c
5 zmienionych plików z 155 dodań i 131 usunięć

3
.gitignore vendored
Wyświetl plik

@ -1,9 +1,6 @@
.DS_Store
node_modules
public/bundle.css
public/bundle.js
public/bundle.js.map
public/alltopics.json
# Developer tools' files

Wyświetl plik

@ -23,10 +23,6 @@ In conjunction with this, we're also building an online game where this is prese
Your bookmarks are saved in localStorage so be assured that no personal data is being tracked or saved on this site.
But if you'd like faster performance or to self-host this, you need a general-purpose computer (that means Linux/Windows/Mac) with Datasette (which is an exploratory tool for SQLite databases) installed. You can find [installation instructions specific to your operating system here](https://docs.datasette.io/en/stable/installation.html).
After cloning this git repository on your local machine, run `npm run start` in the top-level directory to start the datasette server and open the app in your browser.
## To contribute:
This is a Wikipedia-scale project and we could use all kinds of help:
@ -34,8 +30,7 @@ This is a Wikipedia-scale project and we could use all kind of help:
- Spread the word about this project among your friends, family, colleagues and online followers
- To donate funds, [visit our OpenCollective](https://opencollective.com/learnawesome)
- To report bugs, [create an issue](https://github.com/learn-awesome/learndb/issues)
- To improve our topic taxonomy (improve sub-topics / prerequisites etc), [raise a PR on our Github with changes in `db/topics.js` file](https://github.com/learn-awesome/learndb/tree/main/db)
- To improve the data about learning resources, first read [db/README.md](db/README.md) and [raise a PR on our Github with changes in `db/items.js` file](https://github.com/learn-awesome/learndb/tree/main/db)
- To improve the topic taxonomy or to add/modify the dataset, please wait until we have put together a public contribution workflow. You can always fork and create your own collection at any time.
- To improve design and suggest features, [start a discussion](https://github.com/learn-awesome/learndb/discussions)
- To fix technical bugs, [propose solutions on the issues](https://github.com/learn-awesome/learndb/issues)
- For anything else, [start a discussion](https://github.com/learn-awesome/learndb/discussions)

106
db/insert_in_pg.rb 100644
Wyświetl plik

@ -0,0 +1,106 @@
require 'json'
require 'logger'
require 'ostruct'

require 'pg'
require 'active_record'
# Abstract parent for the models below. Marking it abstract tells
# ActiveRecord not to look for a table backing MyDB itself; it only serves
# as a common ancestor (and as the receiver of the load transaction).
class MyDB < ActiveRecord::Base
  self.abstract_class = true
end

# Concrete models. None of them declares validations or associations; they
# exist so rows can be created through ActiveRecord.
class Topic < MyDB
end

class Creator < MyDB
end

class Item < MyDB
end

class Review < MyDB
end
# Log ActiveRecord activity to stderr so the progress of the load is visible.
ActiveRecord::Base.logger = Logger.new(STDERR)

# Connect to PostgreSQL. Every setting keeps the value this script has always
# used as its default, but can be overridden through the conventional libpq
# environment variables (PGDATABASE, PGHOST, PGUSER, PGPASSWORD, PGPORT) so
# the script can target another server without being edited.
ActiveRecord::Base.establish_connection(
  adapter: 'postgresql',
  database: ENV.fetch('PGDATABASE', 'postgres'),
  host: ENV.fetch('PGHOST', 'localhost'),
  username: ENV.fetch('PGUSER', 'postgres'),
  password: ENV.fetch('PGPASSWORD', ''),
  # Environment values are strings; Integer() rejects a malformed port early.
  port: Integer(ENV.fetch('PGPORT', '6543'))
)
class String
  # Builds a slug from the receiver: lowercases it, trims surrounding
  # whitespace, turns every space into a hyphen, and finally removes any
  # character that is neither a word character nor a hyphen.
  #
  # @return [String] a new string; the receiver is not modified
  def slugify
    trimmed = downcase.strip
    hyphenated = trimmed.tr(' ', '-')
    hyphenated.gsub(/[^\w-]/, '')
  end
end
# Reads a line-oriented data file and parses every line except the first and
# the last as one JSON object.
# NOTE(review): the skipped first/last lines are presumably the JS wrapper
# around the data (an `export const ... =` opener and its closing line) —
# confirm against the data files.
#
# @param filename [String] file name, resolved relative to this script's
#   directory (falls back to the working directory when __dir__ is nil)
# @param unescape [Boolean] when true, every doubled backslash in a line is
#   collapsed to a single backslash before the line is parsed
# @return [Array<OpenStruct>] one object per data line, keys symbolized
# @raise [JSON::ParserError] if a data line is not valid JSON
def load_js_records(filename, unescape: false)
  path = File.expand_path(filename, __dir__)
  # Array#[] returns nil for an empty file; treat that as "no records".
  data_lines = File.readlines(path)[1..-2] || []
  data_lines.map do |line|
    json = line.chomp
    json = json.gsub("\\\\", "\\") if unescape
    JSON.parse(json, symbolize_names: true, object_class: OpenStruct)
  end
end

topics = load_js_records('topics.js')
items = load_js_records('items.js', unescape: true)
reviews = load_js_records('reviews.js', unescape: true)

# Distinct values of the `by` field across all reviews; these become creators.
creators = reviews.map(&:by).uniq
# Inserts +topic+ into the topics table, first inserting any ancestor that is
# not stored yet so the parent_name foreign key is always satisfied.
# Topics that already exist are left untouched, so the method may be called
# repeatedly for the same topic.
#
# @param all_topics [Array] every known topic; each responds to name,
#   display_name, parent_id and sort_index
# @param topic the topic to insert
# @param visiting [Array<String>] names on the current ancestor chain; used
#   internally to detect cycles, callers normally omit it
# @raise [ArgumentError] if a parent is not present in +all_topics+ or the
#   parent chain loops back on itself
def insert_topic_with_parents(all_topics, topic, visiting = [])
  if visiting.include?(topic.name)
    raise ArgumentError, "cyclic parent chain: #{(visiting + [topic.name]).join(' -> ')}"
  end

  parent_name = topic.parent_id
  if parent_name && !Topic.where(name: parent_name).exists?
    parent = all_topics.find { |candidate| candidate.name == parent_name }
    unless parent
      raise ArgumentError,
            "topic #{topic.name.inspect} references unknown parent #{parent_name.inspect}"
    end

    # insert parent topic before children to satisfy foreign key constraint
    insert_topic_with_parents(all_topics, parent, visiting + [topic.name])
  end

  return if Topic.where(name: topic.name).exists?

  Topic.create!(
    name: topic.name,
    hname: topic.display_name,
    parent_name: parent_name,
    sort_index: topic.sort_index
  )
end
# Converts a source rating to the integer stored in the rating columns:
# multiplied by 10 and rounded; a missing rating stays nil.
scale_rating = ->(rating) { rating ? (rating.to_f * 10).round : nil }

# Everything is inserted inside one transaction, so an exception raised by
# any create! rolls back the whole load.
MyDB.transaction do
  # Different display names can produce the same slug (for example when they
  # differ only in letter case). The slug is the creators key, so keep only
  # the first name seen for each slug.
  creators.uniq(&:slugify).each do |creator_name|
    Creator.create!(
      name: creator_name.slugify,
      hname: creator_name,
      description: nil,
      image_url: nil,
      tags: [],
      links: []
    )
  end

  topics.each { |topic| insert_topic_with_parents(topics, topic) }

  # skip items without links
  items.select { |item| item.links.present? }.each do |item|
    Item.create!(
      id: item.iid,
      hname: item.name,
      description: item.description,
      image_url: item.image,
      # Multi-valued fields are ';'-separated in the source data; to_s turns
      # a missing field into an empty list.
      tags: item.tags.to_s.split(';'),
      links: item.links.split(';'),
      topics: item.topics.to_s.split(';'),
      creators: item.creators.to_s.split(';'),
      year: item.year,
      level: item.level,
      cost: item.cost,
      rating: scale_rating.call(item.rating)
    )
  end

  reviews.each do |review|
    Review.create!(
      item_id: review.item_id,
      # Same slug that was used as the creator's key above.
      by_creator: review.by.slugify,
      rating: scale_rating.call(review.rating),
      blurb: review.blurb,
      url: review.url
    )
  end
end

Wyświetl plik

@ -1,47 +1,70 @@
-- Reset step: drop the four tables if they already exist so the definitions
-- below start from a clean slate. reviews goes first because it declares
-- foreign keys to items and creators.
-- NOTE(review): PRAGMA is SQLite syntax, while the definitions below use
-- PostgreSQL features (uuid, gen_random_uuid(), TEXT[]); this line is
-- presumably a removed line of the diff — confirm against the real file.
PRAGMA foreign_keys=OFF;
DROP TABLE IF EXISTS reviews;
DROP TABLE IF EXISTS items;
DROP TABLE IF EXISTS topics;
DROP TABLE IF EXISTS creators;
-- topics: topic taxonomy keyed by a slug; parent_name points back at
-- topics(name) to form a hierarchy and sort_index orders siblings.
-- NOTE(review): this listing appears to interleave removed and added diff
-- lines (name is declared twice, next to display_name and next to hname), so
-- it is not executable as shown — confirm against the real schema file.
CREATE TABLE topics (
name VARCHAR(255) PRIMARY KEY, -- url-friendly slug
display_name VARCHAR(255), -- allow null, use name
name VARCHAR(255) PRIMARY KEY, -- url-friendly unique slug
hname VARCHAR(255), -- human-readable name, allow null, use name
parent_name VARCHAR(255), -- create a hierarchy with self-reference
sort_index INTEGER, -- used for sorting children of a parent
FOREIGN KEY (parent_name) REFERENCES topics(name)
);
-- creators: people or organisations credited on items and reviews.
-- NOTE(review): removed and added diff lines appear interleaved here: there
-- are two primary-key declarations (id and name) and links is declared both
-- as TEXT and as TEXT[], with no comma after the first — confirm which
-- columns belong to the current schema.
CREATE TABLE creators (
id VARCHAR(255) PRIMARY KEY,
name VARCHAR(1024) NOT NULL,
name VARCHAR(255) PRIMARY KEY, -- readable, but url-friendly and unique name eg: bill_gates_1
hname VARCHAR(255) NOT NULL,
description TEXT,
image_url VARCHAR(1024),
bio TEXT,
occupation VARCHAR(255),
links TEXT
tags TEXT[] NOT NULL,
links TEXT[] NOT NULL
);
-- items: learning resources. Multi-valued attributes (tags, links, topics,
-- creators) appear both as TEXT columns and as TEXT[] array columns.
-- NOTE(review): removed and added diff lines appear interleaved here (id is
-- declared as VARCHAR and as uuid, difficulty sits next to level, tags and
-- rating each appear twice) — confirm against the real schema file. The
-- "json array" wording on the TEXT[] columns looks carried over from the
-- TEXT versions; verify.
CREATE TABLE items (
id VARCHAR(255) PRIMARY KEY, -- uuid
name VARCHAR(1024) NOT NULL,
description VARCHAR(4096), -- in markdown
id uuid DEFAULT gen_random_uuid() PRIMARY KEY,
hname VARCHAR(1024) NOT NULL,
description TEXT, -- in markdown
image_url VARCHAR(1024),
links TEXT NOT NULL, -- json array of {item_type, url, misc}
topics TEXT NOT NULL, -- json array of topic names
creators TEXT, -- json array of creator_ids
year varchar(32),
difficulty INTEGER,
tags TEXT[] NOT NULL, -- eg: is_oer, is_nsfw etc
links TEXT[] NOT NULL, -- json array of {item_type|url|tags}
topics TEXT[] NOT NULL, -- json array of topic names
creators TEXT[] NOT NULL, -- json array of creator_ids
year VARCHAR(32),
level INTEGER,
cost TEXT,
rating INTEGER, -- scale of 1 to 100, divide by 10 if needed
tags TEXT
rating INTEGER -- scale of 1 to 100, divide by 10 if needed
);
-- reviews: a rating/blurb about an item (item_id), attributed to another
-- item (by_item), to a creator (by_creator), or to both; the CHECK requires
-- at least one of the two to be set.
-- NOTE(review): removed and added diff lines appear interleaved here: the
-- key columns are declared with two different types, both a PRIMARY KEY and
-- a UNIQUE constraint cover the same columns, by_creator references both
-- creators(name) and creators(id), and the statement is closed twice —
-- confirm against the real schema file.
CREATE TABLE reviews (
item_id VARCHAR(255) NOT NULL,
by_item VARCHAR(255),
by_creator VARCHAR(255),
item_id uuid NOT NULL,
by_item uuid REFERENCES items(id),
by_creator VARCHAR(255) REFERENCES creators(name),
rating INTEGER, -- scale of 1 to 100, divide by 10 if needed
blurb TEXT,
url VARCHAR(1024),
PRIMARY KEY (item_id, by_item, by_creator),
UNIQUE (item_id, by_item, by_creator),
FOREIGN KEY (item_id) REFERENCES items(id),
FOREIGN KEY (by_item) REFERENCES items(id),
FOREIGN KEY (by_creator) REFERENCES creators(id),
CHECK(by_item IS NOT NULL OR by_creator IS NOT NULL) -- one of the two must be present. Ideally both.
)
);
-- Dump from database to JSON files
-- COPY (
-- SELECT json_agg(row_to_json(topics)) :: text
-- FROM topics
-- ) to '/Users/eshnil/code/learndb/db/topics.json';
-- COPY (
-- SELECT json_agg(row_to_json(creators)) :: text
-- FROM creators
-- ) to '/Users/eshnil/code/learndb/db/creators.json';
-- COPY (
-- SELECT json_agg(row_to_json(items)) :: text
-- FROM items
-- ) to '/Users/eshnil/code/learndb/db/items.json';
-- COPY (
-- SELECT json_agg(row_to_json(reviews)) :: text
-- FROM reviews
-- ) to '/Users/eshnil/code/learndb/db/reviews.json';

Wyświetl plik

@ -1,97 +0,0 @@
import svelte from 'rollup-plugin-svelte';
import commonjs from '@rollup/plugin-commonjs';
import resolve from '@rollup/plugin-node-resolve';
import livereload from 'rollup-plugin-livereload';
import { terser } from 'rollup-plugin-terser';
import css from 'rollup-plugin-css-only';
import autoPreprocess from 'svelte-preprocess';
import typescript from 'rollup-plugin-typescript2';
import { topics } from './db/topics.js'
import fs from 'fs';
// Parses the imported `topics` string (one JSON document per line, ignoring
// leading and trailing whitespace) and writes the resulting array to
// static/alltopics.json. A failed write is logged, not thrown.
function generateTopicJSON() {
  const parsedTopics = [];
  for (const line of topics.trim().split('\n')) {
    parsedTopics.push(JSON.parse(line));
  }

  try {
    fs.writeFileSync('static/alltopics.json', JSON.stringify(parsedTopics));
  } catch (err) {
    console.error(err);
  }
}
// True unless the ROLLUP_WATCH environment variable is set to a truthy value.
const production = !process.env.ROLLUP_WATCH;
// Returns a Rollup plugin object whose writeBundle hook launches
// `npm run start` the first time a bundle is written, and registers handlers
// that kill that child process on SIGTERM or on process exit.
function serve() {
  let child;

  const stopChild = () => {
    if (child) child.kill(0);
  };

  return {
    writeBundle() {
      // Only ever start one child, however many bundles get written.
      if (child) return;

      const spawnOptions = {
        stdio: ['ignore', 'inherit', 'inherit'],
        shell: true
      };
      child = require('child_process').spawn('npm', ['run', 'start'], spawnOptions);

      process.on('SIGTERM', stopChild);
      process.on('exit', stopChild);
    }
  };
}
// Rollup configuration: bundles src/main.js into a single IIFE at
// static/bundle.js (with sourcemap). Entries of the form
// `condition && plugin()` evaluate to `false` when the condition does not
// hold, so those plugins are only instantiated in the matching mode.
export default {
input: 'src/main.js',
output: {
sourcemap: true,
format: 'iife',
name: 'app',
file: 'static/bundle.js'
},
plugins: [
svelte({
preprocess: autoPreprocess(),
compilerOptions: {
// enable run-time checks when not in production
dev: !production
}
}),
typescript({ sourceMap: !production }),
// we'll extract any component CSS out into
// a separate file - better for performance
css({ output: 'bundle.css' }),
// If you have external dependencies installed from
// npm, you'll most likely need these plugins. In
// some cases you'll need additional configuration -
// consult the documentation for details:
// https://github.com/rollup/plugins/tree/master/packages/commonjs
resolve({
browser: true,
dedupe: ['svelte']
}),
commonjs(),
// In dev mode, call `npm run start` once
// the bundle has been generated
!production && serve(),
// generate alltopics.json
// (generateTopicJSON() runs while this array is being built and returns
// undefined, so it contributes no plugin object)
production && generateTopicJSON(),
// Watch the `static` directory and refresh the
// browser on changes when not in production
!production && livereload('static'),
// If we're building for production (npm run build
// instead of npm run dev), minify
production && terser()
],
watch: {
clearScreen: false
}
};