kopia lustrzana https://github.com/harvard-lil/archive.social
commit
f8c0cd67c2
62
README.md
62
README.md
|
@ -1,11 +1,18 @@
|
||||||
# archive.social
|
# archive.social 📚
|
||||||
> 🚧 Work In Progress
|
|
||||||
|
High-fidelity capture of Twitter threads as sealed PDFs - [archive.social](https://archive.social).
|
||||||
|
|
||||||
|
[Archive.social](https://archive.social) is an experiment of the [Harvard Library Innovation Lab](https://lil.law.harvard.edu).
|
||||||
|
|
||||||
|
> 🚧 Experimental / Prototype. Early release to be consolidated.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
- [Dependencies](#dependencies)
|
- [Dependencies](#dependencies)
|
||||||
- [Local development](#local-development)
|
- [Local development](#local-development)
|
||||||
|
- [Dev CLI](#dev-cli)
|
||||||
|
- [Code docs](/docs)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
@ -13,7 +20,7 @@
|
||||||
|
|
||||||
### Runtimes
|
### Runtimes
|
||||||
- [Node.js](https://nodejs.org/) 18+
|
- [Node.js](https://nodejs.org/) 18+
|
||||||
- [Python](https://www.python.org/) 3.9+.
|
- [Python](https://www.python.org/) 3.9+
|
||||||
|
|
||||||
### Browsers
|
### Browsers
|
||||||
- Google Chrome _(`npx playwright install --force chrome` may be used)_.
|
- Google Chrome _(`npx playwright install --force chrome` may be used)_.
|
||||||
|
@ -21,12 +28,13 @@
|
||||||
### Python dependencies
|
### Python dependencies
|
||||||
- ⚠️ For now: Python dependencies are installed at machine level, as a post-install step of `npm install`.
|
- ⚠️ For now: Python dependencies are installed at machine level, as a post-install step of `npm install`.
|
||||||
|
|
||||||
### Known Debian / Ubuntu packages
|
### Known Ubuntu packages
|
||||||
```
|
```
|
||||||
curl bash gcc g++ python3 python3-pip python3-dev zlib1g zlib1g-dev libjpeg-dev libssl-dev libffi-dev ghostscript poppler-utils
|
curl bash gcc g++ python3 python3-pip python3-dev zlib1g zlib1g-dev libjpeg-dev libssl-dev libffi-dev ghostscript poppler-utils
|
||||||
```
|
```
|
||||||
|
|
||||||
Node may be sourced from [Nodesource](https://github.com/nodesource/distributions/blob/master/README.md#installation-instructions).
|
- ⚠️ On Linux, this project is only compatible with Ubuntu at this time, because it uses Playwright + Chrome.
|
||||||
|
- Node may be sourced from [Nodesource](https://github.com/nodesource/distributions/blob/master/README.md#installation-instructions).
|
||||||
|
|
||||||
### For development on Mac OS
|
### For development on Mac OS
|
||||||
A `brewfile` is available. Run `brew bundle` to install machine-level dependencies that can be provided by [homebrew](https://brew.sh/).
|
A `brewfile` is available. Run `brew bundle` to install machine-level dependencies that can be provided by [homebrew](https://brew.sh/).
|
||||||
|
@ -37,12 +45,48 @@ A `brewfile` is available. Run `brew bundle` to install machine-level dependenci
|
||||||
|
|
||||||
## Local development
|
## Local development
|
||||||
|
|
||||||
> 🚧 WIP
|
Run the following commands to initialize the project and start the development server.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
brew bundle # (Mac OS only) - See Linux dependencies above.
|
brew bundle # (Mac OS only) - See Linux dependencies above.
|
||||||
npm install
|
npm install # To install npm packages
|
||||||
npx playwright install chrome
|
npx playwright install chrome # To ensure Playwright has a version of Chrome to talk to
|
||||||
npm run generate-local-cert # Will generate a certificate for self-signing PDFs
|
npm run generate-dev-cert # Will generate a certificate for self-signing PDFs. For testing purposes only.
|
||||||
|
npm run dev # Starts the development server on port 3000
|
||||||
|
```
|
||||||
|
|
||||||
|
[☝️ Back to summary](#summary)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dev CLI
|
||||||
|
|
||||||
|
### start
|
||||||
|
```bash
|
||||||
|
npm run start
|
||||||
|
```
|
||||||
|
|
||||||
|
Starts the app's server on port 3000 with warning-level logs.
|
||||||
|
|
||||||
|
### dev
|
||||||
|
```bash
|
||||||
npm run dev
|
npm run dev
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Starts the app's server on port 3000 with info-level logs. Watches for file changes.
|
||||||
|
|
||||||
|
### generate-dev-cert
|
||||||
|
```bash
|
||||||
|
npm run generate-dev-cert
|
||||||
|
```
|
||||||
|
|
||||||
|
Generates `certs/cert.pem` and `certs/key.pem` for local development purposes.
|
||||||
|
|
||||||
|
### docgen
|
||||||
|
```bash
|
||||||
|
npm run docgen
|
||||||
|
```
|
||||||
|
|
||||||
|
Generates JSDoc-based code documentation under `/docs`.
|
||||||
|
|
||||||
|
[☝️ Back to summary](#summary)
|
||||||
|
|
10
app/const.js
10
app/const.js
|
@ -13,6 +13,7 @@ export const CERTS_PATH = process.env.CERTS_PATH ? process.env.CERTS_PATH : `${p
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Path to the "data" folder.
|
* Path to the "data" folder.
|
||||||
|
* @constant
|
||||||
*/
|
*/
|
||||||
export const DATA_PATH = process.env.DATA_PATH ? process.env.DATA_PATH : `${process.env.PWD}/app/data/`;
|
export const DATA_PATH = process.env.DATA_PATH ? process.env.DATA_PATH : `${process.env.PWD}/app/data/`;
|
||||||
|
|
||||||
|
@ -24,25 +25,30 @@ export const TMP_PATH = `${process.env.PWD}/app/tmp/`;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Path to the "templates" folder.
|
* Path to the "templates" folder.
|
||||||
|
* @constant
|
||||||
*/
|
*/
|
||||||
export const TEMPLATES_PATH = `${process.env.PWD}/app/templates/`;
|
export const TEMPLATES_PATH = `${process.env.PWD}/app/templates/`;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Path to the "executables" folder.
|
* Path to the "executables" folder, for dependencies that are meant to be executed directly, such as `yt-dlp`.
|
||||||
|
* @constant
|
||||||
*/
|
*/
|
||||||
export const EXECUTABLES_FOLDER = `${process.env.PWD}/executables/`;
|
export const EXECUTABLES_FOLDER = `${process.env.PWD}/executables/`;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Path to the "static" folder.
|
* Path to the "static" folder.
|
||||||
|
* @constant
|
||||||
*/
|
*/
|
||||||
export const STATIC_PATH = `${process.env.PWD}/app/static/`;
|
export const STATIC_PATH = `${process.env.PWD}/app/static/`;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum capture processes that can be run in parallel.
|
* Maximum capture processes that can be run in parallel.
|
||||||
|
* @constant
|
||||||
*/
|
*/
|
||||||
export const MAX_PARALLEL_CAPTURES_TOTAL = 200;
|
export const MAX_PARALLEL_CAPTURES_TOTAL = 200;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum capture processes that can be run in parallel for a given key.
|
* Maximum capture processes that can be run in parallel for a given key.
|
||||||
|
* @constant
|
||||||
*/
|
*/
|
||||||
export const MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY = 20;
|
export const MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY = 20;
|
339
app/server.js
339
app/server.js
|
@ -27,10 +27,10 @@ const successLog = new SuccessLog();
|
||||||
const accessKeys = new AccessKeys();
|
const accessKeys = new AccessKeys();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Keeps track of how many capture processes are currently running.
|
* Keeps track of how many capture processes are currently running.
|
||||||
* May be used to redirect users if over capacity.
|
* May be used to redirect users if over capacity.
|
||||||
*
|
*
|
||||||
* [!] This needs to be upgraded to proper rate limiting after launch.
|
* [!] Only good for early prototyping.
|
||||||
*
|
*
|
||||||
* @type {{
|
* @type {{
|
||||||
* currentTotal: number,
|
* currentTotal: number,
|
||||||
|
@ -46,170 +46,193 @@ const CAPTURES_WATCH = {
|
||||||
maxPerAccessKey: MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY,
|
maxPerAccessKey: MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY,
|
||||||
}
|
}
|
||||||
|
|
||||||
export default async function (fastify, opts) {
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [GET] /
|
||||||
|
* Shows the landing page / form.
|
||||||
|
* Assumes `fastify` is in scope.
|
||||||
|
*
|
||||||
|
* @param {fastify.FastifyRequest} request
|
||||||
|
* @param {fastify.FastifyReply} reply
|
||||||
|
* @returns {Promise<fastify.FastifyReply>}
|
||||||
|
*/
|
||||||
|
async function index(request, reply) {
|
||||||
|
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`);
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(200)
|
||||||
|
.header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
.send(html);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [POST] `/`
|
||||||
|
* Processes a request to capture a `twitter.com` url.
|
||||||
|
* Serves PDF bytes directly if operation is successful.
|
||||||
|
* Returns to form with specific error code, passed as `errorReason`, otherwise.
|
||||||
|
* Assumes `fastify` is in scope.
|
||||||
|
*
|
||||||
|
* @param {fastify.FastifyRequest} request
|
||||||
|
* @param {fastify.FastifyReply} reply
|
||||||
|
* @returns {Promise<fastify.FastifyReply>}
|
||||||
|
*/
|
||||||
|
async function capture(request, reply) {
|
||||||
|
const data = request.body;
|
||||||
|
const accessKey = data["access-key"];
|
||||||
|
|
||||||
|
request.log.info(`Capture capacity: ${CAPTURES_WATCH.currentTotal} / ${CAPTURES_WATCH.maxTotal}.`);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Check access key
|
||||||
|
//
|
||||||
|
if (!accessKeys.check(accessKey)) {
|
||||||
|
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
||||||
|
error: true,
|
||||||
|
errorReason: "ACCESS-KEY"
|
||||||
|
});
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(401)
|
||||||
|
.header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
.send(html);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Check url
|
||||||
|
//
|
||||||
|
try {
|
||||||
|
const url = new URL(data.url);
|
||||||
|
assert(url.origin === "https://twitter.com");
|
||||||
|
}
|
||||||
|
catch(err) {
|
||||||
|
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
||||||
|
error: true,
|
||||||
|
errorReason: "URL"
|
||||||
|
});
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(400)
|
||||||
|
.header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
.send(html);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Check that there is still capture capacity (total)
|
||||||
|
//
|
||||||
|
if (CAPTURES_WATCH.currentTotal >= CAPTURES_WATCH.maxTotal) {
|
||||||
|
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
||||||
|
error: true,
|
||||||
|
errorReason: "TOO-MANY-CAPTURES-TOTAL"
|
||||||
|
});
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(503)
|
||||||
|
.header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
.send(html);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Check that there is still capture capacity (for this access key)
|
||||||
|
//
|
||||||
|
if (CAPTURES_WATCH.currentByAccessKey[accessKey] >= CAPTURES_WATCH.maxPerAccessKey) {
|
||||||
|
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
||||||
|
error: true,
|
||||||
|
errorReason: "TOO-MANY-CAPTURES-USER"
|
||||||
|
});
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(429)
|
||||||
|
.header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
.send(html);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Process capture request
|
||||||
|
//
|
||||||
|
try {
|
||||||
|
CAPTURES_WATCH.currentTotal += 1;
|
||||||
|
|
||||||
|
if (accessKey in CAPTURES_WATCH.currentByAccessKey) {
|
||||||
|
CAPTURES_WATCH.currentByAccessKey[accessKey] += 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
CAPTURES_WATCH.currentByAccessKey[accessKey] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const tweets = new TwitterCapture(data.url, {runBrowserBehaviors: "auto-scroll" in data});
|
||||||
|
const pdf = await tweets.capture();
|
||||||
|
|
||||||
|
successLog.add(accessKey, pdf);
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(200)
|
||||||
|
.header('Content-Type', 'application/pdf')
|
||||||
|
.header('Content-Disposition', 'attachment; filename="capture.pdf"')
|
||||||
|
.send(pdf);
|
||||||
|
}
|
||||||
|
catch(err) {
|
||||||
|
request.log.error(`Capture failed. ${err}`);
|
||||||
|
|
||||||
|
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
||||||
|
error: true,
|
||||||
|
errorReason: "CAPTURE-ISSUE"
|
||||||
|
});
|
||||||
|
|
||||||
|
return reply
|
||||||
|
.code(500)
|
||||||
|
.header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
.send(html);
|
||||||
|
}
|
||||||
|
// In any case: we need to decrease CAPTURES_WATCH counts.
|
||||||
|
finally {
|
||||||
|
CAPTURES_WATCH.currentTotal -= 1;
|
||||||
|
|
||||||
|
if (accessKey && accessKey in CAPTURES_WATCH.currentByAccessKey) {
|
||||||
|
CAPTURES_WATCH.currentByAccessKey[data["access-key"]] -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [GET] `/api/v1/hashes/check/<sha512-hash>`.
|
||||||
|
* Checks if a given SHA512 hash is in the "success" logs, meaning this app created it.
|
||||||
|
* Hash is passed as the last parameter, url encoded.
|
||||||
|
* Assumes `fastify` is in scope.
|
||||||
|
*
|
||||||
|
* Returns HTTP 200 if found, HTTP 404 if not.
|
||||||
|
*
|
||||||
|
* @param {fastify.FastifyRequest} request
|
||||||
|
* @param {fastify.FastifyReply} reply
|
||||||
|
* @returns {Promise<fastify.FastifyReply>}
|
||||||
|
*/
|
||||||
|
async function checkHash(request, reply) {
|
||||||
|
let found = false;
|
||||||
|
const { hash } = request.params;
|
||||||
|
|
||||||
|
if (hash.length === 95 || hash.length === 88) {
|
||||||
|
found = successLog.findHashInLogs(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
return reply.code(found ? 200 : 404).send();
|
||||||
|
}
|
||||||
|
|
||||||
|
export default async function (fastify, opts) {
|
||||||
// Adds support for `application/x-www-form-urlencoded`
|
// Adds support for `application/x-www-form-urlencoded`
|
||||||
fastify.register(import('@fastify/formbody'));
|
fastify.register(import('@fastify/formbody'));
|
||||||
|
|
||||||
// Serves files from STATIC_PATH
|
// Serves files from `STATIC_PATH`
|
||||||
fastify.register(import('@fastify/static'), {
|
fastify.register(import('@fastify/static'), {
|
||||||
root: STATIC_PATH,
|
root: STATIC_PATH,
|
||||||
prefix: '/static/',
|
prefix: '/static/',
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
|
// [GET] /
|
||||||
* [GET] /
|
fastify.get('/', index);
|
||||||
* Shows the landing page / form.
|
|
||||||
*/
|
|
||||||
fastify.get('/', async (request, reply) => {
|
|
||||||
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`);
|
|
||||||
|
|
||||||
return reply
|
// [POST] /
|
||||||
.code(200)
|
fastify.post('/', capture);
|
||||||
.header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
.send(html);
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
|
// [GET] /api/v1/hashes/check/:hash
|
||||||
* [POST] /
|
fastify.get('/api/v1/hashes/check/:hash', checkHash);
|
||||||
* Processes a request to capture a twitter url.
|
|
||||||
* Serves PDF bytes directly if operation is successful.
|
|
||||||
* Returns to form with specific error code, passed as `errorReason`, otherwise.
|
|
||||||
*/
|
|
||||||
fastify.post('/', async (request, reply) => {
|
|
||||||
const data = request.body;
|
|
||||||
const accessKey = data["access-key"];
|
|
||||||
|
|
||||||
request.log.info(`Capture capacity: ${CAPTURES_WATCH.currentTotal} / ${CAPTURES_WATCH.maxTotal}.`);
|
|
||||||
|
|
||||||
//
|
|
||||||
// Check access key
|
|
||||||
//
|
|
||||||
if (!accessKeys.check(accessKey)) {
|
|
||||||
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
|
||||||
error: true,
|
|
||||||
errorReason: "ACCESS-KEY"
|
|
||||||
});
|
|
||||||
|
|
||||||
return reply
|
|
||||||
.code(401)
|
|
||||||
.header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
.send(html);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Check url
|
|
||||||
//
|
|
||||||
try {
|
|
||||||
const url = new URL(data.url);
|
|
||||||
assert(url.origin === "https://twitter.com");
|
|
||||||
}
|
|
||||||
catch(err) {
|
|
||||||
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
|
||||||
error: true,
|
|
||||||
errorReason: "URL"
|
|
||||||
});
|
|
||||||
|
|
||||||
return reply
|
|
||||||
.code(400)
|
|
||||||
.header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
.send(html);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Check that there is still capture capacity (total)
|
|
||||||
//
|
|
||||||
if (CAPTURES_WATCH.currentTotal >= CAPTURES_WATCH.maxTotal) {
|
|
||||||
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
|
||||||
error: true,
|
|
||||||
errorReason: "TOO-MANY-CAPTURES-TOTAL"
|
|
||||||
});
|
|
||||||
|
|
||||||
return reply
|
|
||||||
.code(503)
|
|
||||||
.header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
.send(html);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Check that there is still capture capacity (for this access key)
|
|
||||||
//
|
|
||||||
if (CAPTURES_WATCH.currentByAccessKey[accessKey] >= CAPTURES_WATCH.maxPerAccessKey) {
|
|
||||||
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
|
||||||
error: true,
|
|
||||||
errorReason: "TOO-MANY-CAPTURES-USER"
|
|
||||||
});
|
|
||||||
|
|
||||||
return reply
|
|
||||||
.code(429)
|
|
||||||
.header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
.send(html);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Process capture request
|
|
||||||
//
|
|
||||||
try {
|
|
||||||
CAPTURES_WATCH.currentTotal += 1;
|
|
||||||
|
|
||||||
if (accessKey in CAPTURES_WATCH.currentByAccessKey) {
|
|
||||||
CAPTURES_WATCH.currentByAccessKey[accessKey] += 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
CAPTURES_WATCH.currentByAccessKey[accessKey] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const tweets = new TwitterCapture(data.url, {runBrowserBehaviors: "auto-scroll" in data});
|
|
||||||
const pdf = await tweets.capture();
|
|
||||||
|
|
||||||
successLog.add(accessKey, pdf);
|
|
||||||
|
|
||||||
return reply
|
|
||||||
.code(200)
|
|
||||||
.header('Content-Type', 'application/pdf')
|
|
||||||
.header('Content-Disposition', 'attachment; filename="capture.pdf"')
|
|
||||||
.send(pdf);
|
|
||||||
}
|
|
||||||
catch(err) {
|
|
||||||
request.log.error(`Capture failed. ${err}`);
|
|
||||||
|
|
||||||
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
|
|
||||||
error: true,
|
|
||||||
errorReason: "CAPTURE-ISSUE"
|
|
||||||
});
|
|
||||||
|
|
||||||
return reply
|
|
||||||
.code(500)
|
|
||||||
.header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
.send(html);
|
|
||||||
}
|
|
||||||
// In any case: we need to decrease CAPTURES_WATCH counts.
|
|
||||||
finally {
|
|
||||||
CAPTURES_WATCH.currentTotal -= 1;
|
|
||||||
|
|
||||||
if (accessKey && accessKey in CAPTURES_WATCH.currentByAccessKey) {
|
|
||||||
CAPTURES_WATCH.currentByAccessKey[data["access-key"]] -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* [GET] /api/v1/hashes/check/<sha512-hash>
|
|
||||||
* Checks if a given SHA512 hash is in the "success" logs, meaning this app created it.
|
|
||||||
* Hash is passed as the last parameter, url encoded.
|
|
||||||
*
|
|
||||||
* Returns HTTP 200 if found, HTTP 404 if not.
|
|
||||||
*/
|
|
||||||
fastify.get('/api/v1/hashes/check/:hash', async (request, reply) => {
|
|
||||||
let found = false;
|
|
||||||
const { hash } = request.params;
|
|
||||||
|
|
||||||
if (hash.length === 95 || hash.length === 88) {
|
|
||||||
found = successLog.findHashInLogs(hash);
|
|
||||||
}
|
|
||||||
|
|
||||||
return reply.code(found ? 200 : 404).send();
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -124,7 +124,7 @@
|
||||||
|
|
||||||
<h2>Who can use it?</h2>
|
<h2>Who can use it?</h2>
|
||||||
|
|
||||||
<p>To use our website <a href="https://docs.google.com/forms/d/11pVfBReAHmHGmtzKrQ4XqrvOMBr4BI4bX-hDdsn0OuQ/viewform">you'll need to contact us</a> for an API key. We're currently only able to share a limited number with people like journalists, internet scholars, and archivists. But you can also use our open source software (coming soon!) to stand up an archive server of your own, and share it with your friends.</p>
|
<p>To use our website <a href="https://docs.google.com/forms/d/11pVfBReAHmHGmtzKrQ4XqrvOMBr4BI4bX-hDdsn0OuQ/viewform">you'll need to contact us</a> for an API key. We're currently only able to share a limited number with people like journalists, internet scholars, and archivists. But you can also use <a href="https://github.com/harvard-lil/archive.social">our open source software</a> to stand up an archive server of your own, and share it with your friends.</p>
|
||||||
|
|
||||||
<h2>Why make a PDF archiving tool for Twitter?</h2>
|
<h2>Why make a PDF archiving tool for Twitter?</h2>
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@ import { DATA_PATH } from "../const.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class for handling access keys to the app.
|
* Utility class for handling access keys to the app.
|
||||||
* [!] Needs replacement.
|
* [!] For alpha launch only.
|
||||||
*/
|
*/
|
||||||
export class AccessKeys {
|
export class AccessKeys {
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/**
|
/**
|
||||||
* archive.social
|
* archive.social
|
||||||
* @module utils.logCaptureSuccess
|
* @module utils.SuccessLog
|
||||||
* @author The Harvard Library Innovation Lab
|
* @author The Harvard Library Innovation Lab
|
||||||
* @license MIT
|
* @license MIT
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -50,8 +50,8 @@ export class TwitterCapture {
|
||||||
renderTimeout: 4000,
|
renderTimeout: 4000,
|
||||||
};
|
};
|
||||||
|
|
||||||
/** @type {object} - Based on TwitterCapture.defaults */
|
/** @type {object} */
|
||||||
options = {};
|
options = {}; // Based on TwitterCapture.defaults
|
||||||
|
|
||||||
/** @type {?string} */
|
/** @type {?string} */
|
||||||
url = null;
|
url = null;
|
||||||
|
@ -61,9 +61,9 @@ export class TwitterCapture {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @type {{
|
* @type {{
|
||||||
* browser: ?import('playwright').Browser,
|
* browser: playwright.Browser,
|
||||||
* context: ?import('playwright').BrowserContext,
|
* context: playwright.BrowserContext,
|
||||||
* page: ?import('playwright').Page,
|
* page: playwright.Page,
|
||||||
* viewport: ?{width: number, height: number},
|
* viewport: ?{width: number, height: number},
|
||||||
* ready: boolean
|
* ready: boolean
|
||||||
* }}
|
* }}
|
||||||
|
@ -219,7 +219,7 @@ export class TwitterCapture {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adjusts the current page's DOM so the resulting PDF is not affected by UI artifact.
|
* Adjusts the current page's DOM so the resulting PDF is not affected by UI artifact.
|
||||||
* Playwright needs to be ready.
|
* Playwright needs to be ready.
|
||||||
*
|
*
|
||||||
* @returns {Promise<void>}
|
* @returns {Promise<void>}
|
||||||
|
@ -366,7 +366,7 @@ export class TwitterCapture {
|
||||||
* Uses Playwright's network interception to capture images and add them to `this.interceptedJPEGs`.
|
* Uses Playwright's network interception to capture images and add them to `this.interceptedJPEGs`.
|
||||||
* Called whenever Playwright processes an HTTP response.
|
* Called whenever Playwright processes an HTTP response.
|
||||||
*
|
*
|
||||||
* @param {import('playwright').Response} response
|
* @param {playwright.Response} response
|
||||||
* @returns {Promise<void>}
|
* @returns {Promise<void>}
|
||||||
*/
|
*/
|
||||||
interceptJpegs = async(response) => {
|
interceptJpegs = async(response) => {
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
<a name="const.module_js"></a>
|
||||||
|
|
||||||
|
## js
|
||||||
|
archive.social
|
||||||
|
|
||||||
|
**Author**: The Harvard Library Innovation Lab
|
||||||
|
**License**: MIT
|
||||||
|
|
||||||
|
* [js](#const.module_js)
|
||||||
|
* [.CERTS_PATH](#const.module_js.CERTS_PATH)
|
||||||
|
* [.DATA_PATH](#const.module_js.DATA_PATH)
|
||||||
|
* [.TMP_PATH](#const.module_js.TMP_PATH)
|
||||||
|
* [.TEMPLATES_PATH](#const.module_js.TEMPLATES_PATH)
|
||||||
|
* [.EXECUTABLES_FOLDER](#const.module_js.EXECUTABLES_FOLDER)
|
||||||
|
* [.STATIC_PATH](#const.module_js.STATIC_PATH)
|
||||||
|
* [.MAX_PARALLEL_CAPTURES_TOTAL](#const.module_js.MAX_PARALLEL_CAPTURES_TOTAL)
|
||||||
|
* [.MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY](#const.module_js.MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY)
|
||||||
|
|
||||||
|
<a name="const.module_js.CERTS_PATH"></a>
|
||||||
|
|
||||||
|
### js.CERTS\_PATH
|
||||||
|
Path to the folder holding the certificates used for signing the PDFs.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.DATA_PATH"></a>
|
||||||
|
|
||||||
|
### js.DATA\_PATH
|
||||||
|
Path to the "data" folder.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.TMP_PATH"></a>
|
||||||
|
|
||||||
|
### js.TMP\_PATH
|
||||||
|
Path to the folder in which temporary files will be written by the app.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.TEMPLATES_PATH"></a>
|
||||||
|
|
||||||
|
### js.TEMPLATES\_PATH
|
||||||
|
Path to the "templates" folder.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.EXECUTABLES_FOLDER"></a>
|
||||||
|
|
||||||
|
### js.EXECUTABLES\_FOLDER
|
||||||
|
Path to the "executables" folder, for dependencies that are meant to be executed directly, such as `yt-dlp`.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.STATIC_PATH"></a>
|
||||||
|
|
||||||
|
### js.STATIC\_PATH
|
||||||
|
Path to the "static" folder.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.MAX_PARALLEL_CAPTURES_TOTAL"></a>
|
||||||
|
|
||||||
|
### js.MAX\_PARALLEL\_CAPTURES\_TOTAL
|
||||||
|
Maximum capture processes that can be run in parallel.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
||||||
|
<a name="const.module_js.MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY"></a>
|
||||||
|
|
||||||
|
### js.MAX\_PARALLEL\_CAPTURES\_PER\_ACCESS\_KEY
|
||||||
|
Maximum capture processes that can be run in parallel for a given key.
|
||||||
|
|
||||||
|
**Kind**: static constant of [<code>js</code>](#const.module_js)
|
|
@ -0,0 +1,80 @@
|
||||||
|
<a name="server.module_js"></a>
|
||||||
|
|
||||||
|
## js
|
||||||
|
archive.social
|
||||||
|
|
||||||
|
**Author**: The Harvard Library Innovation Lab
|
||||||
|
**License**: MIT
|
||||||
|
|
||||||
|
* [js](#server.module_js)
|
||||||
|
* [~successLog](#server.module_js..successLog) : <code>SuccessLog</code>
|
||||||
|
* [~accessKeys](#server.module_js..accessKeys) : <code>AccessKeys</code>
|
||||||
|
* [~CAPTURES_WATCH](#server.module_js..CAPTURES_WATCH) : <code>Object</code>
|
||||||
|
* [~index(request, reply)](#server.module_js..index) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
|
||||||
|
* [~capture(request, reply)](#server.module_js..capture) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
|
||||||
|
* [~checkHash(request, reply)](#server.module_js..checkHash) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
|
||||||
|
|
||||||
|
<a name="server.module_js..successLog"></a>
|
||||||
|
|
||||||
|
### js~successLog : <code>SuccessLog</code>
|
||||||
|
**Kind**: inner constant of [<code>js</code>](#server.module_js)
|
||||||
|
<a name="server.module_js..accessKeys"></a>
|
||||||
|
|
||||||
|
### js~accessKeys : <code>AccessKeys</code>
|
||||||
|
**Kind**: inner constant of [<code>js</code>](#server.module_js)
|
||||||
|
<a name="server.module_js..CAPTURES_WATCH"></a>
|
||||||
|
|
||||||
|
### js~CAPTURES\_WATCH : <code>Object</code>
|
||||||
|
Keeps track of how many capture processes are currently running.
|
||||||
|
May be used to redirect users if over capacity.
|
||||||
|
|
||||||
|
[!] Only good for early prototyping.
|
||||||
|
|
||||||
|
**Kind**: inner constant of [<code>js</code>](#server.module_js)
|
||||||
|
<a name="server.module_js..index"></a>
|
||||||
|
|
||||||
|
### js~index(request, reply) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
|
||||||
|
[GET] /
|
||||||
|
Shows the landing page / form.
|
||||||
|
Assumes `fastify` is in scope.
|
||||||
|
|
||||||
|
**Kind**: inner method of [<code>js</code>](#server.module_js)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| request | <code>fastify.FastifyRequest</code> |
|
||||||
|
| reply | <code>fastify.FastifyReply</code> |
|
||||||
|
|
||||||
|
<a name="server.module_js..capture"></a>
|
||||||
|
|
||||||
|
### js~capture(request, reply) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
|
||||||
|
[POST] `/`
|
||||||
|
Processes a request to capture a `twitter.com` url.
|
||||||
|
Serves PDF bytes directly if operation is successful.
|
||||||
|
Returns to form with specific error code, passed as `errorReason`, otherwise.
|
||||||
|
Assumes `fastify` is in scope.
|
||||||
|
|
||||||
|
**Kind**: inner method of [<code>js</code>](#server.module_js)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| request | <code>fastify.FastifyRequest</code> |
|
||||||
|
| reply | <code>fastify.FastifyReply</code> |
|
||||||
|
|
||||||
|
<a name="server.module_js..checkHash"></a>
|
||||||
|
|
||||||
|
### js~checkHash(request, reply) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
|
||||||
|
[GET] `/api/v1/hashes/check/<sha512-hash>`.
|
||||||
|
Checks if a given SHA512 hash is in the "success" logs, meaning this app created it.
|
||||||
|
Hash is passed as the last parameter, url encoded.
|
||||||
|
Assumes `fastify` is in scope.
|
||||||
|
|
||||||
|
Returns HTTP 200 if found, HTTP 404 if not.
|
||||||
|
|
||||||
|
**Kind**: inner method of [<code>js</code>](#server.module_js)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| request | <code>fastify.FastifyRequest</code> |
|
||||||
|
| reply | <code>fastify.FastifyReply</code> |
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
<a name="utils.module_AccessKeys"></a>
|
||||||
|
|
||||||
|
## AccessKeys
|
||||||
|
archive.social
|
||||||
|
|
||||||
|
**Author**: The Harvard Library Innovation Lab
|
||||||
|
**License**: MIT
|
||||||
|
|
||||||
|
* [AccessKeys](#utils.module_AccessKeys)
|
||||||
|
* [.AccessKeys](#utils.module_AccessKeys.AccessKeys)
|
||||||
|
* [new exports.AccessKeys()](#new_utils.module_AccessKeys.AccessKeys_new)
|
||||||
|
* [.filepath](#utils.module_AccessKeys.AccessKeys+filepath) : <code>string</code>
|
||||||
|
* [.check(accessKey)](#utils.module_AccessKeys.AccessKeys+check)
|
||||||
|
|
||||||
|
<a name="utils.module_AccessKeys.AccessKeys"></a>
|
||||||
|
|
||||||
|
### AccessKeys.AccessKeys
|
||||||
|
Utility class for handling access keys to the app.
|
||||||
|
[!] For alpha launch only.
|
||||||
|
|
||||||
|
**Kind**: static class of [<code>AccessKeys</code>](#utils.module_AccessKeys)
|
||||||
|
|
||||||
|
* [.AccessKeys](#utils.module_AccessKeys.AccessKeys)
|
||||||
|
* [new exports.AccessKeys()](#new_utils.module_AccessKeys.AccessKeys_new)
|
||||||
|
* [.filepath](#utils.module_AccessKeys.AccessKeys+filepath) : <code>string</code>
|
||||||
|
* [.check(accessKey)](#utils.module_AccessKeys.AccessKeys+check)
|
||||||
|
|
||||||
|
<a name="new_utils.module_AccessKeys.AccessKeys_new"></a>
|
||||||
|
|
||||||
|
#### new exports.AccessKeys()
|
||||||
|
On init:
|
||||||
|
- Create access keys file if it doesn't exist
|
||||||
|
- Load keys from file into `this.#keys`.
|
||||||
|
|
||||||
|
<a name="utils.module_AccessKeys.AccessKeys+filepath"></a>
|
||||||
|
|
||||||
|
#### accessKeys.filepath : <code>string</code>
|
||||||
|
Complete path to `access-keys.json`.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>AccessKeys</code>](#utils.module_AccessKeys.AccessKeys)
|
||||||
|
<a name="utils.module_AccessKeys.AccessKeys+check"></a>
|
||||||
|
|
||||||
|
#### accessKeys.check(accessKey)
|
||||||
|
Checks that a given access key is valid and active.
|
||||||
|
|
||||||
|
**Kind**: instance method of [<code>AccessKeys</code>](#utils.module_AccessKeys.AccessKeys)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| accessKey | <code>string</code> |
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
<a name="utils.module_SuccessLog"></a>
|
||||||
|
|
||||||
|
## SuccessLog
|
||||||
|
archive.social
|
||||||
|
|
||||||
|
**Author**: The Harvard Library Innovation Lab
|
||||||
|
**License**: MIT
|
||||||
|
|
||||||
|
* [SuccessLog](#utils.module_SuccessLog)
|
||||||
|
* [.SuccessLog](#utils.module_SuccessLog.SuccessLog)
|
||||||
|
* [new exports.SuccessLog()](#new_utils.module_SuccessLog.SuccessLog_new)
|
||||||
|
* [.filepath](#utils.module_SuccessLog.SuccessLog+filepath) : <code>string</code>
|
||||||
|
* [.add(accessKey, pdfBytes)](#utils.module_SuccessLog.SuccessLog+add)
|
||||||
|
* [.findHashInLogs(hash)](#utils.module_SuccessLog.SuccessLog+findHashInLogs) ⇒ <code>boolean</code>
|
||||||
|
* [.reset()](#utils.module_SuccessLog.SuccessLog+reset) ⇒ <code>void</code>
|
||||||
|
|
||||||
|
<a name="utils.module_SuccessLog.SuccessLog"></a>
|
||||||
|
|
||||||
|
### SuccessLog.SuccessLog
|
||||||
|
**Kind**: static class of [<code>SuccessLog</code>](#utils.module_SuccessLog)
|
||||||
|
|
||||||
|
* [.SuccessLog](#utils.module_SuccessLog.SuccessLog)
|
||||||
|
* [new exports.SuccessLog()](#new_utils.module_SuccessLog.SuccessLog_new)
|
||||||
|
* [.filepath](#utils.module_SuccessLog.SuccessLog+filepath) : <code>string</code>
|
||||||
|
* [.add(accessKey, pdfBytes)](#utils.module_SuccessLog.SuccessLog+add)
|
||||||
|
* [.findHashInLogs(hash)](#utils.module_SuccessLog.SuccessLog+findHashInLogs) ⇒ <code>boolean</code>
|
||||||
|
* [.reset()](#utils.module_SuccessLog.SuccessLog+reset) ⇒ <code>void</code>
|
||||||
|
|
||||||
|
<a name="new_utils.module_SuccessLog.SuccessLog_new"></a>
|
||||||
|
|
||||||
|
#### new exports.SuccessLog()
|
||||||
|
On init:
|
||||||
|
- Create log file if it doesn't exist
|
||||||
|
- Load hashes from file into `this.#hashes`.
|
||||||
|
|
||||||
|
<a name="utils.module_SuccessLog.SuccessLog+filepath"></a>
|
||||||
|
|
||||||
|
#### successLog.filepath : <code>string</code>
|
||||||
|
Complete path to `success-log.json`.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>SuccessLog</code>](#utils.module_SuccessLog.SuccessLog)
|
||||||
|
<a name="utils.module_SuccessLog.SuccessLog+add"></a>
|
||||||
|
|
||||||
|
#### successLog.add(accessKey, pdfBytes)
|
||||||
|
Calculates hash of a PDF and:
|
||||||
|
- Creates a success log entry
|
||||||
|
- Updates `this.#hashes` (so it doesn't need to reload from file)
|
||||||
|
|
||||||
|
**Kind**: instance method of [<code>SuccessLog</code>](#utils.module_SuccessLog.SuccessLog)
|
||||||
|
|
||||||
|
| Param | Type | Description |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| accessKey | <code>string</code> | |
|
||||||
|
| pdfBytes | <code>Buffer</code> | Used to store a SHA512 hash of the PDF that was delivered |
|
||||||
|
|
||||||
|
<a name="utils.module_SuccessLog.SuccessLog+findHashInLogs"></a>
|
||||||
|
|
||||||
|
#### successLog.findHashInLogs(hash) ⇒ <code>boolean</code>
|
||||||
|
Checks whether or not a given hash is present in the logs.
|
||||||
|
|
||||||
|
**Kind**: instance method of [<code>SuccessLog</code>](#utils.module_SuccessLog.SuccessLog)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| hash | <code>string</code> |
|
||||||
|
|
||||||
|
<a name="utils.module_SuccessLog.SuccessLog+reset"></a>
|
||||||
|
|
||||||
|
#### successLog.reset() ⇒ <code>void</code>
|
||||||
|
Resets `success-log.json`.
|
||||||
|
Also clears `this.#hashes`.
|
||||||
|
|
||||||
|
**Kind**: instance method of [<code>SuccessLog</code>](#utils.module_SuccessLog.SuccessLog)
|
|
@ -0,0 +1,263 @@
|
||||||
|
<a name="utils.module_TwitterCapture"></a>
|
||||||
|
|
||||||
|
## TwitterCapture
|
||||||
|
archive.social
|
||||||
|
|
||||||
|
**Author**: The Harvard Library Innovation Lab
|
||||||
|
**License**: MIT
|
||||||
|
|
||||||
|
* [TwitterCapture](#utils.module_TwitterCapture)
|
||||||
|
* [.TwitterCapture](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
* [new exports.TwitterCapture(url, options)](#new_utils.module_TwitterCapture.TwitterCapture_new)
|
||||||
|
* [.defaults](#utils.module_TwitterCapture.TwitterCapture+defaults)
|
||||||
|
* [.options](#utils.module_TwitterCapture.TwitterCapture+options) : <code>object</code>
|
||||||
|
* [.url](#utils.module_TwitterCapture.TwitterCapture+url) : <code>string</code>
|
||||||
|
* [.urlType](#utils.module_TwitterCapture.TwitterCapture+urlType) : <code>string</code>
|
||||||
|
* [.playwright](#utils.module_TwitterCapture.TwitterCapture+playwright) : <code>Object</code>
|
||||||
|
* [.interceptedJPEGs](#utils.module_TwitterCapture.TwitterCapture+interceptedJPEGs) : <code>object.<string, Buffer></code>
|
||||||
|
* [.capture](#utils.module_TwitterCapture.TwitterCapture+capture) ⇒ <code>Promise.<Buffer></code>
|
||||||
|
* [.setup](#utils.module_TwitterCapture.TwitterCapture+setup) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.teardown](#utils.module_TwitterCapture.TwitterCapture+teardown)
|
||||||
|
* [.adjustUIForCapture](#utils.module_TwitterCapture.TwitterCapture+adjustUIForCapture) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.runBrowserBehaviors](#utils.module_TwitterCapture.TwitterCapture+runBrowserBehaviors) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.resizeViewportToFitDocument](#utils.module_TwitterCapture.TwitterCapture+resizeViewportToFitDocument) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.getDocumentDimensions](#utils.module_TwitterCapture.TwitterCapture+getDocumentDimensions) ⇒ <code>Promise.<{width: number, height: number}></code>
|
||||||
|
* [.interceptJpegs](#utils.module_TwitterCapture.TwitterCapture+interceptJpegs) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.generateRawPDF](#utils.module_TwitterCapture.TwitterCapture+generateRawPDF) ⇒ <code>Promise.<Buffer></code>
|
||||||
|
* [.addInterceptedJPEGsToPDF](#utils.module_TwitterCapture.TwitterCapture+addInterceptedJPEGsToPDF) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.captureAndAddVideoToPDF](#utils.module_TwitterCapture.TwitterCapture+captureAndAddVideoToPDF) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.cropMarginsOnPDF](#utils.module_TwitterCapture.TwitterCapture+cropMarginsOnPDF)
|
||||||
|
* [.signPDF](#utils.module_TwitterCapture.TwitterCapture+signPDF) ⇒ <code>Buffer</code>
|
||||||
|
* [.filterOptions](#utils.module_TwitterCapture.TwitterCapture+filterOptions)
|
||||||
|
* [.filterUrl](#utils.module_TwitterCapture.TwitterCapture+filterUrl) ⇒ <code>bool</code>
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture"></a>
|
||||||
|
|
||||||
|
### TwitterCapture.TwitterCapture
|
||||||
|
Generates a "sealed" PDF out of a twitter.com url using Playwright.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
```
|
||||||
|
const tweet = new TwitterCapture(url);
|
||||||
|
const pdf = await tweet.capture();
|
||||||
|
fs.writeFileSync("tweet.pdf", pdf);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Kind**: static class of [<code>TwitterCapture</code>](#utils.module_TwitterCapture)
|
||||||
|
|
||||||
|
* [.TwitterCapture](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
* [new exports.TwitterCapture(url, options)](#new_utils.module_TwitterCapture.TwitterCapture_new)
|
||||||
|
* [.defaults](#utils.module_TwitterCapture.TwitterCapture+defaults)
|
||||||
|
* [.options](#utils.module_TwitterCapture.TwitterCapture+options) : <code>object</code>
|
||||||
|
* [.url](#utils.module_TwitterCapture.TwitterCapture+url) : <code>string</code>
|
||||||
|
* [.urlType](#utils.module_TwitterCapture.TwitterCapture+urlType) : <code>string</code>
|
||||||
|
* [.playwright](#utils.module_TwitterCapture.TwitterCapture+playwright) : <code>Object</code>
|
||||||
|
* [.interceptedJPEGs](#utils.module_TwitterCapture.TwitterCapture+interceptedJPEGs) : <code>object.<string, Buffer></code>
|
||||||
|
* [.capture](#utils.module_TwitterCapture.TwitterCapture+capture) ⇒ <code>Promise.<Buffer></code>
|
||||||
|
* [.setup](#utils.module_TwitterCapture.TwitterCapture+setup) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.teardown](#utils.module_TwitterCapture.TwitterCapture+teardown)
|
||||||
|
* [.adjustUIForCapture](#utils.module_TwitterCapture.TwitterCapture+adjustUIForCapture) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.runBrowserBehaviors](#utils.module_TwitterCapture.TwitterCapture+runBrowserBehaviors) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.resizeViewportToFitDocument](#utils.module_TwitterCapture.TwitterCapture+resizeViewportToFitDocument) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.getDocumentDimensions](#utils.module_TwitterCapture.TwitterCapture+getDocumentDimensions) ⇒ <code>Promise.<{width: number, height: number}></code>
|
||||||
|
* [.interceptJpegs](#utils.module_TwitterCapture.TwitterCapture+interceptJpegs) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.generateRawPDF](#utils.module_TwitterCapture.TwitterCapture+generateRawPDF) ⇒ <code>Promise.<Buffer></code>
|
||||||
|
* [.addInterceptedJPEGsToPDF](#utils.module_TwitterCapture.TwitterCapture+addInterceptedJPEGsToPDF) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.captureAndAddVideoToPDF](#utils.module_TwitterCapture.TwitterCapture+captureAndAddVideoToPDF) ⇒ <code>Promise.<void></code>
|
||||||
|
* [.cropMarginsOnPDF](#utils.module_TwitterCapture.TwitterCapture+cropMarginsOnPDF)
|
||||||
|
* [.signPDF](#utils.module_TwitterCapture.TwitterCapture+signPDF) ⇒ <code>Buffer</code>
|
||||||
|
* [.filterOptions](#utils.module_TwitterCapture.TwitterCapture+filterOptions)
|
||||||
|
* [.filterUrl](#utils.module_TwitterCapture.TwitterCapture+filterUrl) ⇒ <code>bool</code>
|
||||||
|
|
||||||
|
<a name="new_utils.module_TwitterCapture.TwitterCapture_new"></a>
|
||||||
|
|
||||||
|
#### new exports.TwitterCapture(url, options)
|
||||||
|
|
||||||
|
| Param | Type | Description |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| url | <code>string</code> | `twitter.com` url to capture. Works best on statuses and threads. |
|
||||||
|
| options | <code>object</code> | See `TwitterCapture.defaults` for detailed options. Will use defaults unless overridden. |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+defaults"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.defaults
|
||||||
|
Defaults for options that can be passed to `TwitterCapture`.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
**Properties**
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| privateKeyPath | <code>string</code> | Path to `.pem` file containing a private key. |
|
||||||
|
| certPath | <code>string</code> | Path to a `.pem` file containing a certificate. |
|
||||||
|
| tmpFolderPath | <code>string</code> | Path to a folder in which temporary files can be written. |
|
||||||
|
| ytDlpPath | <code>string</code> | Path to the `yt-dlp` executable. |
|
||||||
|
| timestampServerUrl | <code>string</code> | Timestamping server. |
|
||||||
|
| networkidleTimeout | <code>number</code> | Time to wait for "networkidle" state. |
|
||||||
|
| runBrowserBehaviors | <code>boolean</code> | If `true`, will try to auto-scroll and open more responses. Set to `false` automatically when trying to capture a profile url. |
|
||||||
|
| browserBehaviorsTimeout | <code>number</code> | Maximum browser behaviors execution time. |
|
||||||
|
| videoCaptureTimeout | <code>number</code> | Maximum yt-dlp execution time. |
|
||||||
|
| renderTimeout | <code>number</code> | Time to wait for re-renders. |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+options"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.options : <code>object</code>
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+url"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.url : <code>string</code>
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+urlType"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.urlType : <code>string</code>
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+playwright"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.playwright : <code>Object</code>
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+interceptedJPEGs"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.interceptedJPEGs : <code>object.<string, Buffer></code>
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+capture"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.capture ⇒ <code>Promise.<Buffer></code>
|
||||||
|
Captures the current Twitter.com url and makes it a signed PDF.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
**Returns**: <code>Promise.<Buffer></code> - - Signed PDF.
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+setup"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.setup ⇒ <code>Promise.<void></code>
|
||||||
|
Sets up the browser used for capture as well as network interception for images capture.
|
||||||
|
Populates `this.playwright`.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+teardown"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.teardown
|
||||||
|
Closes browser used for capture.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+adjustUIForCapture"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.adjustUIForCapture ⇒ <code>Promise.<void></code>
|
||||||
|
Adjusts the current page's DOM so the resulting PDF is not affected by UI artifacts.
|
||||||
|
Playwright needs to be ready.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+runBrowserBehaviors"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.runBrowserBehaviors ⇒ <code>Promise.<void></code>
|
||||||
|
Runs browser behaviors:
|
||||||
|
- Tries to scroll through the page.
|
||||||
|
- Tries to click on the next available "Show replies" button.
|
||||||
|
|
||||||
|
Playwright needs to be ready.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+resizeViewportToFitDocument"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.resizeViewportToFitDocument ⇒ <code>Promise.<void></code>
|
||||||
|
Stretches the viewport to match the document's dimensions.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+getDocumentDimensions"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.getDocumentDimensions ⇒ <code>Promise.<{width: number, height: number}></code>
|
||||||
|
Returns the current dimensions of the document.
|
||||||
|
Playwright needs to be ready.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+interceptJpegs"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.interceptJpegs ⇒ <code>Promise.<void></code>
|
||||||
|
Uses Playwright's network interception to capture images and add them to `this.interceptedJPEGs`.
|
||||||
|
Called whenever Playwright processes an HTTP response.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| response | <code>playwright.Response</code> |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+generateRawPDF"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.generateRawPDF ⇒ <code>Promise.<Buffer></code>
|
||||||
|
Generates a PDF of the current page using Chrome Dev Tools.
|
||||||
|
Playwright needs to be ready.
|
||||||
|
|
||||||
|
Populates `this.pdf`.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
**Returns**: <code>Promise.<Buffer></code> - - PDF Bytes
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+addInterceptedJPEGsToPDF"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.addInterceptedJPEGsToPDF ⇒ <code>Promise.<void></code>
|
||||||
|
Adds entries from `this.interceptedJPEGs` to the PDF.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
|
||||||
|
| Type | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| <code>PDFDocument</code> | Editable PDF object from `pdf-lib`. |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+captureAndAddVideoToPDF"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.captureAndAddVideoToPDF ⇒ <code>Promise.<void></code>
|
||||||
|
Tries to capture main video from current Twitter url and add it as attachment to the PDF.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
|
||||||
|
| Type | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| <code>PDFDocument</code> | Editable PDF object from `pdf-lib`. |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+cropMarginsOnPDF"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.cropMarginsOnPDF
|
||||||
|
Tries to remove some of the white space at the bottom of the PDF.
|
||||||
|
[!] TODO: This is a "let's ship it" hack. We will need to find a better solution.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| editablePDF | <code>PDFDocument</code> |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+signPDF"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.signPDF ⇒ <code>Buffer</code>
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
**Returns**: <code>Buffer</code> - - PDF Bytes
|
||||||
|
|
||||||
|
| Param | Type | Description |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| editedPDF | <code>Buffer</code> | PDF Bytes |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+filterOptions"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.filterOptions
|
||||||
|
Applies some basic filtering to new option objects and fills gaps with defaults.
|
||||||
|
Replaces `this.options` after filtering.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| newOptions | <code>Promise.<object></code> |
|
||||||
|
|
||||||
|
<a name="utils.module_TwitterCapture.TwitterCapture+filterUrl"></a>
|
||||||
|
|
||||||
|
#### twitterCapture.filterUrl ⇒ <code>bool</code>
|
||||||
|
Filters a given URL to ensure it's a `twitter.com` one.
|
||||||
|
Also asserts its "type": "status", "search", or "profile".
|
||||||
|
|
||||||
|
Automatically populates `this.url` and `this.urlType`.
|
||||||
|
|
||||||
|
**Kind**: instance property of [<code>TwitterCapture</code>](#utils.module_TwitterCapture.TwitterCapture)
|
||||||
|
|
||||||
|
| Param | Type |
|
||||||
|
| --- | --- |
|
||||||
|
| url | <code>string</code> |
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
<a name="module_utils"></a>
|
||||||
|
|
||||||
|
## utils
|
||||||
|
archive.social
|
||||||
|
|
||||||
|
**Author**: The Harvard Library Innovation Lab
|
||||||
|
**License**: MIT
|
Plik diff jest za duży
Load Diff
|
@ -8,7 +8,8 @@
|
||||||
"start": "fastify start app/server.js -l warn",
|
"start": "fastify start app/server.js -l warn",
|
||||||
"dev": "fastify start app/server.js -l info -w",
|
"dev": "fastify start app/server.js -l info -w",
|
||||||
"postinstall": "cd scripts && bash download-yt-dlp.sh && bash pip-install.sh",
|
"postinstall": "cd scripts && bash download-yt-dlp.sh && bash pip-install.sh",
|
||||||
"generate-local-cert": "cd scripts && bash generate-local-cert.sh",
|
"generate-dev-cert": "cd scripts && bash generate-dev-cert.sh",
|
||||||
|
"docgen": "cd scripts && bash docgen.sh",
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
@ -40,6 +41,7 @@
|
||||||
"forwarded": "^0.2.0",
|
"forwarded": "^0.2.0",
|
||||||
"ieee754": "^1.2.1",
|
"ieee754": "^1.2.1",
|
||||||
"ipaddr.js": "^1.9.1",
|
"ipaddr.js": "^1.9.1",
|
||||||
|
"jsdoc-to-markdown": "^7.1.1",
|
||||||
"json-schema-traverse": "^1.0.0",
|
"json-schema-traverse": "^1.0.0",
|
||||||
"light-my-request": "^5.6.1",
|
"light-my-request": "^5.6.1",
|
||||||
"lru-cache": "^6.0.0",
|
"lru-cache": "^6.0.0",
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
# Generates documentation using JSDoc comments
|
||||||
|
jsdoc2md ../app/server.js > ../docs/server.md;
|
||||||
|
jsdoc2md ../app/const.js > ../docs/const.md;
|
||||||
|
|
||||||
|
jsdoc2md ../app/utils/index.js > ../docs/utils/index.md;
|
||||||
|
jsdoc2md ../app/utils/AccessKeys.js > ../docs/utils/AccessKeys.md;
|
||||||
|
jsdoc2md ../app/utils/SuccessLog.js > ../docs/utils/SuccessLog.md;
|
||||||
|
jsdoc2md ../app/utils/TwitterCapture.js > ../docs/utils/TwitterCapture.md;
|
|
@ -0,0 +1,3 @@
|
||||||
|
# [DEV ONLY] Generates a local key pair that can be used for signing PDFs.
|
||||||
|
# Will be saved under ../certs.
|
||||||
|
openssl req -x509 -newkey rsa:4096 -keyout ../certs/key.pem -out ../certs/cert.pem -days 3650 -nodes -subj /CN="archive.social DEV";
|
|
@ -1,3 +0,0 @@
|
||||||
# Generates a local key pair that can be used for signing PDFs.
|
|
||||||
# Will be saved under ../certs.
|
|
||||||
openssl req -x509 -newkey rsa:4096 -keyout ../certs/key.pem -out ../certs/cert.pem -days 3650 -nodes -subj /CN="archive.social";
|
|
|
@ -1,2 +1 @@
|
||||||
pip3 install "pyHanko[pkcs11,image-support,opentype,xmp]"==0.15.1;
|
pip3 install "pyHanko[pkcs11,image-support,opentype,xmp]"==0.15.1;
|
||||||
pip3 install pdfCropMargins==1.0.9;
|
|
Ładowanie…
Reference in New Issue