Hybrid ip + access key system

pull/16/head
Matteo Cargnelutti 2022-11-29 15:39:08 -05:00
rodzic e0c6134119
commit 8e30fe2350
16 zmienionych plików z 393 dodań i 34 usunięć

Wyświetl plik

@ -16,6 +16,8 @@ An experiment of the [Harvard Library Innovation Lab](https://lil.law.harvard.ed
- [Local development](#local-development)
- [Dev CLI](#dev-cli)
- [Code docs](/docs)
- [Environment variables](#environment-variables)
- [Access Keys System](#access-keys-system)
---
@ -125,3 +127,42 @@ Runs the test suite. Requires test fixtures _(see `fixtures` folder)_.
[☝️ Back to summary](#summary)
---
## Environment variables
| Name | Required? | Description |
| --- | --- | --- |
| `CERTS_PATH` | No | If set, will be used as path to `.pem` files used for signing .PDF files. |
| `DATA_PATH` | No | If set, will be used as path to folder used for storing app data. |
| `REQUIRE_ACCESS_KEY` | No | If set to `"1"`, an access key will be required to make a capture. |
| `MAX_PARALLEL_CAPTURES_TOTAL` | No | If set and contains an integer, determines the maximum number of captures that the server can run in parallel. |
| `MAX_PARALLEL_CAPTURES_PER_IP` | No | If set and contains an integer, determines the maximum number of captures that a single client can run in parallel. |
[☝️ Back to summary](#summary)
---
## Access keys system
If the `REQUIRE_ACCESS_KEY` environment variable is on, users will be required to use an access key to make captures.
Keys can be stored in a file named `access-keys.json` under the _"data"_ folder.
**Example: `app/data/access-keys.json`:**
```json
{
"BB67BBC4-1F4B-4353-8E6D-9927A10F4509": true
}
```
### Create an access key to test with:
```bash
$ uuidgen
BB67BBC4-1F4B-4353-8E6D-9927A10F4509
```
[☝️ Back to summary](#summary)

Wyświetl plik

@ -8,12 +8,16 @@ import fs from "fs";
/**
* Path to the folder holding the certificates used for signing the PDFs.
* Defaults to `./certs`
* Can be replaced via the `CERTS_PATH` env variable.
* @constant
*/
export const CERTS_PATH = process.env.CERTS_PATH ? process.env.CERTS_PATH : `${process.env.PWD}/certs/`;
/**
* Path to the "data" folder.
* Defaults to `./app/data`
* Can be replaced via the `DATA_PATH` env variable.
* @constant
*/
export const DATA_PATH = process.env.DATA_PATH ? process.env.DATA_PATH : `${process.env.PWD}/app/data/`;
@ -43,16 +47,48 @@ export const EXECUTABLES_FOLDER = `${process.env.PWD}/executables/`;
export const STATIC_PATH = `${process.env.PWD}/app/static/`;
/**
* Maximum capture processes that can be run in parallel.
* If `true`, users will be required to provide an access key.
* Defaults to `false`.
* Can be replaced via the `REQUIRE_ACCESS_KEY` env variable, if set to "1".
* @constant
*/
export const MAX_PARALLEL_CAPTURES_TOTAL = 50;
export const REQUIRE_ACCESS_KEY = process.env.REQUIRE_ACCESS_KEY === "1" ? true : false;
/**
* Maximum capture processes that can be run in parallel for a IP address.
* Maximum capture processes that can be run in parallel.
* Defaults to 50.
* Can be replaced via the `MAX_PARALLEL_CAPTURES_TOTAL` env variable.
* @constant
*/
export const MAX_PARALLEL_CAPTURES_PER_IP = 2;
export const MAX_PARALLEL_CAPTURES_TOTAL = (() => {
  // Fallback used when the env variable is absent or not a positive integer.
  const fallback = 50;

  // Explicit radix 10: without it, values such as "0x10" are parsed as hexadecimal.
  // An unset variable yields NaN, which `Number.isInteger` rejects without coercion.
  const fromEnv = Number.parseInt(process.env.MAX_PARALLEL_CAPTURES_TOTAL, 10);

  return Number.isInteger(fromEnv) && fromEnv > 0 ? fromEnv : fallback;
})();
/**
* Maximum capture processes that can be run in parallel for a given IP address.
* Defaults to:
* - 2 if REQUIRE_ACCESS_KEY is `false`
* - 10 if REQUIRE_ACCESS_KEY is `true`
* Can be replaced via the `MAX_PARALLEL_CAPTURES_PER_IP` env variable.
* @constant
*/
export const MAX_PARALLEL_CAPTURES_PER_IP = (() => {
  // Explicit radix 10: without it, values such as "0x10" are parsed as hexadecimal.
  // An unset variable yields NaN, which `Number.isInteger` rejects without coercion.
  const fromEnv = Number.parseInt(process.env.MAX_PARALLEL_CAPTURES_PER_IP, 10);

  if (Number.isInteger(fromEnv) && fromEnv > 0) {
    return fromEnv;
  }

  // No usable override: the default is 10 when access keys are required, 2 otherwise.
  return REQUIRE_ACCESS_KEY ? 10 : 2;
})();
/**
* APP version. Pulled from `package.json` by default.

Wyświetl plik

@ -8,14 +8,16 @@ import assert from "assert";
import nunjucks from "nunjucks";
import { IPBlockList, CertsHistory, SuccessLog, TwitterCapture } from "./utils/index.js";
import { AccessKeys, IPBlockList, CertsHistory, SuccessLog, TwitterCapture } from "./utils/index.js";
import {
TEMPLATES_PATH,
STATIC_PATH,
MAX_PARALLEL_CAPTURES_TOTAL,
MAX_PARALLEL_CAPTURES_PER_IP,
REQUIRE_ACCESS_KEY
} from "./const.js";
/**
* @type {SuccessLog}
*/
@ -26,6 +28,11 @@ export const successLog = new SuccessLog();
*/
const ipBlockList = new IPBlockList();
/**
* @type {AccessKeys}
*/
const accessKeys = new AccessKeys();
/**
* Fastify-cli options
* @constant
@ -52,7 +59,7 @@ export const CAPTURES_WATCH = {
currentTotal: 0,
maxTotal: MAX_PARALLEL_CAPTURES_TOTAL,
currentByIp: {},
maxPerIp: MAX_PARALLEL_CAPTURES_PER_IP,
maxPerIp: MAX_PARALLEL_CAPTURES_PER_IP
}
export default async function (fastify, opts) {
@ -89,7 +96,7 @@ export default async function (fastify, opts) {
* @returns {Promise<fastify.FastifyReply>}
*/
async function index(request, reply) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`);
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {REQUIRE_ACCESS_KEY});
return reply
.code(200)
@ -106,6 +113,8 @@ async function index(request, reply) {
*
* Body is expected as `application/x-www-form-urlencoded` with the following fields:
* - url
* - why
* - access-key [If `REQUIRE_ACCESS_KEY` is enabled]
* - unfold-thread (optional)
*
* Assumes `fastify` is in scope.
@ -118,6 +127,7 @@ async function capture(request, reply) {
const data = request.body;
const ip = request.ip;
let why = null;
let accessKey = null;
request.log.info(`Capture capacity: ${CAPTURES_WATCH.currentTotal} / ${CAPTURES_WATCH.maxTotal}.`);
@ -127,7 +137,8 @@ async function capture(request, reply) {
if (ipBlockList.check(ip)) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "IP"
errorReason: "IP",
REQUIRE_ACCESS_KEY
});
return reply
@ -136,6 +147,28 @@ async function capture(request, reply) {
.send(html);
}
//
// Check access key if required
//
if (REQUIRE_ACCESS_KEY) {
try {
accessKey = data["access-key"];
assert(accessKeys.check(accessKey));
}
catch(err) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "ACCESS-KEY",
REQUIRE_ACCESS_KEY
});
return reply
.code(401)
.header('Content-Type', 'text/html; charset=utf-8')
.send(html);
}
}
//
// Check url
//
@ -146,7 +179,8 @@ async function capture(request, reply) {
catch(err) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "URL"
errorReason: "URL",
REQUIRE_ACCESS_KEY
});
return reply
@ -165,7 +199,8 @@ async function capture(request, reply) {
catch(err) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "WHY"
errorReason: "WHY",
REQUIRE_ACCESS_KEY
});
return reply
@ -180,7 +215,8 @@ async function capture(request, reply) {
if (CAPTURES_WATCH.currentTotal >= CAPTURES_WATCH.maxTotal) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "TOO-MANY-CAPTURES-TOTAL"
errorReason: "TOO-MANY-CAPTURES-TOTAL",
REQUIRE_ACCESS_KEY
});
return reply
@ -195,7 +231,8 @@ async function capture(request, reply) {
if (CAPTURES_WATCH.currentByIp[ip] >= CAPTURES_WATCH.maxPerIp) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "TOO-MANY-CAPTURES-USER"
errorReason: "TOO-MANY-CAPTURES-USER",
REQUIRE_ACCESS_KEY
});
return reply
@ -221,7 +258,7 @@ async function capture(request, reply) {
const tweets = new TwitterCapture(data.url, {runBrowserBehaviors: "unfold-thread" in data});
const pdf = await tweets.capture();
successLog.add(ip, why, pdf);
successLog.add(REQUIRE_ACCESS_KEY ? accessKey : ip, why, pdf);
// Generate a filename for the PDF based on url.
// Example: harvardlil-status-123456789-2022-11-25.pdf
@ -246,7 +283,8 @@ async function capture(request, reply) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "CAPTURE-ISSUE"
errorReason: "CAPTURE-ISSUE",
REQUIRE_ACCESS_KEY
});
return reply

Wyświetl plik

@ -4,6 +4,7 @@
* @author The Harvard Library Innovation Lab
* @license MIT
*/
import fs from "fs";
import assert from "assert";
import crypto from "crypto";
@ -11,18 +12,36 @@ import { test } from "tap";
import Fastify from "fastify";
import isHtml from "is-html";
import { AccessKeys } from "./utils/index.js";
import server, { CAPTURES_WATCH, successLog } from "./server.js";
import { DATA_PATH, CERTS_PATH } from "./const.js";
/**
* Dummy url of a thread to capture.
* Sample url of a thread to capture.
*/
const THREAD_URL = "https://twitter.com/HarvardLIL/status/1595150565428039680";
/**
* Dummy reason for capture
* Sample reason for capture
*/
const WHY = "Testing thread-keeper";
const WHY = "Testing thread-keeper.";
/**
 * Access keys read from the fixture file, grouped by status.
 * A key is listed as active only when its value is strictly `true`;
 * every other entry is listed as inactive.
 * @type {{active: string[], inactive: string[]}}
 */
const ACCESS_KEYS = (() => {
  const fixture = JSON.parse(fs.readFileSync(AccessKeys.filepath));
  const active = [];
  const inactive = [];

  for (const [accessKey, isActive] of Object.entries(fixture)) {
    if (isActive === true) {
      active.push(accessKey);
    }
    else {
      inactive.push(accessKey);
    }
  }

  return { active, inactive };
})();
test("Integration tests for server.js", async(t) => {
@ -50,6 +69,41 @@ test("Integration tests for server.js", async(t) => {
t.type(isHtml(response.body), true, "Server serves HTML.");
});
// This assumes tests are run with the REQUIRE_ACCESS_KEY env var set to `1`.
test("[POST] / returns HTTP 401 + HTML on failed access key check.", async (t) => {
const app = Fastify({logger: false});
await server(app, {});
const scenarios = [
"FOO-BAR", // Invalid key
ACCESS_KEYS.inactive[0], // Inactive key
null // No key
]
for (const accessKey of scenarios) {
const params = new URLSearchParams();
params.append("url", THREAD_URL);
params.append("why", WHY);
if (accessKey) {
params.append("access-key", accessKey);
}
const response = await app.inject({
method: "POST",
url: "/",
headers: { "Content-Type": "application/x-www-form-urlencoded" },
body: params.toString(),
});
t.equal(response.statusCode, 401, "Server returns HTTP 401.");
const body = `${response.body}`;
t.type(isHtml(body), true, "Server serves HTML");
t.equal(body.includes(`data-reason="ACCESS-KEY"`), true, "With error message.");
}
});
test("[POST] / returns HTTP 401 + HTML on blocked IP check.", async (t) => {
const app = Fastify({logger: false});
await server(app, {});
@ -78,10 +132,18 @@ test("Integration tests for server.js", async(t) => {
const app = Fastify({logger: false});
await server(app, {});
const params = new URLSearchParams();
params.append("why", WHY);
params.append("access-key", ACCESS_KEYS.active[0]);
const response = await app.inject({
method: "POST",
url: "/",
remoteAddress: "4.3.2.1",
headers: {
"Content-Type": "application/x-www-form-urlencoded"
},
body: params.toString()
});
// Should fail because no URL was passed, not because IP was blocked
@ -101,6 +163,8 @@ test("Integration tests for server.js", async(t) => {
for (const url of scenarios) {
const params = new URLSearchParams();
params.append("why", WHY);
params.append("access-key", ACCESS_KEYS.active[0]);
if (url) {
params.append("url", url);
@ -133,8 +197,8 @@ test("Integration tests for server.js", async(t) => {
for (const why of scenarios) {
const params = new URLSearchParams();
params.append("url", THREAD_URL);
params.append("access-key", ACCESS_KEYS.active[0]);
if (why) {
params.append("why", why);
@ -164,6 +228,7 @@ test("Integration tests for server.js", async(t) => {
const params = new URLSearchParams();
params.append("url", THREAD_URL);
params.append("why", WHY);
params.append("access-key", ACCESS_KEYS.active[0]);
const response = await app.inject({
method: "POST",
@ -191,6 +256,7 @@ test("Integration tests for server.js", async(t) => {
const params = new URLSearchParams();
params.append("url", THREAD_URL);
params.append("why", WHY);
params.append("access-key", ACCESS_KEYS.active[0]);
const response = await app.inject({
method: "POST",
@ -215,6 +281,7 @@ test("Integration tests for server.js", async(t) => {
const params = new URLSearchParams();
params.append("url", THREAD_URL);
params.append("why", WHY);
params.append("access-key", ACCESS_KEYS.active[0]);
params.append("unfold-thread", "on");
const response = await app.inject({

Wyświetl plik

@ -7,13 +7,19 @@ const formSubmitDialog = document.querySelector("dialog#form-submit");
document.querySelector("body#index form button").addEventListener("click", (e) => {
e.preventDefault();
const url = document.querySelector("body#index form input[name='url']");
const why = document.querySelector("body#index form input[name='why']");
const why = document.querySelector("body#index form textarea#why");
const accessKey = document.querySelector("body#index form input[name='access-key']");
if (!url.checkValidity()) {
url.reportValidity();
return;
}
if (accessKey && !accessKey.checkValidity()) {
accessKey.reportValidity();
return;
}
if (!why.checkValidity()) {
why.reportValidity();
return;

Wyświetl plik

@ -33,8 +33,18 @@
required>
</fieldset>
{% if REQUIRE_ACCESS_KEY %}
<fieldset>
<label for="why">Reason for archiving <a href="#who-can-use-it">(why this question?)</a></label>
<label for="access-key">Access key <a href="https://ocb.to/archive-social-form" title="Access key request form">(request access)</a></label>
<input type="password"
name="access-key"
id="access-key"
required>
</fieldset>
{% endif %}
<fieldset>
<label for="why">Reason for archiving <a href="#why-faq">(why this question?)</a></label>
<textarea required name="why" id="why" rows="2"></textarea>
</fieldset>
@ -63,6 +73,10 @@
<dialog id="form-error" data-reason="{{ errorReason }}">
<h2>Something went wrong</h2>
{% if errorReason and errorReason == "ACCESS-KEY" %}
<p>Access key provided is invalid or inactive.</p>
{% endif %}
{% if errorReason and errorReason == "IP" %}
<p>Your access to this service has been restricted.</p>
{% endif %}
@ -72,7 +86,7 @@
{% endif %}
{% if errorReason and errorReason == "WHY" %}
<p>The url provided is not a valid twitter.com url.</p>
<p>Please tell us why you would like to archive this twitter.com url.</p>
{% endif %}
{% if errorReason and errorReason == "TOO-MANY-CAPTURES-TOTAL" %}
@ -96,9 +110,11 @@
<p>This site is an experiment by the <a href="https://lil.law.harvard.edu">Harvard Library Innovation Lab</a> to let you download signed PDFs of Twitter URLs. <a href="/static/example.pdf">Here's an example PDF</a> we made from <a href="https://twitter.com/doctorow/status/1591759999323492358">this tweet</a>.</p>
<h2 id="who-can-use-it">Who can use it?</h2>
{% if REQUIRE_ACCESS_KEY %}
<h2>Who can use it?</h2>
<p>While anyone can use this website, it is experimental and running with limited server capacity. But you can also use <a href="https://github.com/harvard-lil/thread-keeper">our open source software</a> to stand up an archive server of your own, and share it with your friends.</p>
<p>To use our website <a href="https://ocb.to/archive-social-form">you'll need to contact us</a> for an API key. We're currently only able to share a limited number with people like journalists, internet scholars, and archivists. But you can also use <a href="https://github.com/harvard-lil/thread-keeper">our open source software</a> to stand up an archive server of your own, and share it with your friends.</p>
{% endif %}
<h2>Why make a PDF archiving tool for Twitter?</h2>
@ -114,6 +130,10 @@
<p>Library nerd note: societies create much more data than they can save. "Thinking carefully about what you choose to preserve" is part of the practice of archiving. By doing it, you're helping to form our shared cultural memory.</p>
<h2 id="why-faq">Why do you ask the reason for archiving?</h2>
<p>At the Library Innovation Lab, we build experiments like this to explore what's most important to save in the cultural record and how we can save it. Your answer will help us understand whether this tool is serving its purpose, who it's helping, and what other features it might need. Feel free to provide as much or as little detail as you want about who you are and what you're trying to accomplish. Including the same answer each time is fine.</p>
<h2>How do you make these PDFs (and why does it take so long)?</h2>
<p>Twitter captures are made using open source web archiving software we're developing at the Library Innovation Lab for eventual use in our <a href="https://perma.cc">Perma.cc project</a>. The software uses a headless Chrome browser to render the page as it would appear to a reader. For this experiment, we're also running custom javascript in the headless browser to remove Twitter UI and make the content easier to read.</p>
@ -136,6 +156,10 @@
<p>Tech nerd note: This whole trust step is needed because of something called <em>repudiability</em>: https web transactions are deliberately designed to be repudiable, meaning there's no way to tell as a third party after the fact whether they ever really happened. Signed HTTP exchanges are one proposal that may eventually let websites choose to publish verifiable content instead, but they aren't here yet. So for now, you're left deciding whether "social.perma.cc" is an intermediary you want to choose to trust.</p>
<h2>Is the code for this site available?</h2>
<p>Yes! Code for this site is published under an open license <a href="https://github.com/harvard-lil/thread-keeper">on GitHub</a>. We encourage you to run your own instance of the server — remembering that if you run it, you'll be the one asserting provenance of the resulting PDF files.</p>
<h2>What is your privacy policy?</h2>
<p>We may log requested Twitter URLs and may store cached copies of delivered archives. We also log cryptographic hashes of all PDF files delivered, in case there is a later question of authenticity. We store normal server request logs.</p>

Wyświetl plik

@ -0,0 +1,63 @@
/**
* thread-keeper
* @module utils.AccessKeys
* @author The Harvard Library Innovation Lab
* @license MIT
*/
import assert from "assert";
import fs from "fs";
import { validate as uuidValidate } from 'uuid';
import { DATA_PATH } from "../const.js";
/**
 * Utility class for handling access keys to the app.
 * Keys are read once, at construction time, from `access-keys.json`.
 * [!] For alpha launch only.
 */
export class AccessKeys {
  /**
   * Complete path to `access-keys.json`.
   * @type {string}
   */
  static filepath = `${DATA_PATH}access-keys.json`;

  /**
   * Frozen hashmap of available access keys
   * (app needs to be restarted for new keys to be taken into account, for now).
   * Stays empty when no key file could be loaded, in which case every check fails.
   * @type {Object.<string,boolean>}
   */
  #keys = Object.freeze({});

  /**
   * On init:
   * - Create access keys file if it doesn't exist
   * - Load keys from file into `this.#keys`.
   *
   * An existing file that cannot be read or parsed is left untouched:
   * the problem is reported on stderr and the instance starts with no keys.
   */
  constructor() {
    const filepath = AccessKeys.filepath;
    let raw;

    try {
      raw = fs.readFileSync(filepath, "utf-8");
    }
    catch (err) {
      if (err.code === "ENOENT") {
        // Only a missing file is bootstrapped. Any other read failure must not
        // lead to overwriting a key file that may hold valid keys.
        fs.writeFileSync(filepath, "{}");
      }
      else {
        console.error(`AccessKeys: could not read ${filepath}: ${err.message}`);
      }
      return;
    }

    try {
      const parsed = JSON.parse(raw);

      if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new TypeError("expected a JSON object mapping access keys to booleans");
      }

      this.#keys = Object.freeze(parsed);
    }
    catch (err) {
      // Malformed content: keep the file as-is so it can be repaired by hand.
      console.error(`AccessKeys: could not parse ${filepath}: ${err.message}`);
    }
  }

  /**
   * Checks that a given access key is valid and active.
   * A key passes only if it is a well-formed UUID whose entry is strictly `true`.
   * @param {string} accessKey
   * @returns {boolean} `true` if the key is known and active, `false` otherwise.
   */
  check(accessKey) {
    if (!uuidValidate(accessKey)) {
      return false;
    }

    return this.#keys[accessKey] === true;
  }
}

Wyświetl plik

@ -69,11 +69,11 @@ export class SuccessLog {
* - Creates a success log entry
* - Updates `this.#hashes` (so it doesn't need to reload from file)
*
* @param {string} ip
* @param {string} identifier - Can be an IP or access key
* @param {string} why - Reason for creating this archive
* @param {Buffer} pdfBytes - Used to store a SHA512 hash of the PDF that was delivered
*/
add(ip, why, pdfBytes) {
add(identifier, why, pdfBytes) {
// Calculate SHA512 hash of the PDF
const hash = crypto.createHash('sha512').update(pdfBytes).digest('base64');
@ -87,7 +87,7 @@ export class SuccessLog {
.replaceAll("}", "");
// Save entry
const entry = `${new Date().toISOString()}\t${ip}\t${why}\tsha512-${hash}\n`;
const entry = `${new Date().toISOString()}\t${identifier}\t${why}\tsha512-${hash}\n`;
fs.appendFileSync(SuccessLog.filepath, entry);
this.#hashes[`sha512-${hash}`] = true;
}
@ -118,7 +118,7 @@ export class SuccessLog {
* @returns {void}
*/
reset() {
fs.writeFileSync(SuccessLog.filepath, "date-time\tip\twhy\thash\n");
fs.writeFileSync(SuccessLog.filepath, "date-time\tidentifier\twhy\thash\n");
this.#hashes = {};
}
}

Wyświetl plik

@ -8,5 +8,6 @@ import { IPBlockList } from "./IPBlockList.js";
import { TwitterCapture } from "./TwitterCapture.js";
import { SuccessLog } from "./SuccessLog.js";
import { CertsHistory } from "./CertsHistory.js";
import { AccessKeys } from "./AccessKeys.js";
export { IPBlockList, SuccessLog, CertsHistory, TwitterCapture };
export { AccessKeys, IPBlockList, SuccessLog, CertsHistory, TwitterCapture };

Wyświetl plik

@ -13,6 +13,7 @@ thread-keeper
* [.TEMPLATES_PATH](#module_const.TEMPLATES_PATH)
* [.EXECUTABLES_FOLDER](#module_const.EXECUTABLES_FOLDER)
* [.STATIC_PATH](#module_const.STATIC_PATH)
* [.REQUIRE_ACCESS_KEY](#module_const.REQUIRE_ACCESS_KEY)
* [.MAX_PARALLEL_CAPTURES_TOTAL](#module_const.MAX_PARALLEL_CAPTURES_TOTAL)
* [.MAX_PARALLEL_CAPTURES_PER_IP](#module_const.MAX_PARALLEL_CAPTURES_PER_IP)
* [.APP_VERSION](#module_const.APP_VERSION)
@ -21,12 +22,16 @@ thread-keeper
### const.CERTS\_PATH
Path to the folder holding the certificates used for signing the PDFs.
Defaults to `./certs`
Can be replaced via the `CERTS_PATH` env variable.
**Kind**: static constant of [<code>const</code>](#module_const)
<a name="module_const.DATA_PATH"></a>
### const.DATA\_PATH
Path to the "data" folder.
Defaults to `./app/data`
Can be replaced via the `DATA_PATH` env variable.
**Kind**: static constant of [<code>const</code>](#module_const)
<a name="module_const.TMP_PATH"></a>
@ -52,17 +57,31 @@ Path to the "executables" folder, for dependencies that are meant to be executed
### const.STATIC\_PATH
Path to the "static" folder.
**Kind**: static constant of [<code>const</code>](#module_const)
<a name="module_const.REQUIRE_ACCESS_KEY"></a>
### const.REQUIRE\_ACCESS\_KEY
If `true`, users will be required to provide an access key.
Defaults to `false`.
Can be replaced via the `REQUIRE_ACCESS_KEY` env variable, if set to "1".
**Kind**: static constant of [<code>const</code>](#module_const)
<a name="module_const.MAX_PARALLEL_CAPTURES_TOTAL"></a>
### const.MAX\_PARALLEL\_CAPTURES\_TOTAL
Maximum capture processes that can be run in parallel.
Defaults to 50.
Can be replaced via the `MAX_PARALLEL_CAPTURES_TOTAL` env variable.
**Kind**: static constant of [<code>const</code>](#module_const)
<a name="module_const.MAX_PARALLEL_CAPTURES_PER_IP"></a>
### const.MAX\_PARALLEL\_CAPTURES\_PER\_IP
Maximum capture processes that can be run in parallel for a IP address.
Maximum capture processes that can be run in parallel for a given IP address.
Defaults to:
- 2 if REQUIRE_ACCESS_KEY is `false`
- 10 if REQUIRE_ACCESS_KEY is `true`
Can be replaced via the `MAX_PARALLEL_CAPTURES_PER_IP` env variable.
**Kind**: static constant of [<code>const</code>](#module_const)
<a name="module_const.APP_VERSION"></a>

Wyświetl plik

@ -13,6 +13,7 @@ thread-keeper
* [.CAPTURES_WATCH](#module_server.CAPTURES_WATCH) : <code>Object</code>
* _inner_
* [~ipBlockList](#module_server..ipBlockList) : <code>IPBlockList</code>
* [~accessKeys](#module_server..accessKeys) : <code>AccessKey</code>
* [~index(request, reply)](#module_server..index) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
* [~capture(request, reply)](#module_server..capture) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
* [~check(request, reply)](#module_server..check) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
@ -41,6 +42,10 @@ May be used to redirect users if over capacity.
### server~ipBlockList : <code>IPBlockList</code>
**Kind**: inner constant of [<code>server</code>](#module_server)
<a name="module_server..accessKeys"></a>
### server~accessKeys : <code>AccessKey</code>
**Kind**: inner constant of [<code>server</code>](#module_server)
<a name="module_server..index"></a>
### server~index(request, reply) ⇒ <code>Promise.&lt;fastify.FastifyReply&gt;</code>
@ -66,6 +71,8 @@ Subject to captures rate limiting (see `CAPTURES_WATCH`).
Body is expected as `application/x-www-form-urlencoded` with the following fields:
- url
- why
- access-key [If `REQUIRE_ACCESS_KEY` is enabled]
- unfold-thread (optional)
Assumes `fastify` is in scope.

Wyświetl plik

@ -0,0 +1,51 @@
<a name="utils.module_AccessKeys"></a>
## AccessKeys
thread-keeper
**Author**: The Harvard Library Innovation Lab
**License**: MIT
* [AccessKeys](#utils.module_AccessKeys)
* [.AccessKeys](#utils.module_AccessKeys.AccessKeys)
* [new exports.AccessKeys()](#new_utils.module_AccessKeys.AccessKeys_new)
* [.filepath](#utils.module_AccessKeys.AccessKeys+filepath) : <code>string</code>
* [.check(accessKey)](#utils.module_AccessKeys.AccessKeys+check)
<a name="utils.module_AccessKeys.AccessKeys"></a>
### AccessKeys.AccessKeys
Utility class for handling access keys to the app.
[!] For alpha launch only.
**Kind**: static class of [<code>AccessKeys</code>](#utils.module_AccessKeys)
* [.AccessKeys](#utils.module_AccessKeys.AccessKeys)
* [new exports.AccessKeys()](#new_utils.module_AccessKeys.AccessKeys_new)
* [.filepath](#utils.module_AccessKeys.AccessKeys+filepath) : <code>string</code>
* [.check(accessKey)](#utils.module_AccessKeys.AccessKeys+check)
<a name="new_utils.module_AccessKeys.AccessKeys_new"></a>
#### new exports.AccessKeys()
On init:
- Create access keys file if it doesn't exist
- Load keys from file into `this.#keys`.
<a name="utils.module_AccessKeys.AccessKeys+filepath"></a>
#### accessKeys.filepath : <code>string</code>
Complete path to `access-keys.json`.
**Kind**: instance property of [<code>AccessKeys</code>](#utils.module_AccessKeys.AccessKeys)
<a name="utils.module_AccessKeys.AccessKeys+check"></a>
#### accessKeys.check(accessKey)
Checks that a given access key is valid and active.
**Kind**: instance method of [<code>AccessKeys</code>](#utils.module_AccessKeys.AccessKeys)
| Param | Type |
| --- | --- |
| accessKey | <code>string</code> |

Wyświetl plik

@ -10,7 +10,7 @@ thread-keeper
* [.SuccessLog](#utils.module_SuccessLog.SuccessLog)
* [new exports.SuccessLog()](#new_utils.module_SuccessLog.SuccessLog_new)
* [.filepath](#utils.module_SuccessLog.SuccessLog+filepath) : <code>string</code>
* [.add(ip, pdfBytes)](#utils.module_SuccessLog.SuccessLog+add)
* [.add(identifier, why, pdfBytes)](#utils.module_SuccessLog.SuccessLog+add)
* [.findHashInLogs(hash)](#utils.module_SuccessLog.SuccessLog+findHashInLogs) ⇒ <code>boolean</code>
* [.reset()](#utils.module_SuccessLog.SuccessLog+reset) ⇒ <code>void</code>
@ -24,7 +24,7 @@ Utility class for handling success logs. Keeps trace of the hashes of the PDFs t
* [.SuccessLog](#utils.module_SuccessLog.SuccessLog)
* [new exports.SuccessLog()](#new_utils.module_SuccessLog.SuccessLog_new)
* [.filepath](#utils.module_SuccessLog.SuccessLog+filepath) : <code>string</code>
* [.add(ip, pdfBytes)](#utils.module_SuccessLog.SuccessLog+add)
* [.add(identifier, why, pdfBytes)](#utils.module_SuccessLog.SuccessLog+add)
* [.findHashInLogs(hash)](#utils.module_SuccessLog.SuccessLog+findHashInLogs) ⇒ <code>boolean</code>
* [.reset()](#utils.module_SuccessLog.SuccessLog+reset) ⇒ <code>void</code>
@ -43,7 +43,7 @@ Complete path to `success-log.json`.
**Kind**: instance property of [<code>SuccessLog</code>](#utils.module_SuccessLog.SuccessLog)
<a name="utils.module_SuccessLog.SuccessLog+add"></a>
#### successLog.add(ip, pdfBytes)
#### successLog.add(identifier, why, pdfBytes)
Calculates hash of a PDF and:
- Creates a success log entry
- Updates `this.#hashes` (so it doesn't need to reload from file)
@ -52,7 +52,8 @@ Calculates hash of a PDF an:
| Param | Type | Description |
| --- | --- | --- |
| ip | <code>string</code> | |
| identifier | <code>string</code> | Can be an IP or access key |
| why | <code>string</code> | Reason for creating this archive |
| pdfBytes | <code>Buffer</code> | Used to store a SHA512 hash of the PDF that was delivered |
<a name="utils.module_SuccessLog.SuccessLog+findHashInLogs"></a>

Wyświetl plik

@ -0,0 +1,4 @@
{
"a9c2b7b6-0652-4207-bdce-0527ad28f3f9": true,
"60e605ec-3510-4358-a0a7-33f25b3d7b74": false
}

Wyświetl plik

@ -10,7 +10,7 @@
"postinstall": "cd scripts && bash download-yt-dlp.sh && bash pip-install.sh",
"generate-dev-cert": "cd scripts && bash generate-dev-cert.sh",
"docgen": "cd scripts && bash docgen.sh",
"test": "cd scripts && bash generate-test-cert.sh && cd .. && export CERTS_PATH='./fixtures/certs/' && export DATA_PATH='./fixtures/data/' && npx tap --reporter=list --timeout=180"
"test": "cd scripts && bash generate-test-cert.sh && cd .. && export CERTS_PATH='./fixtures/certs/' && export DATA_PATH='./fixtures/data/' && export REQUIRE_ACCESS_KEY='1' && npx tap --reporter=list --timeout=180"
},
"dependencies": {
"@fastify/formbody": "^7.3.0",

Wyświetl plik

@ -3,6 +3,7 @@ jsdoc2md ../app/server.js > ../docs/server.md;
jsdoc2md ../app/const.js > ../docs/const.md;
jsdoc2md ../app/utils/index.js > ../docs/utils/index.md;
jsdoc2md ../app/utils/AccessKeys.js > ../docs/utils/AccessKeys.md;
jsdoc2md ../app/utils/IPBlockList.js > ../docs/utils/IPBlockList.md;
jsdoc2md ../app/utils/SuccessLog.js > ../docs/utils/SuccessLog.md;
jsdoc2md ../app/utils/TwitterCapture.js > ../docs/utils/TwitterCapture.md;