Implements a "check PDF" feature

Adds a "Check PDF" dialog that lets users compare a client-side generated SHA-512 hash of a file against archive.social's logs.

It is primitive, but SuccessLog loads all available hashes from success-log.tsv on app boot into a hashmap, which gets updated as new entries get created.

Other quality-of-life updates include some minor CSS tweaks and refactoring of AccessKeys.js to match SuccessLog.js.
pull/2/head
Matteo Cargnelutti 2022-11-21 01:34:56 -05:00
rodzic 1101d0a8a8
commit f5ca37d831
6 zmienionych plików z 282 dodań i 54 usunięć

Wyświetl plik

@ -4,10 +4,8 @@
* @author The Harvard Library Innovation Lab
* @license MIT
*/
import fs from "fs";
import assert from "assert";
import { validate as uuidValidate } from 'uuid';
import nunjucks from "nunjucks";
import { AccessKeys, SuccessLog, TwitterCapture } from "./utils/index.js";
@ -18,6 +16,16 @@ import {
MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY,
} from "./const.js";
/**
* @type {SuccessLog}
*/
const successLog = new SuccessLog();
/**
* @type {AccessKeys}
*/
const accessKeys = new AccessKeys();
/**
* Keeps track of how many capture processes are currently running.
* May be used to redirect users if over capacity.
@ -38,12 +46,6 @@ const CAPTURES_WATCH = {
maxPerAccessKey: MAX_PARALLEL_CAPTURES_PER_ACCESS_KEY,
}
/**
* Frozen copy of currently valid access keys.
* [!] For this alpha: app needs to be restarted for changes to be into account.
*/
const ACCESS_KEYS = AccessKeys.fetch();
export default async function (fastify, opts) {
// Adds support for `application/x-www-form-urlencoded`
@ -71,9 +73,8 @@ export default async function (fastify, opts) {
/**
* [POST] /
* Processes a request to capture a twitter url.
* Renders success page with PDF if capture went through.
* Serves PDF byte directly if operation is successful.
* Returns to form with specific error code, passed as `errorReason`, otherwise.
*
*/
fastify.post('/', async (request, reply) => {
const data = request.body;
@ -84,11 +85,7 @@ export default async function (fastify, opts) {
//
// Check access key
//
try {
assert(uuidValidate(accessKey));
assert(ACCESS_KEYS[accessKey]);
}
catch(err) {
if (!accessKeys.check(accessKey)) {
const html = nunjucks.render(`${TEMPLATES_PATH}index.njk`, {
error: true,
errorReason: "ACCESS-KEY"
@ -165,7 +162,7 @@ export default async function (fastify, opts) {
const tweets = new TwitterCapture(data.url, {runBrowserBehaviors: "auto-scroll" in data});
const pdf = await tweets.capture();
SuccessLog.add(accessKey, pdf);
successLog.add(accessKey, pdf);
return reply
.code(200)
@ -196,4 +193,23 @@ export default async function (fastify, opts) {
}
});
/**
 * [GET] /api/v1/hashes/check/<sha512-hash>
 * Tells whether a SHA512 hash appears in the "success" logs, i.e. whether this app produced the matching file.
 * The hash is the last path segment, url encoded.
 * Only hashes that are 95 or 88 characters long are looked up; anything else is reported as not found.
 *
 * Responds with HTTP 200 if found, HTTP 404 if not (empty body in both cases).
 */
fastify.get('/api/v1/hashes/check/:hash', async (request, reply) => {
  const candidate = request.params.hash;
  const hasExpectedLength = candidate.length === 95 || candidate.length === 88;

  // Skip the lookup entirely when the length rules the value out.
  const isKnown = hasExpectedLength ? successLog.findHashInLogs(candidate) : false;

  return reply.code(isKnown ? 200 : 404).send();
});
};

Wyświetl plik

@ -130,10 +130,11 @@ body#index > main form {
display: block;
margin: auto;
border-top: 1px solid var(--main-color-----);
/*border-bottom: 1px solid var(--main-color-----);*/
border-bottom: 1px solid var(--main-color-----);
padding-top: 1.5rem;
margin-bottom: 1rem;
padding-bottom: 1.5rem;
margin-top: 1.5rem;
margin-bottom: 1.5rem;
}
@media (max-width: 769px) {
@ -173,9 +174,6 @@ body#index > main form fieldset.submit {
text-align: right;
}
body#index > main form fieldset.submit span {
}
body#index > main form fieldset.submit span * {
display: inline-block;
font-size: 0.85rem;
@ -221,15 +219,34 @@ body#index > main dialog[open]::backdrop {
overflow: hidden;
}
/* INDEX - EXPLAINER */
body#index > main section {
max-width: 45ch;
margin: auto;
padding-top: 1.5rem;
margin-bottom: 1.5rem;
border-top: 1px solid var(--main-color-----);
body#index > main dialog#check-pdf button {
margin-right: 1rem;
}
body#index > main dialog#check-pdf input {
display: block;
width: 100%;
margin-bottom: 1rem;
cursor: pointer;
}
body#index > main dialog#check-pdf textarea {
display: block;
width: 100%;
margin-bottom: 1rem;
cursor: pointer;
height: 6rem;
padding: 0.5rem;
font-size: 0.65rem;
}
/* INDEX - EXPLAINER */
body#index > main section {
max-width: 55ch;
margin: auto;
margin-bottom: 1.5rem;
/*border-top: 1px solid var(--main-color-----);*/
}
body#index > main section p {
margin-bottom: 0.5rem;

Wyświetl plik

@ -1,6 +1,7 @@
//------------------------------------------------------------------------------
// "form-submit" dialog logic
//------------------------------------------------------------------------------
const formSubmitDialog = document.querySelector("dialog#form-submit");
// Click on form submit button: check input validity, open "form-submit" dialog.
document.querySelector("body#index form button").addEventListener("click", (e) => {
@ -18,24 +19,22 @@ document.querySelector("body#index form button").addEventListener("click", (e) =
return;
}
document.querySelector("dialog#form-submit").showModal();
formSubmitDialog.showModal();
});
// Click on button in "form-submit" dialog: close dialog and submit form.
document.querySelector("dialog#form-submit button").addEventListener("click", (e) => {
formSubmitDialog.querySelector("button").addEventListener("click", (e) => {
e.preventDefault();
document.querySelector("dialog#form-submit").close();
formSubmitDialog.close();
document.querySelector("body#index form").submit();
});
//------------------------------------------------------------------------------
// "form-error" dialog logic
//------------------------------------------------------------------------------
// Open on load if present, close on button click.
const formErrorDialog = document.querySelector("dialog#form-error");
// Open on load if present, close on button click.
if (formErrorDialog) {
formErrorDialog.showModal();
@ -43,3 +42,87 @@ if (formErrorDialog) {
formErrorDialog.close();
})
}
//------------------------------------------------------------------------------
// "check-pdf" dialog logic
//------------------------------------------------------------------------------
const checkPdfDialog = document.querySelector("dialog#check-pdf");
// Open / close the "check-pdf" dialog based on the URL hash:
// - navigating to "#check-pdf" opens it,
// - navigating away from "#check-pdf" closes it.
window.addEventListener('hashchange', (event) => {
  const TARGET_HASH = "#check-pdf";
  const newHash = new URL(event.newURL).hash;
  const oldHash = new URL(event.oldURL).hash;

  if (newHash === TARGET_HASH) {
    checkPdfDialog.showModal();
  }

  // Compare the fragments themselves: two separately constructed URL objects are never
  // identical under `!==`, so comparing the objects would not guard anything.
  if (oldHash === TARGET_HASH && newHash !== TARGET_HASH) {
    checkPdfDialog.close();
  }
});
// Page loaded with "#check-pdf" already in the URL: show the dialog immediately.
if (window.location.hash === "#check-pdf") {
  checkPdfDialog.showModal();
}

// A new file selection makes any previous result stale: replace it with a prompt.
checkPdfDialog
  .querySelector("input")
  .addEventListener("change", () => {
    const resultBox = checkPdfDialog.querySelector("textarea");
    resultBox.value = `Click on "Check" to proceed.\n`;
  });
// Check file on click on "Check":
// 1. Computes the base64-encoded SHA-512 digest of the selected file, in the browser.
// 2. Asks `/api/v1/hashes/check/<hash>` whether that digest is present in the logs.
// 3. Reports the outcome in the dialog's <textarea>.
checkPdfDialog.querySelector("button").addEventListener("click", async () => {
  const output = checkPdfDialog.querySelector("textarea");
  output.value = "";

  let hash = "";

  try {
    // `files[0]` is undefined when nothing was selected: the resulting TypeError is handled below.
    const data = await checkPdfDialog.querySelector("input[type='file']").files[0].arrayBuffer();
    const digest = new Uint8Array(await crypto.subtle.digest('SHA-512', data));

    // btoa() takes a "binary string" (one character per byte), so map every digest byte
    // to the character with that code before encoding.
    hash = btoa(String.fromCharCode(...digest));
  }
  catch(err) {
    output.value = `Could not calculate hash of the file selected, if any.\n`;
    // No hash means nothing to look up: stop here instead of querying the API with an
    // empty value and reporting a misleading "NOT FOUND".
    return;
  }

  try {
    const response = await fetch(`/api/v1/hashes/check/${encodeURIComponent(hash)}`);

    output.value += `SHA-512 hash\n---${hash}\n---\n`;

    switch (response.status) {
      case 200:
        output.value += `This hash is CONFIRMED to be present in archive.social's logs.\n`;
        break;

      case 404:
        output.value += `This hash was NOT FOUND in archive.social's logs.\n`;
        break;

      default:
        // Any other status is unexpected: route it to the error handler below.
        throw new Error(response.status);
    }
  }
  catch(err) {
    console.log(`/api/v1/hashes/check/<hash> responsed with HTTP ${err}`);
    output.value += `An error occurred while trying to verify file.`;
  }
});

Wyświetl plik

@ -96,6 +96,26 @@
<button>I understand, proceed.</button>
</dialog>
<dialog id="check-pdf">
<h2>Check a PDF</h2>
<p>This tool verifies that a given .pdf file was created by archive.social by matching its fingerprint against our logs.</p>
<input
aria-label="File input for the PDF file to check"
type="file"
accept=".pdf"
name="check-pdf-file"/>
<textarea disabled>Results will be displayed here.</textarea>
<button>Check PDF</button>
<a href="/">Close</a>
</dialog>
<!-- EXPLAINER -->
<section>
<h2>What is this?</h2>
@ -132,7 +152,7 @@
<h2>How do I check that a PDF came from you?</h2>
<p>You can download <a href="https://crt.sh/?id=8004113167">this public key file</a> and add it to Adobe Acrobat. We'll also add a page on our site shortly where you can upload a PDF and we'll confirm whether it really came from us.</p>
<p>You can download <a href="https://crt.sh/?id=8004113167">this public key file (.PEM)</a> and add it to Adobe Acrobat. You can also use <a href="#check-pdf">this form</a> to confirm whether it really came from us by checking its fingerprint against our logs.</p>
<p>Tech nerd note: As an extra check, the key you're downloading here happens to be one that we also verified via LetsEncrypt as belonging to our domain. You can see the same key in <a href="https://crt.sh/?id=8004113167">the certificate transparency logs</a>.</p>

Wyświetl plik

@ -4,9 +4,12 @@
* @author The Harvard Library Innovation Lab
* @license MIT
*/
import fs from "fs";
import assert from "assert";
import fs from "fs";
import { DATA_PATH } from "../const.js";
import { validate as uuidValidate } from 'uuid';
import { DATA_PATH } from "../const.js";
/**
* Utility class for handling access keys to the app.
@ -20,21 +23,43 @@ export class AccessKeys {
*/
static filepath = `${DATA_PATH}access-keys.json`;
/**
* Tries to load access keys hashmap from disk.
* Creates empty file if none provided.
*
* @returns {object} - Frozen object
* Frozen hashmap of available access keys
* (app needs to be restarted for new keys to be taken into account, for now).
* @type {object.<string,boolean>}
*/
static fetch() {
#keys = {};
/**
* On init:
* - Create access keys file if it doesn't exist
* - Load keys from file into `this.#keys`.
*/
constructor() {
const filepath = AccessKeys.filepath;
try {
const keys = fs.readFileSync(filepath);
return Object.freeze(JSON.parse(keys));
this.#keys = Object.freeze(JSON.parse(keys));
}
catch (err) {
fs.writeFileSync(filepath, "{}");
}
}
/**
* Checks that a given access key is valid and active.
* @param {string} accessKey
*/
check(accessKey) {
try {
assert(uuidValidate(accessKey));
assert(this.#keys[accessKey] === true);
return true;
}
catch(err) {
return false;
}
}
}

Wyświetl plik

@ -5,6 +5,7 @@
* @license MIT
*/
import fs from "fs";
import readline from "node:readline";
import crypto from "crypto";
import { DATA_PATH } from "../const.js";
@ -18,28 +19,94 @@ export class SuccessLog {
static filepath = `${DATA_PATH}success-log.tsv`;
/**
* Adds an entry to `success-log.json`.
* @param {*} accessKey
* @param {Buffer} pdfBytes - Used to store a SHA512 hash of the PDF that was delivered
* Hashmap of all the sha512 hashes present in the current log file.
* Used for fast lookups.
*
* @type {object.<string, boolean>}
*/
static add(accessKey, pdfBytes) {
#hashes = {};
/**
* On init:
* - Create log file if it doesn't exist
* - Load hashes from file into `this.#hashes`.
*/
constructor() {
const filepath = SuccessLog.filepath;
// Create file if it does not exist
if (!fs.existsSync(SuccessLog.filepath)) {
SuccessLog.reset();
if (!fs.existsSync(filepath)) {
this.reset();
}
// Calculate SHA512 hash of the PDF
const pdfHash = crypto.createHash('sha512').update(pdfBytes).digest('base64');
// Load hashes from existing file into hashmap (asynchronous)
const readLogs = readline.createInterface({
input: fs.createReadStream(filepath),
crlfDelay: Infinity
});
// Save entry to file
const entry = `${new Date().toISOString()}\t${accessKey}\tsha512-${pdfHash}\n`;
readLogs.on("line", (line) => {
// Skip lines that are not log lines
if (line[0] === "d" || line[0] === "\n") {
return;
}
// Grab last 95 chars of line, check it's a sha512 hash, add to #hashes.
const lineLength = line.length;
const hash = line.substring(lineLength - 95);
if (hash.length === 95 && hash.startsWith("sha512-")) {
this.#hashes[hash] = true;
}
});
}
/**
* Calculates hash of a PDF an:
* - Creates a success log entry
* - Updates `this.#hashes` (so it doesn't need to reload from file)
*
* @param {string} accessKey
* @param {Buffer} pdfBytes - Used to store a SHA512 hash of the PDF that was delivered
*/
add(accessKey, pdfBytes) {
// Calculate SHA512 hash of the PDF
const hash = crypto.createHash('sha512').update(pdfBytes).digest('base64');
// Save entry
const entry = `${new Date().toISOString()}\t${accessKey}\tsha512-${hash}\n`;
fs.appendFileSync(SuccessLog.filepath, entry);
this.#hashes[hash] = true;
}
/**
* Checks whether or not a given hash is present in the logs.
* @param {string} hash
* @returns {boolean}
*/
findHashInLogs(hash) {
hash = String(hash);
// Compensate for the absence of "sha512-"
if (hash.length === 88) {
hash = `sha512-${hash}`;
}
if (hash.length < 95) {
return false;
}
return hash in this.#hashes && this.#hashes[hash] === true;
}
/**
* Resets `success-log.tsv` (rewrites it with only the header row).
* Also clears `this.#hashes`.
* @returns {void}
*/
static reset() {
reset() {
fs.writeFileSync(SuccessLog.filepath, "date-time\taccess-key\thash\n");
this.#hashes = {};
}
}