MOW-102 - Add sanitization to ActivityPub Objects (#118)

* Add sanitization to ActivityPub Objects
pull/121/head
Pete Bacon Darwin 2023-01-17 12:00:07 +00:00 zatwierdzone przez GitHub
rodzic 5fb59a809e
commit abd41ae8ab
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
6 zmienionych plików z 127 dodań i 16 usunięć

Wyświetl plik

@ -1,6 +1,6 @@
import { defaultImages } from 'wildebeest/config/accounts'
import { generateUserKey } from 'wildebeest/backend/src/utils/key-ops'
import type { Object } from '../objects'
import { type Object, sanitizeContent, sanitizeName } from '../objects'
const PERSON = 'Person'
const isTesting = typeof jest !== 'undefined'
@ -58,6 +58,16 @@ export async function get(url: string | URL): Promise<Actor> {
const actor: Actor = { ...data }
actor.id = new URL(data.id)
if (data.content) {
actor.content = await sanitizeContent(data.content)
}
if (data.name) {
actor.name = await sanitizeName(data.name)
}
if (data.preferredUsername) {
actor.preferredUsername = await sanitizeName(data.preferredUsername)
}
// This is mostly for testing where for convenience not all values
// are provided.
// TODO: eventually clean that to better match production.

Wyświetl plik

@ -29,33 +29,33 @@ export function uri(domain: string, id: string): URL {
return new URL('/ap/o/' + id, 'https://' + domain)
}
/**
 * Inserts a new ActivityPub object into the `objects` table and returns it.
 *
 * A fresh UUID is generated for the object. It is stored as-is in the
 * `mastodon_id` column and is also passed to `uri()` to build the object's
 * ActivityPub id under `domain`. The given `properties` are run through
 * `sanitizeObjectProperties` and the sanitized result is what gets serialized
 * into the `properties` column and spread into the returned value.
 *
 * NOTE(review): this listing is a rendered diff, so several statements below
 * appear twice (the pre-change line directly followed by its replacement):
 * the signature, the return type, the `.bind(...)` call, the property spread
 * and the final cast.
 *
 * @param domain - host name used to build the object's id URL
 * @param db - D1 database the row is inserted into
 * @param type - written to the `type` column and set on the returned object
 * @param properties - object properties; sanitized before being stored
 * @param originalActorId - stringified into the `original_actor_id` column
 * @param local - stored as `1` when true and `0` when false
 * @returns the sanitized properties merged with `type` and with `id`,
 *   `mastodonId`, `published` and `originalActorId` read back from the inserted row
 */
export async function createObject(
export async function createObject<Type extends Object>(
domain: string,
db: D1Database,
type: string,
properties: any,
originalActorId: URL,
local: boolean
): Promise<Object> {
): Promise<Type> {
const uuid = crypto.randomUUID()
const apId = uri(domain, uuid).toString()
const sanitizedProperties = await sanitizeObjectProperties(properties)
const row: any = await db
.prepare(
'INSERT INTO objects(id, type, properties, original_actor_id, local, mastodon_id) VALUES(?, ?, ?, ?, ?, ?) RETURNING *'
)
.bind(apId, type, JSON.stringify(properties), originalActorId.toString(), local ? 1 : 0, uuid)
.bind(apId, type, JSON.stringify(sanitizedProperties), originalActorId.toString(), local ? 1 : 0, uuid)
.first()
return {
...properties,
...sanitizedProperties,
type,
id: new URL(row.id),
mastodonId: row.mastodon_id,
published: new Date(row.cdate).toISOString(),
originalActorId: row.original_actor_id,
} as Object
} as Type
}
export async function get<T>(url: URL): Promise<T> {
@ -78,11 +78,13 @@ type CacheObjectRes = {
export async function cacheObject(
domain: string,
db: D1Database,
properties: any,
properties: unknown,
originalActorId: URL,
originalObjectId: URL,
local: boolean
): Promise<CacheObjectRes> {
const sanitizedProperties = await sanitizeObjectProperties(properties)
const cachedObject = await getObjectBy(db, 'original_object_id', originalObjectId.toString())
if (cachedObject !== null) {
return {
@ -100,8 +102,8 @@ export async function cacheObject(
)
.bind(
apId,
properties.type,
JSON.stringify(properties),
sanitizedProperties.type,
JSON.stringify(sanitizedProperties),
originalActorId.toString(),
originalObjectId.toString(),
local ? 1 : 0,
@ -179,3 +181,71 @@ WHERE objects.${key}=?
originalObjectId: result.original_object_id,
} as Object
}
/**
 * Is the given `value` an ActivityPub Object?
 *
 * Only a structural check is performed: the value must be a non-null,
 * non-array JavaScript object. Arrays are rejected explicitly because
 * `typeof [] === 'object'`, yet an array is never a valid ActivityPub Object;
 * letting one through would have it spread into an index-keyed record.
 *
 * @param value - any value, typically freshly parsed JSON
 * @returns `true` when `value` can be treated as an ActivityPub Object
 */
export function isObject(value: unknown): value is Object {
	return typeof value === 'object' && value !== null && !Array.isArray(value)
}
/**
 * Produces a sanitized copy of the ActivityPub Object `properties`, ready to be stored in the DB.
 *
 * The input is shallow-copied; when a `content` key is present it is replaced by the
 * output of `sanitizeContent`, and when a `name` key is present it is replaced by the
 * output of `sanitizeName`. All other keys are copied over untouched.
 *
 * @throws Error when `properties` does not pass `isObject`
 */
export async function sanitizeObjectProperties(properties: unknown): Promise<Object> {
	if (isObject(properties)) {
		const clean: Object = { ...properties }

		// `content` is handled before `name`, one after the other.
		if ('content' in properties) {
			const rawContent = properties.content as string
			clean.content = await sanitizeContent(rawContent)
		}

		if ('name' in properties) {
			const rawName = properties.name as string
			clean.name = await sanitizeName(rawName)
		}

		return clean
	}

	throw new Error('Invalid object properties. Expected an object but got ' + JSON.stringify(properties))
}
/**
 * Sanitizes a string so it can be used as ActivityPub Object content.
 *
 * The string is wrapped in a `Response`, streamed through `contentRewriter`
 * and read back as text. The rewriting rules follow Mastodon's:
 * - every element other than `<p>`, `<span>`, `<br>` or `<a>` is turned into a `<p>`;
 * - CSS classes that are neither micro-formats nor semantic are dropped.
 *
 * See https://docs.joinmastodon.org/spec/activitypub/#sanitization
 *
 * @param unsafeContent - untrusted HTML content
 * @returns the rewritten HTML
 */
export async function sanitizeContent(unsafeContent: string): Promise<string> {
	const unsafeResponse = new Response(unsafeContent)
	const rewritten = contentRewriter.transform(unsafeResponse)
	return rewritten.text()
}
/**
 * Sanitizes a string so it can be used as an ActivityPub Object name.
 *
 * The string is wrapped in a `Response`, streamed through `nameRewriter`
 * and read back as text, which strips every HTML element while keeping
 * the text it contained.
 *
 * @param unsafeName - untrusted, possibly HTML-bearing name
 * @returns the text content of `unsafeName`
 */
export async function sanitizeName(unsafeName: string): Promise<string> {
	const unsafeResponse = new Response(unsafeName)
	const rewritten = nameRewriter.transform(unsafeResponse)
	return rewritten.text()
}
/**
 * URL schemes an `<a href>` may use in sanitized content. This is the list
 * from Mastodon's sanitization spec
 * (https://docs.joinmastodon.org/spec/activitypub/#sanitization).
 */
const SAFE_CONTENT_HREF_SCHEMES = [
	'http',
	'https',
	'dat',
	'dweb',
	'ipfs',
	'ipns',
	'ssb',
	'gemini',
	'gopher',
	'xmpp',
	'magnet',
]

/**
 * Decides whether an `href` value is safe to keep on a sanitized `<a>`.
 *
 * Scheme-less (relative or protocol-relative) URLs are accepted. A URL with an
 * explicit scheme is accepted only if the scheme is in `SAFE_CONTENT_HREF_SCHEMES`,
 * which rejects `javascript:`, `data:`, `vbscript:` and the like.
 */
function isSafeContentHref(href: string): boolean {
	// Browsers ignore ASCII whitespace and control characters while parsing the
	// scheme, so "java\nscript:" must be treated like "javascript:".
	const compact = href.replace(/[\u0000-\u0020]/g, '')
	// A scheme can only occur before the first '/', '?' or '#'.
	const beforePath = compact.split(/[\/?#]/, 1)[0]
	// NOTE(review): the attribute value may still be entity-encoded here, so an
	// '&' in scheme position could hide "&#106;avascript:". Reject it outright.
	if (beforePath.includes('&')) {
		return false
	}
	const colonAt = beforePath.indexOf(':')
	if (colonAt === -1) {
		return true
	}
	return SAFE_CONTENT_HREF_SCHEMES.includes(beforePath.slice(0, colonAt).toLowerCase())
}

/**
 * Rewriter backing `sanitizeContent`. For every element it:
 * - renames the tag to `p` unless it is one of `p`, `span`, `br` or `a`;
 * - removes every attribute except `class` (plus `href` and `rel` on `<a>`), so
 *   event handlers (`onclick`, ...), `style` and similar never reach the stored content;
 * - removes an `<a href>` whose scheme is not allowed (see `isSafeContentHref`);
 * - keeps only micro-format / semantic CSS classes in `class`.
 */
const contentRewriter = new HTMLRewriter()
contentRewriter.on('*', {
	element(el) {
		if (!['p', 'span', 'br', 'a'].includes(el.tagName)) {
			el.tagName = 'p'
		}
		const isAnchor = el.tagName === 'a'

		// Snapshot the names first so attributes are not removed while the
		// attribute iterator is still being consumed.
		const attributeNames: string[] = []
		for (const [attributeName] of el.attributes) {
			attributeNames.push(attributeName)
		}
		for (const attributeName of attributeNames) {
			const lowered = attributeName.toLowerCase()
			const isAllowed = lowered === 'class' || (isAnchor && (lowered === 'href' || lowered === 'rel'))
			if (!isAllowed) {
				el.removeAttribute(attributeName)
			}
		}

		if (isAnchor) {
			const href = el.getAttribute('href')
			if (href !== null && !isSafeContentHref(href)) {
				el.removeAttribute('href')
			}
		}

		if (el.hasAttribute('class')) {
			const classes = (el.getAttribute('class') ?? '').split(/\s+/)
			const sanitizedClasses = classes.filter((c) =>
				/^(h|p|u|dt|e)-|^mention$|^hashtag$|^ellipsis$|^invisible$/.test(c)
			)
			el.setAttribute('class', sanitizedClasses.join(' '))
		}
	},
})
/**
 * Rewriter backing `sanitizeName`: every element matched by `*` is removed
 * while its content is kept, leaving only text behind.
 */
const nameRewriter = new HTMLRewriter().on('*', {
	element: (matched) => {
		matched.removeAndKeepContent()
	},
})

Wyświetl plik

@ -335,6 +335,34 @@ describe('ActivityPub', () => {
const row = await db.prepare('SELECT * FROM outbox_objects').first()
assert.equal(row.target, 'some actor')
})
// Checks that the `name` and `content` of an object delivered through a `Create`
// activity are sanitized before they end up in the `objects` table.
test('Object props get sanitized', async () => {
const db = await makeDB()
const person = await createPerson(domain, db, userKEK, 'sven@cloudflare.com')
const activity = {
'@context': 'https://www.w3.org/ns/activitystreams',
type: 'Create',
actor: person,
object: {
id: 'https://example.com/note2',
type: 'Note',
// HTML in the name: only the text "Dr Evil" is expected to remain.
name: '<script>Dr Evil</script>',
// Mixes allowed tags (span, br, p, a), disallowed tags (div, b, script) and a class
// list separated by spaces, a tab and CRLF that contains both kept and rejected classes.
content:
'<div><span class="bad h-10 p-100\tu-22\r\ndt-xi e-bam mention hashtag ellipsis invisible o-bad">foo</span><br/><p><a href="blah"><b>bold</b></a></p><script>alert("evil")</script></div>',
},
}
await activityHandler.handle(domain, activity, db, userKEK, adminEmail, vapidKeys)
// Read back the first stored object and inspect its serialized properties.
const row = await db.prepare(`SELECT * from objects`).first()
const { content, name } = JSON.parse(row.properties)
// Expected: div/b/script renamed to p, "bad" and "o-bad" classes dropped,
// remaining classes joined with single spaces.
assert.equal(
content,
'<p><span class="h-10 p-100 u-22 dt-xi e-bam mention hashtag ellipsis invisible">foo</span><br/><p><a href="blah"><p>bold</p></a></p><p>alert("evil")</p></p>'
)
assert.equal(name, 'Dr Evil')
})
})
describe('Update', () => {
@ -472,7 +500,7 @@ describe('ActivityPub', () => {
}
await activityHandler.handle(domain, activity, db, userKEK, adminEmail, vapidKeys)
const object = await db.prepare('SELECT * FROM objects').bind(remoteActorId).first()
const object = await db.prepare('SELECT * FROM objects').first()
assert(object)
assert.equal(object.type, 'Note')
assert.equal(object.original_actor_id, remoteActorId)

Wyświetl plik

@ -231,7 +231,8 @@ describe('Mastodon APIs', () => {
id: 'https://social.com/someone',
url: 'https://social.com/@someone',
type: 'Person',
preferredUsername: 'sven',
preferredUsername: '<script>bad</script>sven',
name: 'Sven <i>Cool<i>',
outbox: 'https://social.com/someone/outbox',
following: 'https://social.com/someone/following',
followers: 'https://social.com/someone/followers',
@ -283,7 +284,9 @@ describe('Mastodon APIs', () => {
assert.equal(res.status, 200)
const data = await res.json<any>()
assert.equal(data.username, 'sven')
// Note the sanitization
assert.equal(data.username, 'badsven')
assert.equal(data.display_name, 'Sven Cool')
assert.equal(data.acct, 'sven@social.com')
assert(isUrlValid(data.url))

Wyświetl plik

@ -48,7 +48,7 @@ describe('Mastodon APIs', () => {
const actor = await createPerson(domain, db, userKEK, 'sven@cloudflare.com')
const body = {
status: 'my status',
status: 'my status <script>evil</script>',
visibility: 'public',
}
const req = new Request('https://example.com', {
@ -87,7 +87,7 @@ describe('Mastodon APIs', () => {
`
)
.first()
assert.equal(row.content, 'my status')
assert.equal(row.content, 'my status <p>evil</p>') // note the sanitization
assert.equal(row.original_actor_id.toString(), actor.id.toString())
assert.equal(row.original_object_id, null)
})

Wyświetl plik

@ -36,7 +36,7 @@ export async function handleRequest(domain: string, id: string, db: D1Database):
}
async function getRemoteAccount(handle: Handle, acct: string): Promise<Response> {
// TODO: using webfinger isn't the optimal implemnetation. We could cache
// TODO: using webfinger isn't the optimal implementation. We could cache
// the object in D1 and directly query the remote API, indicated by the actor's
// url field. For now, let's keep it simple.
const actor = await queryAcct(handle.domain!, acct)