kopia lustrzana https://github.com/cloudflare/wildebeest
MOW-102 - Add sanitization to ActivityPub Objects (#118)
* Add sanitization to ActivityPub Objects
pull/121/head
rodzic
5fb59a809e
commit
abd41ae8ab
|
|
@ -1,6 +1,6 @@
|
|||
import { defaultImages } from 'wildebeest/config/accounts'
|
||||
import { generateUserKey } from 'wildebeest/backend/src/utils/key-ops'
|
||||
import type { Object } from '../objects'
|
||||
import { type Object, sanitizeContent, sanitizeName } from '../objects'
|
||||
|
||||
const PERSON = 'Person'
|
||||
const isTesting = typeof jest !== 'undefined'
|
||||
|
|
@ -58,6 +58,16 @@ export async function get(url: string | URL): Promise<Actor> {
|
|||
const actor: Actor = { ...data }
|
||||
actor.id = new URL(data.id)
|
||||
|
||||
if (data.content) {
|
||||
actor.content = await sanitizeContent(data.content)
|
||||
}
|
||||
if (data.name) {
|
||||
actor.name = await sanitizeName(data.name)
|
||||
}
|
||||
if (data.preferredUsername) {
|
||||
actor.preferredUsername = await sanitizeName(data.preferredUsername)
|
||||
}
|
||||
|
||||
// This is mostly for testing where for convenience not all values
|
||||
// are provided.
|
||||
// TODO: eventually clean that to better match production.
|
||||
|
|
|
|||
|
|
@ -29,33 +29,33 @@ export function uri(domain: string, id: string): URL {
|
|||
return new URL('/ap/o/' + id, 'https://' + domain)
|
||||
}
|
||||
|
||||
export async function createObject(
|
||||
export async function createObject<Type extends Object>(
|
||||
domain: string,
|
||||
db: D1Database,
|
||||
type: string,
|
||||
properties: any,
|
||||
originalActorId: URL,
|
||||
local: boolean
|
||||
): Promise<Object> {
|
||||
): Promise<Type> {
|
||||
const uuid = crypto.randomUUID()
|
||||
const apId = uri(domain, uuid).toString()
|
||||
const sanitizedProperties = await sanitizeObjectProperties(properties)
|
||||
|
||||
const row: any = await db
|
||||
.prepare(
|
||||
'INSERT INTO objects(id, type, properties, original_actor_id, local, mastodon_id) VALUES(?, ?, ?, ?, ?, ?) RETURNING *'
|
||||
)
|
||||
.bind(apId, type, JSON.stringify(properties), originalActorId.toString(), local ? 1 : 0, uuid)
|
||||
.bind(apId, type, JSON.stringify(sanitizedProperties), originalActorId.toString(), local ? 1 : 0, uuid)
|
||||
.first()
|
||||
|
||||
return {
|
||||
...properties,
|
||||
|
||||
...sanitizedProperties,
|
||||
type,
|
||||
id: new URL(row.id),
|
||||
mastodonId: row.mastodon_id,
|
||||
published: new Date(row.cdate).toISOString(),
|
||||
originalActorId: row.original_actor_id,
|
||||
} as Object
|
||||
} as Type
|
||||
}
|
||||
|
||||
export async function get<T>(url: URL): Promise<T> {
|
||||
|
|
@ -78,11 +78,13 @@ type CacheObjectRes = {
|
|||
export async function cacheObject(
|
||||
domain: string,
|
||||
db: D1Database,
|
||||
properties: any,
|
||||
properties: unknown,
|
||||
originalActorId: URL,
|
||||
originalObjectId: URL,
|
||||
local: boolean
|
||||
): Promise<CacheObjectRes> {
|
||||
const sanitizedProperties = await sanitizeObjectProperties(properties)
|
||||
|
||||
const cachedObject = await getObjectBy(db, 'original_object_id', originalObjectId.toString())
|
||||
if (cachedObject !== null) {
|
||||
return {
|
||||
|
|
@ -100,8 +102,8 @@ export async function cacheObject(
|
|||
)
|
||||
.bind(
|
||||
apId,
|
||||
properties.type,
|
||||
JSON.stringify(properties),
|
||||
sanitizedProperties.type,
|
||||
JSON.stringify(sanitizedProperties),
|
||||
originalActorId.toString(),
|
||||
originalObjectId.toString(),
|
||||
local ? 1 : 0,
|
||||
|
|
@ -179,3 +181,71 @@ WHERE objects.${key}=?
|
|||
originalObjectId: result.original_object_id,
|
||||
} as Object
|
||||
}
|
||||
|
||||
/**
 * Is the given `value` an ActivityPub Object?
 *
 * This is a structural check only: it accepts any non-null, non-array object
 * and does not validate individual ActivityPub fields.
 *
 * Arrays are rejected explicitly. `typeof [] === 'object'`, so without this
 * check a list would be treated as a single Object and later spread into a
 * meaningless index-keyed record.
 *
 * @param value - the value to test.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
export function isObject(value: unknown): value is Object {
	return value !== null && typeof value === 'object' && !Array.isArray(value)
}
|
||||
|
||||
/**
 * Produces a sanitized copy of the ActivityPub Object `properties`, intended
 * to be what gets stored in the DB.
 *
 * The input is shallow-copied. If it has a `content` property, that is passed
 * through `sanitizeContent`; if it has a `name` property, that is passed
 * through `sanitizeName`. All other properties are copied unchanged.
 *
 * @param properties - untrusted value expected to be an ActivityPub Object.
 * @returns the sanitized shallow copy.
 * @throws Error when `properties` does not pass `isObject`.
 */
export async function sanitizeObjectProperties(properties: unknown): Promise<Object> {
	if (!isObject(properties)) {
		const received = JSON.stringify(properties)
		throw new Error('Invalid object properties. Expected an object but got ' + received)
	}

	const result: Object = { ...properties }

	if ('content' in properties) {
		const rawContent = properties.content as string
		result.content = await sanitizeContent(rawContent)
	}

	if ('name' in properties) {
		const rawName = properties.name as string
		result.name = await sanitizeName(rawName)
	}

	return result
}
|
||||
|
||||
/**
 * Sanitizes a string so it can be used as ActivityPub Object content.
 *
 * The string is streamed through `contentRewriter`, which follows Mastodon's
 * rules:
 * - any element other than `<p>`, `<span>`, `<br>` or `<a>` is converted to `<p>`.
 * - CSS classes that are neither micro-formats nor semantic are removed.
 *
 * See https://docs.joinmastodon.org/spec/activitypub/#sanitization
 *
 * @param unsafeContent - untrusted HTML content.
 * @returns the rewritten HTML.
 */
export async function sanitizeContent(unsafeContent: string): Promise<string> {
	// HTMLRewriter works on Response bodies, so wrap the raw string in one.
	const rewritten = contentRewriter.transform(new Response(unsafeContent))
	const sanitized = await rewritten.text()
	return sanitized
}
|
||||
|
||||
/**
 * Sanitizes a string so it can be used as an ActivityPub Object name.
 *
 * The string is streamed through `nameRewriter`, which strips every HTML
 * element and keeps only the text content.
 *
 * @param unsafeName - untrusted name, possibly containing HTML.
 * @returns the name with all elements removed.
 */
export async function sanitizeName(unsafeName: string): Promise<string> {
	// HTMLRewriter works on Response bodies, so wrap the raw string in one.
	const rewritten = nameRewriter.transform(new Response(unsafeName))
	const sanitized = await rewritten.text()
	return sanitized
}
|
||||
|
||||
/**
 * URL schemes that may remain in an `<a href>`. Mirrors the link protocols
 * accepted by Mastodon's HTML sanitizer.
 */
const SAFE_LINK_PROTOCOLS = [
	'http',
	'https',
	'dat',
	'dweb',
	'ipfs',
	'ipns',
	'ssb',
	'gopher',
	'xmpp',
	'magnet',
	'gemini',
]

/**
 * Decides whether an untrusted `href` value may be kept.
 *
 * Relative references (no scheme) are kept. Absolute references are kept only
 * when their scheme is in `SAFE_LINK_PROTOCOLS`, which rejects `javascript:`,
 * `data:`, `vbscript:` and similar.
 */
function isSafeHref(href: string): boolean {
	// Browsers ignore ASCII whitespace/control characters when parsing the
	// scheme, so `java\tscript:` must be treated like `javascript:`.
	const compact = href.replace(/[\u0000-\u0020]/g, '')
	// A scheme can only appear before the first path, query or fragment delimiter.
	const head = compact.split(/[\/?#]/, 1)[0]
	// A character reference in this position could hide a scheme or its colon
	// (e.g. `javascript&colon;`). Reject conservatively.
	// NOTE(review): confirm whether `getAttribute` returns entity-decoded values;
	// if it does, this guard is redundant but harmless.
	if (head.includes('&')) {
		return false
	}
	const colon = head.indexOf(':')
	if (colon === -1) {
		return true
	}
	return SAFE_LINK_PROTOCOLS.includes(head.slice(0, colon).toLowerCase())
}

/**
 * Rewriter behind `sanitizeContent`. For every element of untrusted HTML it:
 * - renames the element to `<p>` unless it is `<p>`, `<span>`, `<br>` or `<a>`;
 * - removes every attribute except `class` (any element) and `href` / `rel`
 *   (`<a>` only), so event handlers, inline styles and the like are not stored;
 * - removes an `<a href>` whose URL scheme is not considered safe;
 * - keeps only micro-format / semantic CSS classes.
 */
const contentRewriter = new HTMLRewriter()
contentRewriter.on('*', {
	element(el) {
		// `<a>` is never renamed below, so this stays valid afterwards.
		const isAnchor = el.tagName === 'a'

		if (!['p', 'span', 'br', 'a'].includes(el.tagName)) {
			el.tagName = 'p'
		}

		// Snapshot the names first so attributes are not removed while the
		// attribute iterator is still being consumed.
		const attributeNames = Array.from(el.attributes).map((attribute) => attribute[0])
		for (const name of attributeNames) {
			const lowered = name.toLowerCase()
			const allowed = lowered === 'class' || (isAnchor && (lowered === 'href' || lowered === 'rel'))
			if (!allowed) {
				el.removeAttribute(name)
			}
		}

		if (isAnchor && el.hasAttribute('href') && !isSafeHref(el.getAttribute('href') ?? '')) {
			el.removeAttribute('href')
		}

		if (el.hasAttribute('class')) {
			const classes = el.getAttribute('class')!.split(/\s+/)
			const sanitizedClasses = classes.filter((c) =>
				/^(h|p|u|dt|e)-|^mention$|^hashtag$|^ellipsis$|^invisible$/.test(c)
			)
			el.setAttribute('class', sanitizedClasses.join(' '))
		}
	},
})
|
||||
|
||||
/**
 * Rewriter behind `sanitizeName`: removes every element but keeps its
 * content, so only the text remains.
 */
const nameRewriter = new HTMLRewriter().on('*', {
	element: (el) => {
		el.removeAndKeepContent()
	},
})
|
||||
|
|
|
|||
|
|
@ -335,6 +335,34 @@ describe('ActivityPub', () => {
|
|||
const row = await db.prepare('SELECT * FROM outbox_objects').first()
|
||||
assert.equal(row.target, 'some actor')
|
||||
})
|
||||
|
||||
// Handles a Create activity carrying an unsafe Note and checks that the
// stored object's `name` and `content` are sanitized.
test('Object props get sanitized', async () => {
	const db = await makeDB()
	const person = await createPerson(domain, db, userKEK, 'sven@cloudflare.com')

	// Note whose name and content contain markup the sanitizer must rewrite.
	const unsafeNote = {
		id: 'https://example.com/note2',
		type: 'Note',
		name: '<script>Dr Evil</script>',
		content:
			'<div><span class="bad h-10 p-100\tu-22\r\ndt-xi e-bam mention hashtag ellipsis invisible o-bad">foo</span><br/><p><a href="blah"><b>bold</b></a></p><script>alert("evil")</script></div>',
	}

	const activity = {
		'@context': 'https://www.w3.org/ns/activitystreams',
		type: 'Create',
		actor: person,
		object: unsafeNote,
	}

	await activityHandler.handle(domain, activity, db, userKEK, adminEmail, vapidKeys)

	const stored = await db.prepare(`SELECT * from objects`).first()
	const storedProperties = JSON.parse(stored.properties)

	assert.equal(
		storedProperties.content,
		'<p><span class="h-10 p-100 u-22 dt-xi e-bam mention hashtag ellipsis invisible">foo</span><br/><p><a href="blah"><p>bold</p></a></p><p>alert("evil")</p></p>'
	)
	assert.equal(storedProperties.name, 'Dr Evil')
})
|
||||
})
|
||||
|
||||
describe('Update', () => {
|
||||
|
|
@ -472,7 +500,7 @@ describe('ActivityPub', () => {
|
|||
}
|
||||
await activityHandler.handle(domain, activity, db, userKEK, adminEmail, vapidKeys)
|
||||
|
||||
const object = await db.prepare('SELECT * FROM objects').bind(remoteActorId).first()
|
||||
const object = await db.prepare('SELECT * FROM objects').first()
|
||||
assert(object)
|
||||
assert.equal(object.type, 'Note')
|
||||
assert.equal(object.original_actor_id, remoteActorId)
|
||||
|
|
|
|||
|
|
@ -231,7 +231,8 @@ describe('Mastodon APIs', () => {
|
|||
id: 'https://social.com/someone',
|
||||
url: 'https://social.com/@someone',
|
||||
type: 'Person',
|
||||
preferredUsername: 'sven',
|
||||
preferredUsername: '<script>bad</script>sven',
|
||||
name: 'Sven <i>Cool<i>',
|
||||
outbox: 'https://social.com/someone/outbox',
|
||||
following: 'https://social.com/someone/following',
|
||||
followers: 'https://social.com/someone/followers',
|
||||
|
|
@ -283,7 +284,9 @@ describe('Mastodon APIs', () => {
|
|||
assert.equal(res.status, 200)
|
||||
|
||||
const data = await res.json<any>()
|
||||
assert.equal(data.username, 'sven')
|
||||
// Note the sanitization
|
||||
assert.equal(data.username, 'badsven')
|
||||
assert.equal(data.display_name, 'Sven Cool')
|
||||
assert.equal(data.acct, 'sven@social.com')
|
||||
|
||||
assert(isUrlValid(data.url))
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ describe('Mastodon APIs', () => {
|
|||
const actor = await createPerson(domain, db, userKEK, 'sven@cloudflare.com')
|
||||
|
||||
const body = {
|
||||
status: 'my status',
|
||||
status: 'my status <script>evil</script>',
|
||||
visibility: 'public',
|
||||
}
|
||||
const req = new Request('https://example.com', {
|
||||
|
|
@ -87,7 +87,7 @@ describe('Mastodon APIs', () => {
|
|||
`
|
||||
)
|
||||
.first()
|
||||
assert.equal(row.content, 'my status')
|
||||
assert.equal(row.content, 'my status <p>evil</p>') // note the sanitization
|
||||
assert.equal(row.original_actor_id.toString(), actor.id.toString())
|
||||
assert.equal(row.original_object_id, null)
|
||||
})
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ export async function handleRequest(domain: string, id: string, db: D1Database):
|
|||
}
|
||||
|
||||
async function getRemoteAccount(handle: Handle, acct: string): Promise<Response> {
|
||||
// TODO: using webfinger isn't the optimal implemnetation. We could cache
|
||||
// TODO: using webfinger isn't the optimal implementation. We could cache
|
||||
// the object in D1 and directly query the remote API, indicated by the actor's
|
||||
// url field. For now, let's keep it simple.
|
||||
const actor = await queryAcct(handle.domain!, acct)
|
||||
|
|
|
|||
Ładowanie…
Reference in New Issue