kopia lustrzana https://github.com/cloudflare/wildebeest
				
				
				
			MOW-102 - Add sanitization to ActivityPub Objects (#118)
* Add sanitization to ActivityPub Objects
pull/121/head
							rodzic
							
								
									5fb59a809e
								
							
						
					
					
						commit
						abd41ae8ab
					
				|  | @ -1,6 +1,6 @@ | |||
| import { defaultImages } from 'wildebeest/config/accounts' | ||||
| import { generateUserKey } from 'wildebeest/backend/src/utils/key-ops' | ||||
| import type { Object } from '../objects' | ||||
| import { type Object, sanitizeContent, sanitizeName } from '../objects' | ||||
| 
 | ||||
| const PERSON = 'Person' | ||||
| const isTesting = typeof jest !== 'undefined' | ||||
|  | @ -58,6 +58,16 @@ export async function get(url: string | URL): Promise<Actor> { | |||
| 	const actor: Actor = { ...data } | ||||
| 	actor.id = new URL(data.id) | ||||
| 
 | ||||
| 	if (data.content) { | ||||
| 		actor.content = await sanitizeContent(data.content) | ||||
| 	} | ||||
| 	if (data.name) { | ||||
| 		actor.name = await sanitizeName(data.name) | ||||
| 	} | ||||
| 	if (data.preferredUsername) { | ||||
| 		actor.preferredUsername = await sanitizeName(data.preferredUsername) | ||||
| 	} | ||||
| 
 | ||||
| 	// This is mostly for testing where for convenience not all values
 | ||||
| 	// are provided.
 | ||||
| 	// TODO: eventually clean that to better match production.
 | ||||
|  |  | |||
|  | @ -29,33 +29,33 @@ export function uri(domain: string, id: string): URL { | |||
| 	return new URL('/ap/o/' + id, 'https://' + domain) | ||||
| } | ||||
| 
 | ||||
| export async function createObject( | ||||
| export async function createObject<Type extends Object>( | ||||
| 	domain: string, | ||||
| 	db: D1Database, | ||||
| 	type: string, | ||||
| 	properties: any, | ||||
| 	originalActorId: URL, | ||||
| 	local: boolean | ||||
| ): Promise<Object> { | ||||
| ): Promise<Type> { | ||||
| 	const uuid = crypto.randomUUID() | ||||
| 	const apId = uri(domain, uuid).toString() | ||||
| 	const sanitizedProperties = await sanitizeObjectProperties(properties) | ||||
| 
 | ||||
| 	const row: any = await db | ||||
| 		.prepare( | ||||
| 			'INSERT INTO objects(id, type, properties, original_actor_id, local, mastodon_id) VALUES(?, ?, ?, ?, ?, ?) RETURNING *' | ||||
| 		) | ||||
| 		.bind(apId, type, JSON.stringify(properties), originalActorId.toString(), local ? 1 : 0, uuid) | ||||
| 		.bind(apId, type, JSON.stringify(sanitizedProperties), originalActorId.toString(), local ? 1 : 0, uuid) | ||||
| 		.first() | ||||
| 
 | ||||
| 	return { | ||||
| 		...properties, | ||||
| 
 | ||||
| 		...sanitizedProperties, | ||||
| 		type, | ||||
| 		id: new URL(row.id), | ||||
| 		mastodonId: row.mastodon_id, | ||||
| 		published: new Date(row.cdate).toISOString(), | ||||
| 		originalActorId: row.original_actor_id, | ||||
| 	} as Object | ||||
| 	} as Type | ||||
| } | ||||
| 
 | ||||
| export async function get<T>(url: URL): Promise<T> { | ||||
|  | @ -78,11 +78,13 @@ type CacheObjectRes = { | |||
| export async function cacheObject( | ||||
| 	domain: string, | ||||
| 	db: D1Database, | ||||
| 	properties: any, | ||||
| 	properties: unknown, | ||||
| 	originalActorId: URL, | ||||
| 	originalObjectId: URL, | ||||
| 	local: boolean | ||||
| ): Promise<CacheObjectRes> { | ||||
| 	const sanitizedProperties = await sanitizeObjectProperties(properties) | ||||
| 
 | ||||
| 	const cachedObject = await getObjectBy(db, 'original_object_id', originalObjectId.toString()) | ||||
| 	if (cachedObject !== null) { | ||||
| 		return { | ||||
|  | @ -100,8 +102,8 @@ export async function cacheObject( | |||
| 		) | ||||
| 		.bind( | ||||
| 			apId, | ||||
| 			properties.type, | ||||
| 			JSON.stringify(properties), | ||||
| 			sanitizedProperties.type, | ||||
| 			JSON.stringify(sanitizedProperties), | ||||
| 			originalActorId.toString(), | ||||
| 			originalObjectId.toString(), | ||||
| 			local ? 1 : 0, | ||||
|  | @ -179,3 +181,71 @@ WHERE objects.${key}=? | |||
| 		originalObjectId: result.original_object_id, | ||||
| 	} as Object | ||||
| } | ||||
| 
 | ||||
/**
 * Is the given `value` an ActivityPub Object?
 *
 * This is a structural check only: it accepts any non-null, non-array object
 * and does not validate individual properties.
 *
 * Arrays are rejected even though `typeof [] === 'object'`, because spreading
 * an array into an object literal would yield index-keyed properties rather
 * than an ActivityPub Object.
 *
 * @param value - the value to test
 * @returns `true` when `value` is a non-null object that is not an array
 */
export function isObject(value: unknown): value is Object {
	return typeof value === 'object' && value !== null && !Array.isArray(value)
}
| 
 | ||||
/**
 * Sanitizes the ActivityPub Object `properties` prior to being stored in the DB.
 *
 * Returns a shallow copy of `properties`; the input is not mutated. When a
 * `content` key is present its value is passed through `sanitizeContent`, and
 * when a `name` key is present its value is passed through `sanitizeName`.
 * All other properties are copied unchanged.
 *
 * @param properties - the untrusted value to sanitize
 * @returns the sanitized copy
 * @throws Error when `properties` does not pass `isObject`
 */
export async function sanitizeObjectProperties(properties: unknown): Promise<Object> {
	if (isObject(properties)) {
		const result: Object = { ...properties }

		if ('content' in properties) {
			const unsafeContent = properties.content as string
			result.content = await sanitizeContent(unsafeContent)
		}

		if ('name' in properties) {
			const unsafeName = properties.name as string
			result.name = await sanitizeName(unsafeName)
		}

		return result
	}

	throw new Error('Invalid object properties. Expected an object but got ' + JSON.stringify(properties))
}
| 
 | ||||
/**
 * Sanitizes the given string as ActivityPub Object content.
 *
 * The string is wrapped in a `Response`, streamed through `contentRewriter`
 * and read back as text. The sanitization follows that of Mastodon:
 *  - every element other than `<p>`, `<span>`, `<br>` or `<a>` is converted to `<p>`.
 *  - CSS classes that are neither micro-formats nor semantic are removed.
 *
 * See https://docs.joinmastodon.org/spec/activitypub/#sanitization
 *
 * @param unsafeContent - the untrusted HTML string
 * @returns the rewritten HTML
 */
export async function sanitizeContent(unsafeContent: string): Promise<string> {
	const rewritten = contentRewriter.transform(new Response(unsafeContent))
	return rewritten.text()
}
| 
 | ||||
/**
 * Sanitizes given string as an ActivityPub Object name.
 *
 * The string is wrapped in a `Response`, streamed through `nameRewriter`
 * and read back as text. All HTML elements are removed, leaving only the
 * text content.
 *
 * @param unsafeName - the untrusted name string
 * @returns the name with all HTML elements removed
 */
export async function sanitizeName(unsafeName: string): Promise<string> {
	const rewritten = nameRewriter.transform(new Response(unsafeName))
	return rewritten.text()
}
| 
 | ||||
/**
 * Rewriter used by `sanitizeContent`. For every element it:
 *  - renames any tag other than `p`, `span`, `br` or `a` to `p`;
 *  - removes attributes that can execute script once the content is rendered:
 *    `on*` event handlers, `style`, and `href` values using a `javascript:`,
 *    `vbscript:` or `data:` scheme;
 *  - keeps only CSS classes that are micro-formats (`h-`, `p-`, `u-`, `dt-`,
 *    `e-` prefixes) or one of `mention`, `hashtag`, `ellipsis`, `invisible`.
 */
const contentRewriter = new HTMLRewriter()
contentRewriter.on('*', {
	element(el) {
		if (!['p', 'span', 'br', 'a'].includes(el.tagName)) {
			el.tagName = 'p'
		}

		// Renaming a tag does not remove its attributes, so `<img onerror=...>`
		// would otherwise survive as `<p onerror=...>`. Snapshot the attribute
		// list first because attributes are removed while walking it.
		for (const [attrName, attrValue] of Array.from(el.attributes)) {
			const lowerName = attrName.toLowerCase()
			// Browsers ignore whitespace and control characters inside a URL
			// scheme, so strip them before inspecting the scheme.
			const compactValue = attrValue.replace(/[\x00-\x20]/g, '')
			const isEventHandler = lowerName.startsWith('on')
			const isScriptUrl = lowerName === 'href' && /^(javascript|vbscript|data):/i.test(compactValue)
			if (isEventHandler || lowerName === 'style' || isScriptUrl) {
				el.removeAttribute(attrName)
			}
		}

		const classValue = el.getAttribute('class')
		if (classValue !== null) {
			const sanitizedClasses = classValue
				.split(/\s+/)
				.filter((c) => /^(h|p|u|dt|e)-|^mention$|^hashtag$|^ellipsis$|^invisible$/.test(c))
			el.setAttribute('class', sanitizedClasses.join(' '))
		}
	},
})
| 
 | ||||
/**
 * Rewriter used by `sanitizeName`: removes every element while keeping its
 * content, so only text remains.
 */
const nameRewriter = new HTMLRewriter().on('*', {
	element(node) {
		node.removeAndKeepContent()
	},
})
|  |  | |||
|  | @ -335,6 +335,34 @@ describe('ActivityPub', () => { | |||
| 				const row = await db.prepare('SELECT * FROM outbox_objects').first() | ||||
| 				assert.equal(row.target, 'some actor') | ||||
| 			}) | ||||
| 
 | ||||
| 			test('Object props get sanitized', async () => { | ||||
| 				const db = await makeDB() | ||||
| 				const person = await createPerson(domain, db, userKEK, 'sven@cloudflare.com') | ||||
| 
 | ||||
| 				const activity = { | ||||
| 					'@context': 'https://www.w3.org/ns/activitystreams', | ||||
| 					type: 'Create', | ||||
| 					actor: person, | ||||
| 					object: { | ||||
| 						id: 'https://example.com/note2', | ||||
| 						type: 'Note', | ||||
| 						name: '<script>Dr Evil</script>', | ||||
| 						content: | ||||
| 							'<div><span class="bad h-10 p-100\tu-22\r\ndt-xi e-bam mention hashtag ellipsis invisible o-bad">foo</span><br/><p><a href="blah"><b>bold</b></a></p><script>alert("evil")</script></div>', | ||||
| 					}, | ||||
| 				} | ||||
| 
 | ||||
| 				await activityHandler.handle(domain, activity, db, userKEK, adminEmail, vapidKeys) | ||||
| 
 | ||||
| 				const row = await db.prepare(`SELECT * from objects`).first() | ||||
| 				const { content, name } = JSON.parse(row.properties) | ||||
| 				assert.equal( | ||||
| 					content, | ||||
| 					'<p><span class="h-10 p-100 u-22 dt-xi e-bam mention hashtag ellipsis invisible">foo</span><br/><p><a href="blah"><p>bold</p></a></p><p>alert("evil")</p></p>' | ||||
| 				) | ||||
| 				assert.equal(name, 'Dr Evil') | ||||
| 			}) | ||||
| 		}) | ||||
| 
 | ||||
| 		describe('Update', () => { | ||||
|  | @ -472,7 +500,7 @@ describe('ActivityPub', () => { | |||
| 				} | ||||
| 				await activityHandler.handle(domain, activity, db, userKEK, adminEmail, vapidKeys) | ||||
| 
 | ||||
| 				const object = await db.prepare('SELECT * FROM objects').bind(remoteActorId).first() | ||||
| 				const object = await db.prepare('SELECT * FROM objects').first() | ||||
| 				assert(object) | ||||
| 				assert.equal(object.type, 'Note') | ||||
| 				assert.equal(object.original_actor_id, remoteActorId) | ||||
|  |  | |||
|  | @ -231,7 +231,8 @@ describe('Mastodon APIs', () => { | |||
| 							id: 'https://social.com/someone', | ||||
| 							url: 'https://social.com/@someone', | ||||
| 							type: 'Person', | ||||
| 							preferredUsername: 'sven', | ||||
| 							preferredUsername: '<script>bad</script>sven', | ||||
| 							name: 'Sven <i>Cool<i>', | ||||
| 							outbox: 'https://social.com/someone/outbox', | ||||
| 							following: 'https://social.com/someone/following', | ||||
| 							followers: 'https://social.com/someone/followers', | ||||
|  | @ -283,7 +284,9 @@ describe('Mastodon APIs', () => { | |||
| 			assert.equal(res.status, 200) | ||||
| 
 | ||||
| 			const data = await res.json<any>() | ||||
| 			assert.equal(data.username, 'sven') | ||||
| 			// Note the sanitization
 | ||||
| 			assert.equal(data.username, 'badsven') | ||||
| 			assert.equal(data.display_name, 'Sven Cool') | ||||
| 			assert.equal(data.acct, 'sven@social.com') | ||||
| 
 | ||||
| 			assert(isUrlValid(data.url)) | ||||
|  |  | |||
|  | @ -48,7 +48,7 @@ describe('Mastodon APIs', () => { | |||
| 			const actor = await createPerson(domain, db, userKEK, 'sven@cloudflare.com') | ||||
| 
 | ||||
| 			const body = { | ||||
| 				status: 'my status', | ||||
| 				status: 'my status <script>evil</script>', | ||||
| 				visibility: 'public', | ||||
| 			} | ||||
| 			const req = new Request('https://example.com', { | ||||
|  | @ -87,7 +87,7 @@ describe('Mastodon APIs', () => { | |||
|         ` | ||||
| 				) | ||||
| 				.first() | ||||
| 			assert.equal(row.content, 'my status') | ||||
| 			assert.equal(row.content, 'my status <p>evil</p>') // note the sanitization
 | ||||
| 			assert.equal(row.original_actor_id.toString(), actor.id.toString()) | ||||
| 			assert.equal(row.original_object_id, null) | ||||
| 		}) | ||||
|  |  | |||
|  | @ -36,7 +36,7 @@ export async function handleRequest(domain: string, id: string, db: D1Database): | |||
| } | ||||
| 
 | ||||
| async function getRemoteAccount(handle: Handle, acct: string): Promise<Response> { | ||||
| 	// TODO: using webfinger isn't the optimal implemnetation. We could cache
 | ||||
| 	// TODO: using webfinger isn't the optimal implementation. We could cache
 | ||||
| 	// the object in D1 and directly query the remote API, indicated by the actor's
 | ||||
| 	// url field. For now, let's keep it simple.
 | ||||
| 	const actor = await queryAcct(handle.domain!, acct) | ||||
|  |  | |||
		Ładowanie…
	
		Reference in New Issue
	
	 Pete Bacon Darwin
						Pete Bacon Darwin