kopia lustrzana https://git.sr.ht/~tsileo/microblog.pub
				
				
				
			Add Open Graph metadata support
							rodzic
							
								
									23faef985b
								
							
						
					
					
						commit
						648e385c49
					
				
							
								
								
									
										5
									
								
								app.py
								
								
								
								
							
							
						
						
									
										5
									
								
								app.py
								
								
								
								
							|  | @ -238,6 +238,11 @@ def get_attachment_url(url, size): | |||
|     return _get_file_url(url, size, Kind.ATTACHMENT) | ||||
| 
 | ||||
| 
 | ||||
@app.template_filter()
def get_og_image_url(url, size=100):
    """Template filter: resolve the URL of a cached Open Graph image.

    Delegates to ``_get_file_url`` with ``Kind.OG_IMAGE``.

    :param url: the original (remote) image URL, as found in the OG metadata.
    :param size: requested size, forwarded unchanged to ``_get_file_url``;
        defaults to 100.
    :return: whatever ``_get_file_url`` returns for that url/size/kind.
    """
    og_kind = Kind.OG_IMAGE
    return _get_file_url(url, size, og_kind)
| 
 | ||||
| 
 | ||||
@app.template_filter()
def permalink_id(val):
    """Template filter: turn ``val`` into a string identifier via ``hash()``.

    NOTE: for ``str`` inputs Python salts ``hash()`` per process unless
    ``PYTHONHASHSEED`` is fixed, so the result is only stable within a single
    interpreter run.

    :param val: any hashable value.
    :return: the decimal string form of ``hash(val)`` (may be negative).
    """
    hashed = hash(val)
    return str(hashed)
|  |  | |||
|  | @ -189,9 +189,11 @@ a:hover { | |||
|     h3 { margin: 0; } | ||||
|   } | ||||
| } | ||||
| .note-box { | ||||
|   margin-bottom: 70px; | ||||
| } | ||||
| .note { | ||||
|   display: flex; | ||||
|   margin-bottom: 70px; | ||||
|   .l { | ||||
|     color: $color-note-link; | ||||
|   } | ||||
|  | @ -229,7 +231,11 @@ a:hover { | |||
|     padding:10px 0; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| .color-menu-background { | ||||
|   background: $color-menu-background; | ||||
| } | ||||
| .og-link { text-decoration: none; } | ||||
| .og-link:hover { text-decoration: none; } | ||||
| .bar-item-no-hover { | ||||
|   background: $color-menu-background; | ||||
|   padding: 5px; | ||||
|  |  | |||
							
								
								
									
										38
									
								
								tasks.py
								
								
								
								
							
							
						
						
									
										38
									
								
								tasks.py
								
								
								
								
							|  | @ -19,6 +19,7 @@ from config import ID | |||
| from config import KEY | ||||
| from config import MEDIA_CACHE | ||||
| from config import USER_AGENT | ||||
| from utils import opengraph | ||||
| from utils.media import Kind | ||||
| 
 | ||||
| log = logging.getLogger(__name__) | ||||
|  | @ -103,12 +104,49 @@ def process_new_activity(self, iri: str) -> None: | |||
|         self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries)) | ||||
| 
 | ||||
| 
 | ||||
@app.task(bind=True, max_retries=12)  # noqa: C901
def fetch_og_metadata(self, iri: str) -> None:
    """Fetch and store Open Graph metadata for the links of an activity.

    The activity is fetched by ``iri``. Only ``Create`` activities are
    processed: the links of the created object are extracted, their OG
    metadata is downloaded, every advertised OG image is cached through
    ``MEDIA_CACHE`` and the metadata list is saved under
    ``meta.og_metadata`` of the matching ``DB.activities`` document.

    Error handling:

    * activity gone / not found: logged and dropped, no retry;
    * HTTP 4xx while fetching: logged and dropped, no retry (a client error
      is not expected to succeed on a later attempt);
    * any other failure: logged, then the task is retried with a randomized
      exponential countdown (base drawn from ``[2, 4)``, exponent is the
      current retry count).

    :param iri: remote id of the activity to process.
    """
    try:
        activity = ap.fetch_remote_activity(iri)
        log.info(f"activity={activity!r}")
        if activity.has_type(ap.ActivityType.CREATE):
            note = activity.get_object()
            links = opengraph.links_from_note(note.to_dict())
            og_metadata = opengraph.fetch_og_metadata(USER_AGENT, links)
            for og in og_metadata:
                # Pages without an og:image have nothing to cache.
                if not og.get("image"):
                    continue
                MEDIA_CACHE.cache_og_image(og["image"])

            log.debug(f"OG metadata {og_metadata!r}")
            DB.activities.update_one(
                {"remote_id": iri}, {"$set": {"meta.og_metadata": og_metadata}}
            )

        log.info(f"OG metadata fetched for {iri}")
    except (ActivityGoneError, ActivityNotFoundError):
        log.exception(f"dropping activity {iri}, skip OG metedata")
    except requests.exceptions.HTTPError as http_err:
        response = http_err.response
        if response is not None and 400 <= response.status_code < 500:
            log.exception("bad request, no retry")
            # Actually honour the "no retry" decision: without this return
            # the code below would schedule a retry anyway.
            return
        log.exception("failed to fetch OG metadata")
        self.retry(
            exc=http_err, countdown=int(random.uniform(2, 4) ** self.request.retries)
        )
    except Exception as err:
        log.exception(f"failed to fetch OG metadata for {iri}")
        self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries))
| 
 | ||||
| 
 | ||||
| @app.task(bind=True, max_retries=12) | ||||
| def cache_actor(self, iri: str, also_cache_attachments: bool = True) -> None: | ||||
|     try: | ||||
|         activity = ap.fetch_remote_activity(iri) | ||||
|         log.info(f"activity={activity!r}") | ||||
| 
 | ||||
|         if activity.has_type(ap.ActivityType.CREATE): | ||||
|             fetch_og_metadata.delay(iri) | ||||
| 
 | ||||
|         actor = activity.get_actor() | ||||
| 
 | ||||
|         cache_actor_with_inbox = False | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ | |||
| {% else %} | ||||
| {% set actor = obj.attributedTo | get_actor  %} | ||||
| {% endif %} | ||||
| <div class="note-box"> | ||||
| <div class="note h-entry" id="activity-{{ obj.id | permalink_id }}"> | ||||
| 
 | ||||
| <div class="h-card p-author"> | ||||
|  | @ -63,6 +64,26 @@ | |||
| </div> | ||||
| 	{% endif %} | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| {% if meta and meta.og_metadata %} | ||||
| {% for og in meta.og_metadata %} | ||||
| <a href="{{ og.url }}" class="og-link" style="margin:30px 0;clear:both;display: flex;"> | ||||
| <div> | ||||
| <img  style="width:100px;border-radius:3px;" src="{{ og.image | get_og_image_url }}"> | ||||
| </div> | ||||
| <div style="padding:0 20px;"> | ||||
| <strong>{{ og.title }}</strong> | ||||
| <p>{{ og.description | truncate(80) }}</p> | ||||
| <small>{{ og.site_name }}</small> | ||||
| </div> | ||||
| </a> | ||||
| {% endfor %} | ||||
| {% endif %} | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| <div class="bottom-bar"> | ||||
| {% if perma %}<span class="perma-item">{{ obj.published | format_time }}</span> | ||||
| {% if not (obj.id | is_from_outbox) %} | ||||
|  | @ -163,10 +184,10 @@ | |||
| {% endif %} | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| </div> | ||||
| </div>                        | ||||
| </div> | ||||
| 
 | ||||
| </div>                        | ||||
| 
 | ||||
| {%- endmacro %} | ||||
| 
 | ||||
|  |  | |||
|  | @ -31,6 +31,7 @@ class Kind(Enum): | |||
|     ATTACHMENT = "attachment" | ||||
|     ACTOR_ICON = "actor_icon" | ||||
|     UPLOAD = "upload" | ||||
|     OG_IMAGE = "og" | ||||
| 
 | ||||
| 
 | ||||
| class MediaCache(object): | ||||
|  | @ -38,6 +39,24 @@ class MediaCache(object): | |||
|         self.fs = gridfs.GridFS(gridfs_db) | ||||
|         self.user_agent = user_agent | ||||
| 
 | ||||
|     def cache_og_image(self, url: str) -> None: | ||||
|         if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}): | ||||
|             return | ||||
|         i = load(url, self.user_agent) | ||||
|         # Save the original attachment (gzipped) | ||||
|         i.thumbnail((100, 100)) | ||||
|         with BytesIO() as buf: | ||||
|             with GzipFile(mode="wb", fileobj=buf) as f1: | ||||
|                 i.save(f1, format=i.format) | ||||
|             buf.seek(0) | ||||
|             self.fs.put( | ||||
|                 buf, | ||||
|                 url=url, | ||||
|                 size=100, | ||||
|                 content_type=i.get_format_mimetype(), | ||||
|                 kind=Kind.OG_IMAGE.value, | ||||
|             ) | ||||
| 
 | ||||
|     def cache_attachment(self, url: str) -> None: | ||||
|         if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}): | ||||
|             return | ||||
|  | @ -141,6 +160,8 @@ class MediaCache(object): | |||
|     def cache(self, url: str, kind: Kind) -> None: | ||||
|         if kind == Kind.ACTOR_ICON: | ||||
|             self.cache_actor_icon(url) | ||||
|         elif kind == Kind.OG_IMAGE: | ||||
|             self.cache_og_image(url) | ||||
|         else: | ||||
|             self.cache_attachment(url) | ||||
| 
 | ||||
|  |  | |||
|  | @ -23,24 +23,11 @@ def links_from_note(note): | |||
|     return links | ||||
| 
 | ||||
| 
 | ||||
| def fetch_og_metadata(user_agent, col, remote_id): | ||||
|     doc = col.find_one({"remote_id": remote_id}) | ||||
|     if not doc: | ||||
|         raise ValueError | ||||
|     note = doc["activity"]["object"] | ||||
|     print(note) | ||||
|     links = links_from_note(note) | ||||
|     if not links: | ||||
|         return 0 | ||||
|     # FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph | ||||
| def fetch_og_metadata(user_agent, links): | ||||
|     htmls = [] | ||||
|     for l in links: | ||||
|         check_url(l) | ||||
|         r = requests.get(l, headers={"User-Agent": user_agent}) | ||||
|         r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15) | ||||
|         r.raise_for_status() | ||||
|         htmls.append(r.text) | ||||
|     links_og_metadata = [dict(opengraph.OpenGraph(html=html)) for html in htmls] | ||||
|     col.update_one( | ||||
|         {"remote_id": remote_id}, {"$set": {"meta.og_metadata": links_og_metadata}} | ||||
|     ) | ||||
|     return len(links) | ||||
|     return [dict(opengraph.OpenGraph(html=html)) for html in htmls] | ||||
|  |  | |||
		Ładowanie…
	
		Reference in New Issue
	
	 Thomas Sileo
						Thomas Sileo