Initial

2011-10-16 12:06:50 +03:00 · 2011-10-16 12:06:50 +03:00 · 7cd6f33d41
commit 7cd6f33d41
--- a/6
+++ b/6
@ -0,0 +1,6 @@
+Requires:
+	Python 2.x (http://python.org/)
+	HTMLtoBBCode (https://bitbucket.org/saparvia/htmltobbcode)
+	Feedparser (http://code.google.com/p/feedparser/)
+	Mako (http://www.makotemplates.org/)
+	lxml (http://lxml.de/)
--- a/9
+++ b/9
@ -0,0 +1,9 @@
+RSStoFriendika is a program to post items from RSS feeds to Friendika. The items are formatted using templates before posting.
+
+Usage:
+1) Create a file in which each line holds an RSS feed URL and the path to a template, separated by whitespace
+e.g.
+http://myblog.org/feed.rss templates/generic_summary.templ
+http://myphotos.org/feed.rss templates/generic_photo.templ
+
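+2) Run RSStoFriendika.py. Settings missing from RSStoFriendika.conf (server, username, password, feeds file, always store guid) are asked for interactively.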
--- a/RSStoFriendika.py
+++ b/RSStoFriendika.py
@ -0,0 +1,146 @@
+#!/usr/bin/env python
+# Application to post RSS updates to Friendika
+import sys
+import urllib
+import urllib2
+import urlparse
+import cPickle as pickle
+import hashlib
+import StringIO
+import ConfigParser
+import copy
+import time
+
+# External libraries
+import feedparser
+import mako.template as mako
+import lxml.etree as etree
+
+# Path of the configuration file; it is read at startup and rewritten at the end of each run
+conf_path = 'RSStoFriendika.conf'
+
+def tweet(server, message):
+	url = server + '/api/statuses/update'
+	urllib2.urlopen(url, urllib.urlencode({'status': message}))
+
+htmltobbcode = etree.parse('htmltobbcode/HTMLtoBBCode.xslt')
+htmlparser = etree.HTMLParser()
+def makebbcode(html):
+	return unicode(etree.parse(StringIO.StringIO(html), htmlparser).xslt(htmltobbcode))
+
+config = ConfigParser.SafeConfigParser()
+config.read(conf_path)
+config_changed = False
+def getconfig(config, option, valid=''):
+	"""Interactively read config values"""
+	global config_changed # Ugly!
+	try:
+		return config.get('main',option)
+	except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+		config.set('main',option,raw_input('%s%s: '%(option.replace('_',' '), valid)))
+		config_changed = True
+		return config.get('main',option)
+
+# Read config
+old_config = copy.copy(config)
+server = getconfig(config,'server').rstrip('/')
+username = getconfig(config,'username')
+password = getconfig(config,'password')
+feeds_path = getconfig(config,'feeds_file')
+# A bit of trickery to store a bool. ConfigParser.readboolean is no good because it doesn't accept "y" and "n".
+store_guids = config.set('main','always_store_guid', str(getconfig(config,'always_store_guid', ' (y/N)').lower().startswith('y')))
+# Handle defaults like this or a DEFAULT section is written to output config file
+try:
+	guids_path = config.get('main','guids_file')
+except:
+	guids_path = 'processed.dat'
+try:
+	feeds_updated = config.getfloat('main','updated')
+except:
+	feeds_updated = 0
+
+if config_changed:
+	reply = raw_input('Save config? (y/N): ')
+	if reply.lower().startswith('y'):
+		with open(conf_path, 'w') as configfile:
+		    config.write(configfile)
+	else:
+		config = old_config
+
+# Set up basic authentication
+# The credentials are registered with realm None, i.e. as the default for any
+# realm at `server`. install_opener() makes this opener the one used by
+# subsequent urllib2.urlopen() calls.
+passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
+passman.add_password(None, server, username, password)
+authhandler = urllib2.HTTPBasicAuthHandler(passman)
+opener = urllib2.build_opener(authhandler)
+urllib2.install_opener(opener)
+
+# Try to open "database" containing already processed GUIDs
+# TODO: Something more efficient and robust
+try:
+	with open(guids_path, 'r') as guids_file:
+		processed = pickle.load(guids_file)
+except:
+	processed = set()
+
+# Iterate over feed list
+for line in open(feeds_path):
+	if line.startswith('#'): # Skip comments
+		continue
+
+	feed_url, template_path = line.strip().split()
+	feed = feedparser.parse(feed_url)
+
+	for entry in feed['entries']:
+		try:
+			guid = entry['guid']
+		except:
+			guid = hashlib.sha256(repr(entry)).hexdigest()
+
+		try:
+			updated = time.mktime(entry['updated_parsed'])
+		except KeyError:
+			updated = None
+
+		updated = False # Too many feeds lie to actually make this useful, so turn off date checking :-(
+
+		if (updated and updated < feeds_updated) or guid in processed:
+			continue
+
+		# See above about lying feeds
+		#if updated == None or config.getboolean('main','always_store_guid'):
+
+		processed.add(guid) # Remember that we've processed this one
+
+		# Convert from HTML to BBCode which Friendika understands
+		# Title is also processed to get rid of HTML entities
+		for key in ['title', 'summary']:
+			try:
+				entry[key] = makebbcode('<html>%s</html>'%entry[key])
+			except KeyError:
+				pass
+		try:
+			for i, content in enumerate(entry.content):
+				content.value = makebbcode('<html>%s</html>'%content.value)
+		except AttributeError:
+			pass
+
+		# TODO: Also check link rel="icon" etc.
+		try:
+			favicon = '://'.join(urlparse.urlparse(entry['link'])[0:2])+'/favicon.ico'
+		except KeyError:
+			favicon = ''
+
+		try:
+			linked_title = '[url=%s][img=16x16]%s[/img]%s[/url]'%(entry['link'],favicon, entry['title'])
+		except KeyError:
+			linked_title = entry['title']
+
+		message = mako.Template(filename=template_path).render_unicode(entry=entry, favicon=favicon, linked_title=linked_title).encode('utf-8')
+		tweet(server, message)
+
+# Store the time of this run; it is read back as 'updated' at the next startup
+config.set('main','updated',str(time.time()))
+
+# Write the configuration back (done unconditionally at the end of every run)
+with open(conf_path, 'w') as configfile:
+    config.write(configfile)
+
+# Persist the set of processed GUIDs for the next run
+with open(guids_path, 'w') as guids_file:
+	pickle.dump(processed, guids_file)
--- a/feeds.sample
+++ b/feeds.sample
@ -0,0 +1,2 @@
+http://myblog.org/feed.rss templates/generic_summary.templ
+http://myphotos.org/feed.rss templates/generic_photo.templ
--- a/templates/activitystream.templ
+++ b/templates/activitystream.templ
@ -0,0 +1,2 @@
+## Grammar-nazis beware!
+${entry['activity_verb'].split('/')[-1]}ed a ${entry['activity_object-type'].split('/')[-1]}: ${linked_title}
--- a/templates/citeulike.templ
+++ b/templates/citeulike.templ
@ -0,0 +1,3 @@
+cited the paper ${linked_title}
+
+${entry['summary']}
--- a/templates/delicious.templ
+++ b/templates/delicious.templ
@ -0,0 +1 @@
+shared the link ${linked_title}
--- a/templates/generic_photo.templ
+++ b/templates/generic_photo.templ
@ -0,0 +1,2 @@
+posted the photo [url=${entry['media_content'][0]['url']}][img]${favicon}[/img]${entry['title']}[/url]
+[img]${entry['media_thumbnail'][0]['url']}[/img]
--- a/templates/generic_skipname.templ
+++ b/templates/generic_skipname.templ
@ -0,0 +1 @@
+[url=${entry['link']}][img]${favicon}[/img]${' '.join(entry['title'].split()[1:])}[/url]
--- a/templates/goodreads.com
+++ b/templates/goodreads.com
@ -0,0 +1 @@
+added ${linked_title} to his library
--- a/templates/googlecalendar.templ
+++ b/templates/googlecalendar.templ
@ -0,0 +1 @@
+is attending ${linked_title}
--- a/templates/imdb_ratings.templ
+++ b/templates/imdb_ratings.templ
@ -0,0 +1,4 @@
+<%!
+    import re
+%>
+rated ${linked_title} ${re.search(r'\d+', entry['summary']).group(0)}/10
--- a/templates/lastfm_loved.templ
+++ b/templates/lastfm_loved.templ
@ -0,0 +1 @@
+loves ${linked_title}
--- a/templates/lastfm_topfeed.templ
+++ b/templates/lastfm_topfeed.templ
@ -0,0 +1 @@
+has been listening mostly to ${entry['content'][0].value} lately according to [url=http://last.fm/][img]http://last.fm/favicon.ico[/img]Last.fm[/url]
--- a/templates/librarything_added.templ
+++ b/templates/librarything_added.templ
@ -0,0 +1 @@
+added ${linked_title} to his library
--- a/templates/summary.templ
+++ b/templates/summary.templ
@ -0,0 +1 @@
+${entry['summary']}
--- a/templates/youtube_published.templ
+++ b/templates/youtube_published.templ
@ -0,0 +1 @@
+published the video ${linked_title}
				`@ -0,0 +1 @@`
				`[url=${entry['link']}][img]${favicon}[/img]${' '.join(entry['title'].split()[1:])}[/url]`
				`@ -0,0 +1 @@`
				`has been listening mostly to ${entry['content'][0].value} lately according to [url=http://last.fm/][img]http://last.fm/favicon.ico[/img]Last.fm[/url]`