From 7cd6f33d411231eee3a67405147b356425cf8813 Mon Sep 17 00:00:00 2001 From: Stefan Parviainen Date: Sun, 16 Oct 2011 12:06:50 +0300 Subject: [PATCH] Initial --- INSTALL | 6 ++ README | 9 ++ RSStoFriendika.py | 146 +++++++++++++++++++++++++++++ feeds.sample | 2 + templates/activitystream.templ | 2 + templates/citeulike.templ | 3 + templates/delicious.templ | 1 + templates/generic_photo.templ | 2 + templates/generic_skipname.templ | 1 + templates/goodreads.com | 1 + templates/googlecalendar.templ | 1 + templates/imdb_ratings.templ | 4 + templates/lastfm_loved.templ | 1 + templates/lastfm_topfeed.templ | 1 + templates/librarything_added.templ | 1 + templates/summary.templ | 1 + templates/youtube_published.templ | 1 + 17 files changed, 183 insertions(+) create mode 100644 INSTALL create mode 100644 README create mode 100644 RSStoFriendika.py create mode 100644 feeds.sample create mode 100644 templates/activitystream.templ create mode 100644 templates/citeulike.templ create mode 100644 templates/delicious.templ create mode 100644 templates/generic_photo.templ create mode 100644 templates/generic_skipname.templ create mode 100644 templates/goodreads.com create mode 100644 templates/googlecalendar.templ create mode 100644 templates/imdb_ratings.templ create mode 100644 templates/lastfm_loved.templ create mode 100644 templates/lastfm_topfeed.templ create mode 100644 templates/librarything_added.templ create mode 100644 templates/summary.templ create mode 100644 templates/youtube_published.templ diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..5887f0d --- /dev/null +++ b/INSTALL @@ -0,0 +1,6 @@ +Requires: + Python 2.x (http://python.org/) + HTMLtoBBCode (https://bitbucket.org/saparvia/htmltobbcode) + Feedparser (http://code.google.com/p/feedparser/) + Mako (http://www.makotemplates.org/) + lxml (http://lxml.de/) diff --git a/README b/README new file mode 100644 index 0000000..e77cfaf --- /dev/null +++ b/README @@ -0,0 +1,9 @@ +RSStoFriendika is a program 
to post items from RSS feeds to Friendika. The items are formatted using templates before posting.
+
+Usage:
+1) Create a file containing RSS URL and path to template on each line
+e.g.
+http://myblog.org/feed.rss templates/generic_summary.templ
+http://myphotos.org/feed.rss templates/generic_photo.templ
+
+2) run RSStoFriendika.py
diff --git a/RSStoFriendika.py b/RSStoFriendika.py
new file mode 100644
index 0000000..42e300d
--- /dev/null
+++ b/RSStoFriendika.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python
+# Application to post RSS updates to Friendika
+import sys
+import urllib
+import urllib2
+import urlparse
+import cPickle as pickle
+import hashlib
+import StringIO
+import ConfigParser
+import copy
+import getpass
+import time
+
+# External libraries
+import feedparser
+import mako.template as mako
+import lxml.etree as etree
+
+conf_path = 'RSStoFriendika.conf'
+
+# Too many feeds lie about their dates to make date checking useful, so it is
+# turned off and entries are recognised by GUID only :-(
+TRUST_FEED_DATES = False
+
+def tweet(server, message):
+    """Post message as a status update through the API at server.
+
+    server is the base URL without a trailing slash and message is the
+    already encoded status text. Credentials come from the urllib2 opener
+    installed further down. Errors from urllib2.urlopen are not caught.
+    """
+    url = server + '/api/statuses/update'
+    response = urllib2.urlopen(url, urllib.urlencode({'status': message}))
+    response.close() # The reply is not needed, just release the connection
+
+htmltobbcode = etree.parse('htmltobbcode/HTMLtoBBCode.xslt')
+htmlparser = etree.HTMLParser()
+def makebbcode(html):
+    """Convert an HTML string to BBCode using the HTMLtoBBCode stylesheet.
+
+    Returns a unicode string. Blank input gives u'' without being parsed,
+    because there is no document to transform.
+    """
+    if not html.strip():
+        return u''
+    tree = etree.parse(StringIO.StringIO(html), htmlparser)
+    return unicode(tree.xslt(htmltobbcode))
+
+def ensure_main_section(config):
+    """Add the 'main' section if it is missing so that config.set() works."""
+    if not config.has_section('main'):
+        config.add_section('main')
+
+def clone_config(config):
+    """Return an independent copy of config.
+
+    copy.copy() is shallow, so the copy would follow every later change to
+    the original. Round-trip the settings through an in-memory file instead.
+    """
+    buf = StringIO.StringIO()
+    config.write(buf)
+    buf.seek(0)
+    clone = ConfigParser.SafeConfigParser()
+    clone.readfp(buf)
+    return clone
+
+def parse_bool(text):
+    """Accept a prompt answer starting with 'y' or a stored 'True'."""
+    text = text.strip().lower()
+    return text.startswith('y') or text in ('true', '1', 'on')
+
+config = ConfigParser.SafeConfigParser()
+config.read(conf_path)
+config_changed = False
+def getconfig(config, option, valid='', secret=False):
+    """Interactively read config values
+
+    Returns option from the 'main' section. If it is not set the user is
+    asked for it (valid is a hint appended to the prompt, secret turns off
+    echo), the answer is stored in config and config_changed is set.
+    """
+    global config_changed # Ugly!
+    try:
+        return config.get('main', option)
+    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+        pass
+
+    prompt = '%s%s: ' % (option.replace('_', ' '), valid)
+    if secret:
+        answer = getpass.getpass(prompt)
+    else:
+        answer = raw_input(prompt)
+    ensure_main_section(config) # set() raises NoSectionError on a fresh config
+    # SafeConfigParser treats '%' as the start of an interpolation
+    config.set('main', option, answer.replace('%', '%%'))
+    config_changed = True
+    return config.get('main', option)
+
+# Read config
+old_config = clone_config(config)
+server = getconfig(config, 'server').rstrip('/')
+username = getconfig(config, 'username')
+password = getconfig(config, 'password', secret=True)
+feeds_path = getconfig(config, 'feeds_file')
+# ConfigParser.getboolean is no good because it doesn't accept "y" and "n",
+# so parse the answer by hand and store it back in normalized form.
+# NOTE: the flag is not consulted yet, every GUID is stored regardless.
+store_guids = parse_bool(getconfig(config, 'always_store_guid', ' (y/N)'))
+config.set('main', 'always_store_guid', str(store_guids))
+# Handle defaults like this or a DEFAULT section is written to output config file
+try:
+    guids_path = config.get('main', 'guids_file')
+except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+    guids_path = 'processed.dat'
+try:
+    feeds_updated = config.getfloat('main', 'updated')
+except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ValueError):
+    feeds_updated = 0
+
+if config_changed:
+    reply = raw_input('Save config? (y/N): ')
+    if reply.lower().startswith('y'):
+        with open(conf_path, 'w') as configfile:
+            config.write(configfile)
+    else:
+        # Forget the answers, otherwise they are saved with the timestamp below
+        config = old_config
+
+# Set up basic authentication
+passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
+passman.add_password(None, server, username, password)
+authhandler = urllib2.HTTPBasicAuthHandler(passman)
+opener = urllib2.build_opener(authhandler)
+urllib2.install_opener(opener)
+
+# Try to open "database" containing already processed GUIDs
+# TODO: Something more efficient and robust
+# NOTE: unpickling is only safe for a file written by this program
+try:
+    with open(guids_path, 'r') as guids_file:
+        processed = pickle.load(guids_file)
+except IOError: # No readable database, e.g. on the first run
+    processed = set()
+except Exception as error:
+    sys.stderr.write('Ignoring unreadable %s: %s\n' % (guids_path, error))
+    processed = set()
+
+def entry_guid(entry):
+    """Return the GUID of entry, or a hash of its contents if it has none."""
+    try:
+        return entry['guid']
+    except KeyError:
+        return hashlib.sha256(repr(entry)).hexdigest()
+
+def is_stale(entry):
+    """Tell whether entry was updated before the previous run.
+
+    Always False while TRUST_FEED_DATES is off.
+    """
+    if not TRUST_FEED_DATES:
+        return False
+    try:
+        updated = time.mktime(entry['updated_parsed'])
+    except (KeyError, TypeError): # Date missing or not parsed
+        return False
+    return updated < feeds_updated
+
+templates = {}
+def get_template(path):
+    """Return the Mako template at path, compiling it on first use only."""
+    if path not in templates:
+        templates[path] = mako.Template(filename=path)
+    return templates[path]
+
+def build_message(entry, template):
+    """Convert entry to BBCode in place and render it with template.
+
+    Returns the message as a UTF-8 encoded string.
+    """
+    # Convert from HTML to BBCode which Friendika understands
+    # Title is also processed to get rid of HTML entities
+    for key in ('title', 'summary'):
+        try:
+            value = entry[key]
+        except KeyError:
+            continue
+        entry[key] = makebbcode('%s' % value)
+    try:
+        contents = entry.content
+    except AttributeError:
+        contents = []
+    for content in contents:
+        content.value = makebbcode('%s' % content.value)
+
+    # TODO: Also check link rel="icon" etc.
+    link = entry.get('link')
+    title = entry.get('title', '')
+    if link:
+        favicon = '://'.join(urlparse.urlparse(link)[0:2]) + '/favicon.ico'
+        linked_title = '[url=%s][img=16x16]%s[/img]%s[/url]' % (link, favicon, title)
+    else:
+        # Without a link there is nothing to point at, use the bare title
+        favicon = ''
+        linked_title = title
+
+    message = template.render_unicode(entry=entry, favicon=favicon, linked_title=linked_title)
+    return message.encode('utf-8')
+
+try:
+    # Iterate over feed list
+    with open(feeds_path) as feeds_file:
+        for line in feeds_file:
+            line = line.strip()
+            if not line or line.startswith('#'): # Skip blank lines and comments
+                continue
+            try:
+                feed_url, template_path = line.split(None, 1)
+            except ValueError:
+                sys.stderr.write('Skipping malformed feed line: %s\n' % line)
+                continue
+
+            feed = feedparser.parse(feed_url)
+            for entry in feed['entries']:
+                guid = entry_guid(entry)
+                if guid in processed or is_stale(entry):
+                    continue
+
+                tweet(server, build_message(entry, get_template(template_path)))
+                # Remember the entry only after it has really been posted
+                processed.add(guid)
+finally:
+    # Save the GUIDs even when a feed failed, so that entries posted before
+    # the failure are not posted again on the next run
+    with open(guids_path, 'w') as guids_file:
+        pickle.dump(processed, guids_file)
+
+ensure_main_section(config)
+config.set('main', 'updated', str(time.time()))
+
+with open(conf_path, 'w') as configfile:
+    config.write(configfile)
diff --git a/feeds.sample b/feeds.sample
new file mode 100644
index 0000000..d1a1d47
--- /dev/null
+++ b/feeds.sample
@@ -0,0 +1,2 @@
+http://myblog.org/feed.rss templates/generic_summary.templ
+http://myphotos.org/feed.rss templates/generic_photo.templ
diff --git a/templates/activitystream.templ b/templates/activitystream.templ
new file mode 100644
index 0000000..747e2f4
--- /dev/null
+++ b/templates/activitystream.templ
@@ -0,0 +1,2 @@
+## Grammar-nazis beware!
+${entry['activity_verb'].split('/')[-1]}ed a ${entry['activity_object-type'].split('/')[-1]}: ${linked_title} diff --git a/templates/citeulike.templ b/templates/citeulike.templ new file mode 100644 index 0000000..be6f41a --- /dev/null +++ b/templates/citeulike.templ @@ -0,0 +1,3 @@ +cited the paper ${linked_title} + +${entry['summary']} diff --git a/templates/delicious.templ b/templates/delicious.templ new file mode 100644 index 0000000..51cef32 --- /dev/null +++ b/templates/delicious.templ @@ -0,0 +1 @@ +shared the link ${linked_title} diff --git a/templates/generic_photo.templ b/templates/generic_photo.templ new file mode 100644 index 0000000..a6da8db --- /dev/null +++ b/templates/generic_photo.templ @@ -0,0 +1,2 @@ +posted the photo [url=${entry['media_content'][0]['url']}][img]${favicon}[/img]${entry['title']}[/url] +[img]${entry['media_thumbnail'][0]['url']}[/img] diff --git a/templates/generic_skipname.templ b/templates/generic_skipname.templ new file mode 100644 index 0000000..330979d --- /dev/null +++ b/templates/generic_skipname.templ @@ -0,0 +1 @@ +[url=${entry['link']}][img]${favicon}[/img]${' '.join(entry['title'].split()[1:])}[/url] diff --git a/templates/goodreads.com b/templates/goodreads.com new file mode 100644 index 0000000..97c2d56 --- /dev/null +++ b/templates/goodreads.com @@ -0,0 +1 @@ +added ${linked_title} to his library diff --git a/templates/googlecalendar.templ b/templates/googlecalendar.templ new file mode 100644 index 0000000..6792a1f --- /dev/null +++ b/templates/googlecalendar.templ @@ -0,0 +1 @@ +is attending ${linked_title} diff --git a/templates/imdb_ratings.templ b/templates/imdb_ratings.templ new file mode 100644 index 0000000..73f4eff --- /dev/null +++ b/templates/imdb_ratings.templ @@ -0,0 +1,4 @@ +<%! 
+ import re +%> +rated ${linked_title} ${re.search(r'\d+', entry['summary']).group(0)}/10 diff --git a/templates/lastfm_loved.templ b/templates/lastfm_loved.templ new file mode 100644 index 0000000..fe2d547 --- /dev/null +++ b/templates/lastfm_loved.templ @@ -0,0 +1 @@ +loves ${linked_title} diff --git a/templates/lastfm_topfeed.templ b/templates/lastfm_topfeed.templ new file mode 100644 index 0000000..856f9b5 --- /dev/null +++ b/templates/lastfm_topfeed.templ @@ -0,0 +1 @@ +has been listening mostly to ${entry['content'][0].value} lately according to [url=http://last.fm/][img]http://last.fm/favicon.ico[/img]Last.fm[/url] diff --git a/templates/librarything_added.templ b/templates/librarything_added.templ new file mode 100644 index 0000000..97c2d56 --- /dev/null +++ b/templates/librarything_added.templ @@ -0,0 +1 @@ +added ${linked_title} to his library diff --git a/templates/summary.templ b/templates/summary.templ new file mode 100644 index 0000000..0c07834 --- /dev/null +++ b/templates/summary.templ @@ -0,0 +1 @@ +${entry['summary']} diff --git a/templates/youtube_published.templ b/templates/youtube_published.templ new file mode 100644 index 0000000..670391c --- /dev/null +++ b/templates/youtube_published.templ @@ -0,0 +1 @@ +published the video ${linked_title}