kopia lustrzana https://github.com/pafcu/RSStoFriendika
Initial
commit
7cd6f33d41
|
@ -0,0 +1,6 @@
|
|||
Requires:
|
||||
Python 2.x (http://python.org/)
|
||||
HTMLtoBBCode (https://bitbucket.org/saparvia/htmltobbcode)
|
||||
Feedparser (http://code.google.com/p/feedparser/)
|
||||
Mako (http://www.makotemplates.org/)
|
||||
lxml (http://lxml.de/)
|
|
@ -0,0 +1,9 @@
|
|||
RSStoFriendika is a program to post items from RSS feeds to Friendika. The items are formatted using templates before posting.
|
||||
|
||||
Usage:
|
||||
1) Create a file containing RSS URL and path to template on each line
|
||||
e.g.
|
||||
http://myblog.org/feed.rss templates/generic_summary.templ
|
||||
http://myphotos.org/feed.rss templates/generic_photo.templ
|
||||
|
||||
2) run RSStoFriendika.py
|
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env python
|
||||
# Application to post RSS updates to Friendika
|
||||
import sys
|
||||
import urllib
|
||||
import urllib2
|
||||
import urlparse
|
||||
import cPickle as pickle
|
||||
import hashlib
|
||||
import StringIO
|
||||
import ConfigParser
|
||||
import copy
|
||||
import time
|
||||
|
||||
# External libraries
|
||||
import feedparser
|
||||
import mako.template as mako
|
||||
import lxml.etree as etree
|
||||
|
||||
conf_path = 'RSStoFriendika.conf'
|
||||
|
||||
def tweet(server, message):
|
||||
url = server + '/api/statuses/update'
|
||||
urllib2.urlopen(url, urllib.urlencode({'status': message}))
|
||||
|
||||
htmltobbcode = etree.parse('htmltobbcode/HTMLtoBBCode.xslt')
|
||||
htmlparser = etree.HTMLParser()
|
||||
def makebbcode(html):
|
||||
return unicode(etree.parse(StringIO.StringIO(html), htmlparser).xslt(htmltobbcode))
|
||||
|
||||
config = ConfigParser.SafeConfigParser()
|
||||
config.read(conf_path)
|
||||
config_changed = False
|
||||
def getconfig(config, option, valid=''):
|
||||
"""Interactively read config values"""
|
||||
global config_changed # Ugly!
|
||||
try:
|
||||
return config.get('main',option)
|
||||
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
|
||||
config.set('main',option,raw_input('%s%s: '%(option.replace('_',' '), valid)))
|
||||
config_changed = True
|
||||
return config.get('main',option)
|
||||
|
||||
# Read config
|
||||
old_config = copy.copy(config)
|
||||
server = getconfig(config,'server').rstrip('/')
|
||||
username = getconfig(config,'username')
|
||||
password = getconfig(config,'password')
|
||||
feeds_path = getconfig(config,'feeds_file')
|
||||
# A bit of trickery to store a bool. ConfigParser.readboolean is no good because it doesn't accept "y" and "n".
|
||||
store_guids = config.set('main','always_store_guid', str(getconfig(config,'always_store_guid', ' (y/N)').lower().startswith('y')))
|
||||
# Handle defaults like this or a DEFAULT section is written to output config file
|
||||
try:
|
||||
guids_path = config.get('main','guids_file')
|
||||
except:
|
||||
guids_path = 'processed.dat'
|
||||
try:
|
||||
feeds_updated = config.getfloat('main','updated')
|
||||
except:
|
||||
feeds_updated = 0
|
||||
|
||||
if config_changed:
|
||||
reply = raw_input('Save config? (y/N): ')
|
||||
if reply.lower().startswith('y'):
|
||||
with open(conf_path, 'w') as configfile:
|
||||
config.write(configfile)
|
||||
else:
|
||||
config = old_config
|
||||
|
||||
# Set up basic authentication
|
||||
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
|
||||
passman.add_password(None, server, username, password)
|
||||
authhandler = urllib2.HTTPBasicAuthHandler(passman)
|
||||
opener = urllib2.build_opener(authhandler)
|
||||
urllib2.install_opener(opener)
|
||||
|
||||
# Try to open "database" containing already processed GUIDs
|
||||
# TODO: Something more efficient and robust
|
||||
try:
|
||||
with open(guids_path, 'r') as guids_file:
|
||||
processed = pickle.load(guids_file)
|
||||
except:
|
||||
processed = set()
|
||||
|
||||
# Iterate over feed list
|
||||
for line in open(feeds_path):
|
||||
if line.startswith('#'): # Skip comments
|
||||
continue
|
||||
|
||||
feed_url, template_path = line.strip().split()
|
||||
feed = feedparser.parse(feed_url)
|
||||
|
||||
for entry in feed['entries']:
|
||||
try:
|
||||
guid = entry['guid']
|
||||
except:
|
||||
guid = hashlib.sha256(repr(entry)).hexdigest()
|
||||
|
||||
try:
|
||||
updated = time.mktime(entry['updated_parsed'])
|
||||
except KeyError:
|
||||
updated = None
|
||||
|
||||
updated = False # Too many feeds lie to actually make this useful, so turn off date checking :-(
|
||||
|
||||
if (updated and updated < feeds_updated) or guid in processed:
|
||||
continue
|
||||
|
||||
# See above about lying feeds
|
||||
#if updated == None or config.getboolean('main','always_store_guid'):
|
||||
|
||||
processed.add(guid) # Remember that we've processed this one
|
||||
|
||||
# Convert from HTML to BBCode which Friendika understands
|
||||
# Title is also processed to get rid of HTML entities
|
||||
for key in ['title', 'summary']:
|
||||
try:
|
||||
entry[key] = makebbcode('<html>%s</html>'%entry[key])
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
for i, content in enumerate(entry.content):
|
||||
content.value = makebbcode('<html>%s</html>'%content.value)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# TODO: Also check link rel="icon" etc.
|
||||
try:
|
||||
favicon = '://'.join(urlparse.urlparse(entry['link'])[0:2])+'/favicon.ico'
|
||||
except KeyError:
|
||||
favicon = ''
|
||||
|
||||
try:
|
||||
linked_title = '[url=%s][img=16x16]%s[/img]%s[/url]'%(entry['link'],favicon, entry['title'])
|
||||
except KeyError:
|
||||
linked_title = entry['title']
|
||||
|
||||
message = mako.Template(filename=template_path).render_unicode(entry=entry, favicon=favicon, linked_title=linked_title).encode('utf-8')
|
||||
tweet(server, message)
|
||||
|
||||
config.set('main','updated',str(time.time()))
|
||||
|
||||
with open(conf_path, 'w') as configfile:
|
||||
config.write(configfile)
|
||||
|
||||
with open(guids_path, 'w') as guids_file:
|
||||
pickle.dump(processed, guids_file)
|
|
@ -0,0 +1,2 @@
|
|||
http://myblog.org/feed.rss templates/generic_summary.templ
|
||||
http://myphotos.org/feed.rss templates/generic_photo.templ
|
|
@ -0,0 +1,2 @@
|
|||
## Grammar-nazis beware!
|
||||
${entry['activity_verb'].split('/')[-1]}ed a ${entry['activity_object-type'].split('/')[-1]}: ${linked_title}
|
|
@ -0,0 +1,3 @@
|
|||
cited the paper ${linked_title}
|
||||
|
||||
${entry['summary']}
|
|
@ -0,0 +1 @@
|
|||
shared the link ${linked_title}
|
|
@ -0,0 +1,2 @@
|
|||
posted the photo [url=${entry['media_content'][0]['url']}][img]${favicon}[/img]${entry['title']}[/url]
|
||||
[img]${entry['media_thumbnail'][0]['url']}[/img]
|
|
@ -0,0 +1 @@
|
|||
[url=${entry['link']}][img]${favicon}[/img]${' '.join(entry['title'].split()[1:])}[/url]
|
|
@ -0,0 +1 @@
|
|||
added ${linked_title} to his library
|
|
@ -0,0 +1 @@
|
|||
is attending ${linked_title}
|
|
@ -0,0 +1,4 @@
|
|||
<%!
|
||||
import re
|
||||
%>
|
||||
rated ${linked_title} ${re.search(r'\d+', entry['summary']).group(0)}/10
|
|
@ -0,0 +1 @@
|
|||
loves ${linked_title}
|
|
@ -0,0 +1 @@
|
|||
has been listening mostly to ${entry['content'][0].value} lately according to [url=http://last.fm/][img]http://last.fm/favicon.ico[/img]Last.fm[/url]
|
|
@ -0,0 +1 @@
|
|||
added ${linked_title} to his library
|
|
@ -0,0 +1 @@
|
|||
${entry['summary']}
|
|
@ -0,0 +1 @@
|
|||
published the video ${linked_title}
|
Ładowanie…
Reference in New Issue