Trackback for Vellum

Tags: , , ,

Enough was enough, and I decided I needed at least outgoing trackback in Vellum. Grabbing tblib, I got outgoing trackback working in a few minutes. Given the relative ease, I also added incoming trackback support.

Here's the highlighted code for trackback.py, which you can put in your vellum plugins folder. Put tblib somewhere where it can be found.

import vellum.hooks,vellum.Entry,vellum.functions,vellum.config
import re,sys,os,time,bsddb,pickle
import urllib2,urlparse
from vellum.vellumObject import vellumObject
import tblib
import sgmllib
import StringIO,traceback
# Module-level alias so the request handlers below can call exit() directly.
exit = sys.exit

# Plugin metadata: display name and one-line description of this plugin
# (presumably read by Vellum's plugin loader -- not shown in this file).
__pluginname__ = "Trackback"
__description__ = """Implements the Trackback spec"""

class WordCountingParser(sgmllib.SGMLParser):
    words = 0
    def handle_data(self, data):
        self.words += len(data.strip().split(" "))

def description(e):
    """Build the excerpt text sent for entry *e*.

    The result is the entry body followed by " (N words)", where N is
    the word count of the body plus the extended text when the entry
    has any.
    """
    counter = WordCountingParser()
    counter.feed(e.body)
    # The extended part is optional; count it only when present.
    if e.extended:
        counter.feed(e.extended)
    return e.body + " (%d words)" % counter.words

def trackback_send_tb(entry):
    """Send a Trackback ping to every page linked from *entry*.

    Each ``<a ... href="...">`` found in the entry body (plus the
    extended text, if any) is resolved against the entry permalink,
    Trackback autodiscovery is run on it through tblib, and a ping
    describing this entry is sent.

    Pinging is best-effort: a failure for one link is swallowed so the
    remaining links are still tried and the user saving the entry never
    sees an error.  Returns None.
    """
    link_re = re.compile(r'<a[^>]*href="(?P<href>[^"]+)"[^>]*>', re.I | re.S)

    text = entry.body
    if entry.extended:
        text += entry.extended

    # The pattern has a single group, so findall() yields the href values.
    for link in link_re.findall(text):
        try:
            # Links may be relative; make them absolute before discovery.
            absoluteLink = urlparse.urljoin(entry.permalink, link)
            tb = tblib.TrackBack()
            tb.autodiscover(absoluteLink)
            # NOTE(review): relies on vellum.Blog having been imported
            # elsewhere; this file does not import it itself.
            blog = vellum.Blog.get(entry.blogid)
            tb.blog_name = blog.title
            tb.title = entry.title
            tb.url = entry.permalink
            tb.excerpt = description(entry)
            tb.ping()
        except Exception:
            # Carry on regardless: a bad link or unreachable server must
            # not surface as an error to the user.  (This clause used to
            # be ``except e:`` with no ``e`` defined, which turned every
            # failure into a NameError instead of ignoring it.)
            continue

# Run trackback_send_tb when the "entry-save-first" hook fires
# (presumably only the first time an entry is saved -- confirm against
# the hook's implementation, which is not part of this file).

vellum.hooks.register_hook("entry-save-first", trackback_send_tb)

class Trackback(vellumObject):
    """One received trackback ping, persisted through vellumObject."""

    # Identity and bookkeeping defaults.
    id = None        # presumably filled in by setMyID() -- defined outside this file
    entryid = None   # id of the entry this ping refers to
    created = None   # time.time() stamp, set on the first save()

    # Fields taken from the ping itself.
    url = ''
    title = ''
    excerpt = ''
    blog_name = ''

    def save(self):
        """Assign an id, stamp the creation time if unset, and persist."""
        self.setMyID()
        if not self.created:
            # Stamp once only, so later saves keep the original time.
            self.created = time.time()
        vellumObject.save(self)

# Directory holding the database files, taken from the Vellum config.
dbdir = vellum.config.get("DatabaseDir")
if not dbdir:
    # Raise a real exception: string exceptions are rejected with a
    # TypeError on Python 2.6 and later, which would hide this message.
    raise RuntimeError("There is no DatabaseDir listed in the config file")

# Flag "c" opens the hash file read-write, creating it if necessary.
Trackback.db = bsddb.hashopen(os.path.join(dbdir, "trackback"), "c")

def all_trackbacks():
    """Return a list with every stored trackback, one per database key."""
    return [get_trackback(key) for key in Trackback.db.keys()]

def get_trackback(id):
    """Look up a single trackback by ID.

    Returns the unpickled object stored under ``str(id)`` in
    ``Trackback.db``, or None when no such key exists.
    """
    key = str(id)
    if not Trackback.db.has_key(key):
        return None
    # set_location() hands back a (key, value) pair; only the pickled
    # value is of interest here.
    _, pickled = Trackback.db.set_location(key)
    return pickle.loads(pickled)

def get_trackbacks(entry):
    """Return the trackbacks recorded for *entry*, newest first."""
    wanted = entry.id

    def newest_first(a, b):
        # Arguments are swapped so that later ``created`` values sort first.
        return cmp(b.created, a.created)

    # Ids are compared as strings on both sides.
    matches = [tb for tb in all_trackbacks()
               if str(tb.entryid) == str(wanted)]
    matches.sort(newest_first)
    return matches

# Add the trackbacks() method to Entry: as a class attribute the function
# receives the entry instance as its first argument, so entry.trackbacks()
# is equivalent to get_trackbacks(entry).
vellum.Entry.Entry.trackbacks = get_trackbacks

def trackbackServer(form):
    """CGI handler for incoming Trackback pings.

    The target entry is named by the ``eid`` query-string parameter and
    the ping fields (``url``, ``title``, ``excerpt``, ``blog_name``) are
    read with ``cgi.FieldStorage``.  On success the ping is stored as a
    Trackback, the entry is saved again, an ``<error>0</error>``
    response is written to stdout and the process exits with status 0.
    Every failure is reported through tbError(), which writes an
    ``<error>1</error>`` response and exits as well.

    *form* is not used; this function parses the request itself.
    """
    import cgi
    ping = cgi.FieldStorage()
    qsd = cgi.parse_qs(os.environ.get('QUERY_STRING', ''))
    eid = qsd.get('eid', None)
    if not eid:
        return tbError("You didn't say what resource you wanted to ping (eid).")

    # parse_qs maps each name to a list of values; use the first one.
    eid = eid[0]

    try:
        url = ping.getfirst('url')
        if not url:
            # Without the pinging URL there is nothing meaningful to
            # store or link to, so reject the ping instead of saving a
            # trackback whose url is None.
            return tbError("You didn't say which URL is pinging (url).")

        tb = Trackback()
        tb.url = url
        # Title and blog name fall back to the URL when not supplied.
        tb.title = ping.getfirst('title', url)
        tb.excerpt = ping.getfirst('excerpt', "")
        tb.blog_name = ping.getfirst('blog_name', url)
        tb.entryid = eid

        matchingEntry = vellum.Entry.get(eid)

        if not matchingEntry:
            return tbError("Could not find entry: %s" % (eid))

        tb.save()
        # The entry is saved too (presumably so its pages get rebuilt
        # with the new trackback -- confirm against vellum.Entry).
        matchingEntry.save()

        sys.stdout.write("""Content-type: text/xml

<?xml version="1.0"?>
<response>
    <error>0</error>
</response>
""")
        exit(0)

    except SystemExit:
        # exit() -- called above or inside tbError() -- is the normal way
        # out and must not be treated as a failure.
        raise
    except Exception:
        # This endpoint is callable by anyone, so keep the traceback in
        # the server log rather than sending internals to the client.
        traceback.print_exc(file=sys.stderr)
        return tbError("Internal error while processing the trackback ping.")

def tbError(err):
    """Write a Trackback failure response carrying *err*, then exit.

    *err* is XML-escaped and placed in the <message> element of an
    <error>1</error> response written to stdout, after which the
    process exits with status 0.
    """
    from xml.sax.saxutils import escape

    template = """Content-type: text/xml

<?xml version="1.0"?>
<response>
    <error>1</error>
    <message>%s</message>
</response>"""
    # Trailing newline matches what a print of the response would emit.
    sys.stdout.write(template % escape(err) + "\n")
    exit(0)


# Expose the handler as the "trackback" function on vellum.functions.
vellum.functions.trackback = trackbackServer
# Don't need to be authenticated to call the trackback function
vellum.functions.trackback.noAuth = 1
# and it wants to parse stdin itself
vellum.functions.trackback.noStdinParsing = 1

There's bound to be a bug or two in there, but so far it seems to work fine. Oh, and you need my entry-save-first hook that only fires when the entry is first saved. Or something. While I'm here, let me test it by linking to another page that shows off how nice vellum's plugin system is. dubya-blog has a similar plugin setup - now to finish it...

2 old-style comments

  1. bitserf, January 12, 2005 at 11:43 AM.

    i suspect vellum.functions.trackback.noAuth will be set to 0 pretty soon :) i like the idea of trackback, but it needs something like greylisting to be effective. even then...some people i know have gotten 1000s of trackback spam messages in the span of a few hours... good luck :)
  2. Neil Blakey-Milner, January 12, 2005 at 02:33 PM.

    I'm going to reinstitute the trackback verification soon - basically it will require that trackbacked pages actually contain the URL they claim in them. This obviously won't fix the situation entirely, but it should cut out a lot of problems, as well as not cut out valid uses (I think).
blog comments powered by Disqus