We decided to move from Atlassian's Confluence wiki software to MediaWiki, in the hope that a more familiar wiki system would encourage participation. To start, I exported the wiki's content from Confluence, which produced a zip file containing entities.xml. The script below reads that file and creates one text file per page.

#!/usr/bin/env python

from cElementTree import iterparse
from cStringIO import StringIO
import codecs

import os

# Directory that receives one text file per exported page.
output_dir = 'pages'


def _page_properties(page):
    """Return (title, content, is_current) read from a Page <object> element.

    Only direct <property> children are inspected.  ``title`` and ``content``
    are the text of the properties with those names (None when absent).
    ``is_current`` is False when the page carries an "originalVersion"
    property -- presumably that marks a historical revision of another page
    (TODO confirm against the Confluence export format).
    """
    title = None
    content = None
    is_current = True
    for child in page:
        if child.tag != "property":
            continue
        name = child.get('name')
        if name == "title":
            title = child.text
        elif name == "content":
            content = child.text
        elif name == "originalVersion":
            is_current = False
    return title, content, is_current


def _export_page(page, directory):
    """Write the content of one Page element to ``directory``/<title>.

    Pages with an "originalVersion" property are skipped silently; pages
    without content or without a title are reported and skipped.
    """
    title, content, is_current = _page_properties(page)
    if not is_current:
        return

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Will save page with title '%s'" % (title,))

    if not content:
        print("... but has no contents")
        return

    if not title:
        # Without a title there is no usable file name.
        print("... but has no title")
        return

    # '/' in a title would be read as a directory separator and make the
    # open() fail, so it is replaced.  Two titles that differ only in '/'
    # versus '_' end up in the same file.
    path = os.path.join(directory, title.replace('/', '_'))
    out = codecs.open(path, 'w', 'utf-8')
    try:
        out.write(content)
    finally:
        # Close explicitly so the data is flushed and handles do not pile up
        # over the many pages of an export.
        out.close()


if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

source = open("entities.xml", "rb")
try:
    for event, elem in iterparse(source):
        if elem.tag == "object" and elem.get('class') == 'Page':
            _export_page(elem, output_dir)
            # The page is fully handled; drop its children so the parsed
            # tree does not keep every page body in memory.
            elem.clear()
finally:
    source.close()