About
Swaroop C H is 29 years of age. He is a coder and startupper. He has previously worked at Yahoo!, Adobe, his own startup and Infibeam.
Views
Support
Personal tools
COLLECTION
Collection
Basecamp to MediaWiki
From Notes
Ever wanted to convert your content in Basecamp to MediaWiki?
Here's what you do:
- Get your export zip file from Basecamp.
- Pass that zip file as an argument to the script below; it will write an XML file next to the zip file.
- Go to the Special:Import page in your MediaWiki instance and import the XML file produced in the previous step.
#!/usr/bin/env python
# Convert Basecamp export -> MediaWiki import.
#
# URL: http://www.swaroopch.com/notes/Basecamp_to_MediaWiki
# License: http://sam.zoy.org/wtfpl/COPYING
# Version: 0.1
# Last-Updated-On: Thu, 23 Oct 2008 12:24:15 +0530
# Tested-With: Python 2.5+

# Limitations
# * WriteBoards are NOT present in the export, so we lose that content.

# References
# MediaWiki format : http://meta.wikimedia.org/wiki/Help:Export#Export_format
# Processing XML : http://www.amk.ca/talks/2006-02-07/

import os
import sys
import re
import zipfile
from pprint import pprint

try:
    import xml.etree.cElementTree as ElementTree
except ImportError:
    # Interpreters without the C-accelerated alias expose the same API here.
    import xml.etree.ElementTree as ElementTree

ZIP_SUFFIX = '.zip'
OUTPUT_SUFFIX = '_mediawiki.xml'

# "label":http://target  ->  [http://target label]
_LINK_RE = re.compile(r'"([^"]+)":([^\s]+)')

# Emphasis markers must open and close with the same delimiter and must not
# touch a word character on the outside.  Without those guards, identifiers
# and URLs such as snake_case_name or 2*3*4 get rewritten as markup.
# NOTE(review): Textile proper appears to define *x* as strong and __x__ as
# italic; the bold/italic mapping below is kept exactly as this script has
# always had it -- confirm which mapping is actually wanted.
_BOLD_RE = re.compile(r'(?<!\w)(\*\*|__)(\w+)\1(?!\w)')
_ITALIC_RE = re.compile(r'(?<!\w)(\*|_)(\w+)\1(?!\w)')


def format_content(text):
    '''Given text in Textile syntax, convert to MediaWiki syntax.

    Handles external links and single-word bold / italic spans only.
    Returns '' when text is None (element missing from the export).
    '''
    if text is None:
        return ''
    text = _LINK_RE.sub(r'[\2 \1]', text)        # external link
    text = _BOLD_RE.sub(r"'''\2'''", text)       # bold
    text = _ITALIC_RE.sub(r"''\2''", text)       # italics
    # TODO Convert links to Writeboards into internal links on this wiki page
    return text


def output_name_for(zip_file_name):
    '''Return the path of the MediaWiki XML file written for zip_file_name.

    Only the trailing '.zip' is swapped; any other '.zip' inside the path
    (for example in a directory name) is left untouched.
    '''
    if zip_file_name.endswith(ZIP_SUFFIX):
        zip_file_name = zip_file_name[:-len(ZIP_SUFFIX)]
    return zip_file_name + OUTPUT_SUFFIX


def _read_export_xml(zip_file_name):
    '''Return the raw bytes of the single XML file inside the export zip.

    Raises ValueError when the archive does not hold exactly one member.
    '''
    archive = zipfile.ZipFile(zip_file_name)
    try:
        names = archive.namelist()
        if len(names) != 1:
            raise ValueError(
                'Expected exactly one file in %s, found %d'
                % (zip_file_name, len(names)))
        return archive.read(names[0])
    finally:
        archive.close()


def _project_wikitext(project, basecamp_name):
    '''Return the wiki text of one page built from a <project> element.'''
    lines = [
        '== Announcements ==',
        format_content(project.findtext('announcement')),
        '== Milestones ==',
    ]

    for milestone in project.findall('milestones/milestone'):
        completed_on = milestone.findtext('completed-on')
        # findtext() gives '' for an element that exists but is empty, so
        # test truthiness rather than "is not None".
        if completed_on:
            completed_on = ', completed on ' + completed_on[:10]
        else:
            completed_on = ''
        lines.append('* %s, due on %s%s'
                     % (milestone.findtext('title'),
                        milestone.findtext('deadline'),
                        completed_on))

    lines.append('== Todo Lists ==')
    for todo_list in project.findall('todo-lists/todo-list'):
        lines.append('=== %s ===' % todo_list.findtext('name'))
        for todo_item in todo_list.findall('todo-items/todo-item'):
            if todo_item.findtext('completed') == 'true':
                completed = ' (completed)'
            else:
                completed = ''
            lines.append('* %s%s'
                         % (format_content(todo_item.findtext('content')),
                            completed))

    lines.append('\n[[Category:%s]]' % basecamp_name)
    # Joining a list (instead of writing to cStringIO) keeps non-ASCII text
    # intact; every line, including the last, ends with a newline.
    return '\n'.join(lines) + '\n'


def convert_export(tree):
    '''Build the <mediawiki> element tree from a parsed Basecamp export.

    tree is the root element of the export XML.  One <page> is produced per
    projects/project element; the account name becomes both the <sitename>
    and the category appended to every page.
    '''
    output_root = ElementTree.XML('<mediawiki xml:lang="en"></mediawiki>')

    basecamp_name = tree.findtext('name')
    ElementTree.SubElement(output_root, 'sitename').text = basecamp_name

    for project in tree.findall('projects/project'):
        project_page = ElementTree.SubElement(output_root, 'page')
        ElementTree.SubElement(project_page, 'title').text = \
            project.findtext('name')

        revision = ElementTree.SubElement(project_page, 'revision')
        ElementTree.SubElement(revision, 'timestamp').text = \
            project.findtext('last-changed-on')
        ElementTree.SubElement(revision, 'contributor').text = ''
        ElementTree.SubElement(revision, 'comment').text = ''
        ElementTree.SubElement(revision, 'text').text = \
            _project_wikitext(project, basecamp_name)

    return output_root


def main(argv):
    '''Command-line entry point; returns the process exit status.'''
    # Where is the Basecamp Exported zip file?
    if len(argv) != 2 or not argv[1].endswith(ZIP_SUFFIX):
        sys.stdout.write(
            'Please provide the location of the Basecamp Export Zip file.\n')
        return 1

    zip_file_name = argv[1]
    output_file_name = output_name_for(zip_file_name)

    # Read contents of the single XML file in the zip archive
    tree = ElementTree.XML(_read_export_xml(zip_file_name))

    # Process the XML tree and write to file
    output_tree = ElementTree.ElementTree(convert_export(tree))
    output_tree.write(output_file_name)

    sys.stdout.write('Success! MediaWiki Import XML saved at %s\n'
                     % output_file_name)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Please add your comments by clicking on the 'Discussion' link in the left sidebar.