"""Convert mediawiki revisions into git commits in a new repository."""

import os

import lxml.etree

# Switch into the git repository. The relative revision directory below is
# resolved against this new working directory.
os.chdir("./git-repo")

revision_dir = "../dumpspot/revisions/"

# Get a list of all revision files, in ascending order.
# NOTE(review): sorted() compares the names as strings, so this matches
# revision order only if the filenames sort lexicographically in that order
# (e.g. zero-padded ids or timestamps) -- TODO confirm.
revision_filenames = sorted(os.listdir(revision_dir))

# Use absolute paths. os.path.join() alone would keep the paths relative to
# the current working directory, so they are resolved explicitly here and
# remain valid even if the working directory changes later.
revision_filenames = [
    os.path.abspath(os.path.join(revision_dir, path))
    for path in revision_filenames
]

# TODO: parse the giant xml file again and this time keep track of which
# filename or page name that each revision is associated with. Save this
# association in another file, probably json. Next load that file here.

for revision_filename in revision_filenames:
    # Parse the revision file into an XML element tree.
    tree = lxml.etree.parse(revision_filename)