"""This module demonstates an entity resolver that makes it possible to process XML files stored in zip archives. File types other than zip archives are handled normally. Zip files must end in a '.zip' extension. For archives containing more than one file, the exact file in the zip archive must be specified. This is done by appending a '?' and then the name of the file e.g. This example would be better if the Python ZipFile object presented a real stream-oriented interface. """ import Pyana import sys from StringIO import StringIO from urlparse import urlparse from urllib import urlopen from zipfile import ZipFile from re import match class StringSource: def __init__(self, string): self.string = string def makeStream(self): return StringIO(self.string) class ZipEntityResolver: def __init__(self, reportExceptions = 1): # Failed document() calls are not reported by Xalan, # so we'll use reportExceptions for debugging # purposes self.reportExceptions = reportExceptions def resolveEntity(self, public, system): try: # Xalan presents file URIs as file:///C:/file.txt # Python expects file URIs as file:///C|/file.txt filematch = match('file:///(\w):(.*)', system) if filematch: system = 'file:///%s|%s' % ( filematch.group(1), filematch.group(2) ) # Lot of code duplication here. If I weren't the laziest # person in the world, I'd fix that. if system.lower().endswith('.zip'): zipRead = StringIO(urlopen(system).read()) zipfile = ZipFile(zipRead) if len(zipfile.namelist()) != 1: raise ValueError('Zip archive must contain a single '\ 'file or the query notation must '\ 'be used.') else: return StringSource( zipfile.read(zipfile.namelist()[0]) ) else: ziploc = system.lower().find('.zip?') if ziploc != -1: zipRead = StringIO(urlopen(system[:ziploc + 4]).read()) zipfile = ZipFile(zipRead) return StringSource( zipfile.read(system[ziploc + 5:]) ) else: return None except: if self.reportExceptions: import traceback traceback.print_exc() raise def main(xml, xsl): t = Pyana.Transformer() resolver = ZipEntityResolver() xmlSource = resolver.resolveEntity('', xml) or Pyana.URI(xml) xslSource = resolver.resolveEntity('', xsl) or Pyana.URI(xsl) t.setEntityResolver(resolver) print t.transform2String(xmlSource, xslSource) if __name__ == '__main__': main(sys.argv[1], sys.argv[2])