#!/usr/bin/env python2
"""Parse one page of a PDF file and display it in a GTK window.

Usage: SCRIPT FILE.pdf [PAGE_INDEX]

PAGE_INDEX is zero-based and defaults to DEFAULT_PAGE_INDEX.
"""

import sys
sys.setrecursionlimit(10000)
import pprint
import pdf_contents
import page_renderers
import symbols
import pygtk
pygtk.require("2.0")
import gtk
import pdfs
import pdf_fonts
import decompressors
try:
    import io
except ImportError:
    import StringIO as io

# Zero-based index of the page shown when none is given on the command line.
DEFAULT_PAGE_INDEX = 5


def process_page(page, renderer):
    """Feed the content-stream operations of *page* to *renderer*.

    The page's content streams are parsed with ``pdf_contents.Parser`` and
    each (operator, operands) pair is forwarded through
    ``renderer.add_operation``.  A few operators are rewritten on the way:

    * ``'``  becomes ``T*`` followed by ``Tj``.
    * ``"``  becomes ``Tw``, ``Tc``, ``T*`` and ``Tj``.
    * ``TD`` becomes ``TL`` (with the negated second operand) and ``Td``.
    * ``Do`` is forwarded together with the bytes of the named XObject.
    * ``Tf`` is not forwarded; it only updates the current font and size,
      which are attached to every following text-showing operation.

    Args:
        page: page object exposing ``Resources``, ``Contents``, ``get()``
            and item access by symbol.
        renderer: object providing ``set_user_unit()`` and
            ``add_operation()``.

    Raises:
        KeyError: if ``Tf`` names a font, or ``Do`` names an XObject, that
            the page resources do not define.
    """
    resources = page.Resources
    UserUnit = page.get(symbols.UserUnit) or 1.0  # in units of 1/72 inch.
    CropBox = page.get(symbols.CropBox) or page[symbols.MediaBox]
    print("CropBox", CropBox)
    fonts = {
        name: pdf_fonts.Font(source)
        for name, source in (resources.get(symbols.Font) or {}).items()
    }
    XObjects = resources.get(symbols.XObject) or {}
    font_size = 1
    font = None
    renderer.set_user_unit(UserUnit)

    # A page may carry a single content stream or a list of them.
    contents = page.Contents
    if not isinstance(contents, list):
        contents = [contents]
    content_parser = pdf_contents.Parser()
    contents = content_parser.parse([item.stream for item in contents])

    # Look these symbols up once instead of once per operator.
    single_quote = symbols.intern(b"'")
    double_quote = symbols.intern(b"\"")
    show_text_operators = (symbols.Tj, symbols.TJ)

    for operator_, operands in contents:
        if operator_ == symbols.BT:
            renderer.add_operation(operator_, [])
        elif operator_ == single_quote:
            renderer.add_operation(symbols.Tstar, [])
            renderer.add_operation(symbols.Tj, operands, (font, font_size))
        elif operator_ == double_quote:
            renderer.add_operation(symbols.Tw, [operands[0]])
            renderer.add_operation(symbols.Tc, [operands[1]])
            renderer.add_operation(symbols.Tstar, [])
            renderer.add_operation(symbols.Tj, [operands[2]],
                                   (font, font_size))
        elif operator_ == symbols.TD:
            renderer.add_operation(symbols.TL, [-operands[1]])
            renderer.add_operation(symbols.Td, operands)
        elif operator_ == symbols.Do:
            assert len(operands) == 1
            XO_name = operands[0]
            XO = XObjects.get(XO_name)
            if XO is None:
                # Fail with a clear message, like an unknown font does,
                # instead of an AttributeError on None.
                raise KeyError(
                    "page resources define no XObject named %r" % (XO_name,))
            # Copy the XObject's stream into memory so the renderer gets
            # plain bytes.
            buf = io.BytesIO()
            decompressors.copy_stream(XO.stream, buf)
            renderer.add_operation(symbols.Do, [XO_name], buf.getvalue())
        elif operator_ == symbols.Tf:
            font_name = operands[0]
            font_size = operands[1] if len(operands) > 1 else 1
            font = fonts[font_name]
        elif operator_ in show_text_operators:
            renderer.add_operation(operator_, operands, (font, font_size))
        else:
            renderer.add_operation(operator_, operands)


def main(argv):
    """Open the PDF named in *argv* and show one of its pages.

    Args:
        argv: command-line arguments; ``argv[1]`` is the PDF path and the
            optional ``argv[2]`` is the zero-based page index.
    """
    if len(argv) < 2:
        sys.exit("usage: %s FILE.pdf [PAGE_INDEX]" % argv[0])
    page_index = int(argv[2]) if len(argv) > 2 else DEFAULT_PAGE_INDEX

    # Keep the file open until the main loop exits, in case the parsed
    # objects read from it lazily -- TODO confirm; the with block closes it
    # afterwards.
    with open(argv[1], "rb") as input_file:
        parser = pdfs.Parser()
        PDF = parser.parse(input_file)
        Pages = PDF.Root.Pages
        Page_Count = Pages.Count
        pages = list(pdfs.linearize_pages(Pages))
        assert Page_Count == len(pages)
        if not 0 <= page_index < len(pages):
            sys.exit("page index %d out of range (document has %d pages)"
                     % (page_index, len(pages)))

        window, drawing_area, renderer = \
            page_renderers.create_default_window()
        process_page(pages[page_index], renderer)
        drawing_area.connect("expose-event", renderer.render)
        window.connect("delete-event", gtk.main_quit)
        gtk.main()


if __name__ == "__main__":
    main(sys.argv)