#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Expose the object tree of a parsed PDF file as a read-only FUSE filesystem.

The PDF file name is taken from ``sys.argv[1]`` and parsed once at start-up.
List and dict nodes of the parsed tree are presented as directories; every
other value is presented as a regular file whose content is the value itself.
"""

import errno
import fuse
import symbols
import stat
import time
import sys
import pdfs
import os
import decompressors

PDF_name = sys.argv[1]
PDF_input_file = open(PDF_name, "rb")
# Timestamps of the PDF itself are reused for every node of the filesystem.
PDF_stat = os.fstat(PDF_input_file.fileno())
parser = pdfs.Parser()
PDF = parser.parse(PDF_input_file)
assert(isinstance(PDF, dict))
#Pages = PDF.Root.Pages
#Page_Count = Pages.Count
#pages = list(pdfs.linearize_pages(Pages))
#Kids = Pages.Kids
#assert(Page_Count == len(pages))
##pprint.pprint(pages[0])
#for i in range(Page_Count):
#    process_page(pages[i])

fuse.fuse_python_api = (0, 2)


def access_PDF(path):
    """Resolve a slash-separated path to the corresponding node of ``PDF``.

    Empty components are skipped, so "/" resolves to the root node.  Inside a
    list the component must be the zero-padded five-digit index that
    ``FS.readdir`` emits; elsewhere the component is first tried as an
    attribute of the node and then as the symbol key "/<component>".

    Raises:
        KeyError: if any component cannot be resolved.  This is the only
            exception ``FS.getattr`` translates into ENOENT, so malformed or
            out-of-range list indices and attempts to descend into a leaf
            value are reported as KeyError as well.
    """
    print(path)
    node = PDF
    for part in path.split("/"):
        if part == "":
            continue
        if isinstance(node, list):
            try:
                index = int(part)
            except ValueError:
                raise KeyError("file not found")
            # Only the canonical spelling produced by readdir is a valid name.
            if "%05d" % (index,) != part or not (0 <= index < len(node)):
                raise KeyError("file not found")
            node = node[index]
        else:
            try:
                node = getattr(node, part)  # node[symbols.intern("/%s" % (part, ))]
            except AttributeError:
                try:
                    node = node[symbols.intern("/%s" % (part,))]
                except TypeError:
                    # The node is a leaf value that cannot be indexed.
                    raise KeyError("file not found")
        if node is None:
            raise KeyError("file not found")
    return node


def wrap_into_file(obj):
    """Return a file-like view of a leaf value.

    Numbers, symbols (without their leading slash) and strings are wrapped
    in a ``decompressors.StringDecoder``; anything else is returned as is.
    """
    if isinstance(obj, (int, float)):
        return decompressors.StringDecoder(str(obj))
    if isinstance(obj, symbols.Symbol):
        return decompressors.StringDecoder(obj.name[1:])  # skips slash
    if isinstance(obj, str):
        return decompressors.StringDecoder(obj)
    return obj


def file_P(obj):
    """Return True when ``obj`` is shown as a file, i.e. is neither list nor dict."""
    print(obj.__class__)
    #return(isinstance(obj, int) or isinstance(obj, float) or isinstance(obj, symbols.Symbol) or hasattr(obj, "read"))
    return not isinstance(obj, (list, dict))


def get_file_size(f):
    """Return the total length of file-like ``f`` and rewind it to the start.

    The size is obtained by seeking to the end; when that raises ValueError
    the stream is read to exhaustion in 8 KiB chunks instead.
    """
    try:
        f.seek(0, 2)
        result = f.tell()
    except ValueError:  # seek not supported
        total = 0
        while True:
            chunk = f.read(8192)
            if not chunk:  # EOF, for text and for bytes streams alike
                break
            total += len(chunk)
        result = total
    f.seek(0, 0)
    return result


# Upper bound on the number of bytes read when materialising a file.
MAX = 1000000000000000
# path -> fully materialised StringDecoder, shared by all opens of that path.
file_cache = {}


class File(object):
    """Open-file object handed to fuse-python for one leaf node of the PDF."""

    def __init__(self, path, flags, *mode):
        """Resolve ``path`` and load (or reuse from ``file_cache``) its content.

        Raises:
            KeyError: propagated from ``access_PDF`` when ``path`` is unknown.
        """
        self.PDF_obj = access_PDF(path)
        if path in file_cache:
            self.file = file_cache[path]
        else:
            source = wrap_into_file(self.PDF_obj)
            # Read everything up front and keep it in a StringDecoder so that
            # later reads can seek within it.
            self.file = decompressors.StringDecoder(source.read(MAX))
            file_cache[path] = self.file
        self.size = get_file_size(self.file)
        print("=> Sz, ", self.size)

    def read(self, length, offset):
        """Return up to ``length`` units of content starting at ``offset``."""
        print("READ", self.file, length, offset)
        self.file.seek(offset)
        result = self.file.read(length)
        # print("=> %s" % (result,))
        return result

    def fgetattr(self):
        """Return a ``fuse.Stat`` describing this node as a regular file."""
        st = fuse.Stat()
        st.st_mode = stat.S_IFREG | 0o755
        st.st_nlink = 1  # FIXME this CAN be much more
        st.st_atime = PDF_stat.st_atime
        st.st_mtime = PDF_stat.st_mtime
        st.st_ctime = PDF_stat.st_ctime
        st.st_size = self.size
        if hasattr(self.PDF_obj, "object_ID"):
            st.st_ino = getattr(self.PDF_obj, "object_ID")
        return st

    #def getxattr(self, path, name, size):
    #    print("GETXATTR", )
    #def listxattr(self, path, size):
    #    aa = ["user.%s" % (a, ) for a in ["objID"]]
    #    if size == 0: # We are asked for the size
    #        return(len("".join(aa)) + len(aa))
    #    else:
    #        return aa

    def release(self, flags):
        """Nothing to release; the content stays in ``file_cache``."""
        pass


class FS(fuse.Fuse):
    """FUSE filesystem mapping paths onto nodes of the parsed PDF."""

    def __init__(self, *args, **kw):
        fuse.Fuse.__init__(self, *args, **kw)
        self.file_class = File

    def getattr(self, path):
        """Return a ``fuse.Stat`` for ``path``, or ``-errno.ENOENT`` if unknown."""
        try:
            PDF_obj = access_PDF(path)
        except KeyError:
            return -errno.ENOENT
        if file_P(PDF_obj):
            return File(path, 0).fgetattr()
        # Everything that is not a file is a list or dict, i.e. a directory.
        st = fuse.Stat()
        st.st_mode = stat.S_IFDIR | 0o755
        st.st_nlink = 2
        st.st_atime = PDF_stat.st_atime
        st.st_mtime = PDF_stat.st_mtime
        st.st_ctime = PDF_stat.st_ctime
        if hasattr(PDF_obj, "object_ID"):
            object_ID = getattr(PDF_obj, "object_ID")
            print("YES", object_ID)
            if object_ID:
                st.st_ino = object_ID
        #if path == '/':
        #    pass
        #else:
        #    return (-errno.ENOENT)
        return st

    def readdir(self, path, offset):
        """Yield a ``fuse.Direntry`` for ".", ".." and every child of ``path``.

        List children are named by their zero-padded five-digit index, in
        index order.  Dict children are named by their key, with symbol keys
        reduced to their name and a leading slash removed.  A ``pdfs.Object``
        with a truthy ``stream`` attribute gets an extra "stream" entry.
        """
        PDF_obj = access_PDF(path)
        print(PDF_obj.__class__)
        if isinstance(PDF_obj, list):
            names = ["%05d" % (index,) for index in range(len(PDF_obj))]
            extra = []
        else:
            assert(isinstance(PDF_obj, dict))  # or hasattr(PDF_obj, "read"))
            names = list(PDF_obj.keys())
            if isinstance(PDF_obj, pdfs.Object) and PDF_obj.stream:
                extra = ["stream"]
            else:
                extra = []
        for s in [".", ".."] + names + extra:
            if isinstance(s, symbols.Symbol):
                s = s.name
            if s.startswith("/"):
                s = s[1:]
            yield fuse.Direntry(s)

    #def open(self, path, flags):
    #    access_flags = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
    #    if flags & access_flags != os.O_RDONLY:
    #        return -errno.EACCES
    #    PDF_obj = access_PDF(path)
    #    return PDF_obj


if __name__ == '__main__':
    fs = FS()
    fs.parse(errex=1)
    fs.main()