{PDF parser. Copyright (C) 2008 Danny Milosavljevic This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA } {$MODE OBJFPC} uses classes, PDFs, variants, sysutils, PDF_content_parsers, PDF_parsers, PDF_ToUnicode_map_parsers, PDF_ToUnicode_maps, PDF_contents, windowed_streams, unix, strings, baseunix, PDF_fonts; function Spaces(aCount : Integer) : String; begin Result := ''; Result := Result + ' '; // #9#9#9#9#9#9#9#9#9#9#9#9; // ' '; SetLength(Result, aCount * 2); end; procedure Traverse(const aObject : PDFs.IPDFObject; aLevel : Integer; aKey : TSymbolTableKey = ''); forward; procedure TraversePages(const aPages : PDFs.IPages; aLevel : Integer); var vCount : String; vKids : Variant; vIndex : Integer; begin Writeln(Spaces(aLevel) + 'Pages:'); vCount := aPages.MetadataGet('/Count'); Writeln(Spaces(aLevel + 1) + vCount); vKids := aPages.MetadataGet('/Kids'); if VarArrayDimCount(vKids) > 0 then for vIndex := VarArrayLowBound(vKids, 1) to VarArrayHighBound(vKids, 1) do begin Traverse(vKids[vIndex], aLevel + 2); end; end; procedure DebugStream(aStream : TStream); var vItem : Byte; begin Writeln('BEGIN CODE'); while True do begin vItem := aStream.ReadByte(); Write(chr(vItem)); end; Writeln('END CODE'); end; function CopyFrom(Source: TStream; Destination : TStream): Int64; var i : Int64; readCount : Int64; writtenCount : Int64; buffer : array[0..1023] of byte; begin CopyFrom := 0; repeat i := sizeof(Buffer); readCount := Source.Read(buffer,i); if readCount = 0 then Break; writtenCount := Destination.Write(buffer, readCount); if writtenCount <> readCount then raise EWriteError.Create('error'); CopyFrom := CopyFrom+i; until readCount = 0; end; procedure DumpAttributeNames(const aAttributes : Variant); var vAttributes : IAttributes; vAttributeIndex : Integer; begin vAttributes := InterfaceFromVariant(aAttributes) as IAttributes; assert(Assigned(vAttributes)); writeln('==S=='); if vAttributes.ItemCount > 0 then for vAttributeIndex := 0 to vAttributes.ItemCount - 1 do begin Write(vAttributeIndex, ': '); Writeln(vAttributes.GetItemKey(vAttributeIndex)); end; writeln('==E=='); end; var Number : Cardinal = 0; function GetFontToUnicode(const aContents : PDFs.IToUnicode; aLevel : Integer) : PDF_ToUnicode_maps.IMap; var vOutputFile : TStream; vParser : PDF_ToUnicode_map_parsers.TParser; vAttributes : IAttributes; vValue : Variant; begin { Writeln(Spaces(aLevel), 'ToUnicode:'); vOutputFile := TFileStream.Create('decoded/unicode/' + IntToStr(Number), fmCreate); Inc(Number); CopyFrom(aContents.Stream, vOutputFile); FreeAndNil(vOutputFile); Result := nil; } vAttributes := InterfaceFromVariant(aContents.MetaData) as IAttributes; // TODO cache. if Assigned(vAttributes) then begin vValue := vAttributes.Get('$UnicodeMap'); if not VarIsEmpty(vValue) then begin Result := InterfaceFromVariant(vValue) as PDF_ToUnicode_maps.IMap; Exit; end; end; // FIXME else create attributes? vParser := PDF_ToUnicode_map_parsers.TParser.Create(aContents.Stream, False, nil); try Result := vParser.Parse(); Result.ID := aContents.GetID(); //Writeln(Format('^^ ID WAS %d ^^', [Result.ID])); vAttributes.Add('$UnicodeMap', IInterface(Result)); except on E : EReadError do begin vParser.Error('', ''); end; end; FreeAndNil(vParser); end; function Str(const aValue : Variant) : String; var vIndex : Cardinal; begin if VarIsStr(aValue) then begin Result := QuotedStr(aValue) end else if VarIsArray(aValue) then begin Result := '['; for vIndex := VarArrayLowBound(aValue, 1) to VarArrayHighBound(aValue, 1) do begin Result := Result + Str(aValue[vIndex]); Result := Result + ' '; end; Result := Result + ']'; end else begin Result := aValue; end; end; procedure DumpToFile(const aObject : PDFs.IPDFObject {or IContents?}; aName : TFilename); var vOutputFile : TStream; begin try vOutputFile := TFileStream.Create(aName, fmCreate); CopyFrom(aObject.Stream, vOutputFile); FreeAndNil(vOutputFile); except Writeln('whoops'); end; end; procedure TraverseResources(const aResources : PDFs.IAttributes; aLevel : Integer); forward; procedure TraverseContents(const aContents : Variant; aLevel : Integer; const aResources : PDFs.IAttributes); forward; function ExtractXObject(const aResources : PDFs.IAttributes; const aObject : PDFs.IPDFObject; aName : string) : ANSIString; var vAttributes : IAttributes; vIndex : Cardinal; vWidth : Cardinal; vHeight : Cardinal; vSubtype : String; vResources : PDFs.IAttributes; begin assert(Assigned(aObject)); assert(Copy(aName, 1, 1) = '/'); Result := ''; vAttributes := InterfaceFromVariant(aObject.MetaData) as IAttributes; vSubtype := vAttributes.Get('/Subtype'); if vSubtype = '/Form' then begin Result := ''; vResources := InterfaceFromVariant(Dereference(vAttributes.Get('/Resources'))) as PDFs.IAttributes; TraverseResources(vResources, 1); // TODO TraverseMediaBox(vAttributes.MetadataGet('/BBox'), aLevel + 1); TraverseContents(aObject, 1, vResources); Writeln('
'); Exit; end else if vSubtype <> '/Image' then begin Result := Result + Format('(skipping non-image %s (actual subtype was "%s"))', [aName, vSubtype]); end else begin vWidth := Dereference(vAttributes.Get('/Width')); vHeight := Dereference(vAttributes.Get('/Height')); aName := Copy(aName, 2, Length(aName)); Result := Result + Format('%s', [aName, vWidth, vHeight, aName]); end; DumpToFile(aObject, 'decoded/XObject/' + aName); end; // FIXME this isn't nice: var fFontUnicodeTables : ISymbolTable; function UnicharToUTF8(aCodepoint : TUnicodeCharacter) : UTF8String; inline; var fString : WideString; begin SetLength(fString, 1); fString[1] := WideChar(aCodepoint); Result := UTF8Encode(fString); end; function ToUTF8String(const aValue : ANSIString; const aToUnicodeTable : PDF_ToUnicode_maps.IMap) : UTF8String; var fValue : PChar; fValueSize : Cardinal; fPreviousValueSize : Cardinal; fCodepoint : TUnicodeCharacter; begin if aToUnicodeTable = nil then begin Result := 'XXX' + aValue + 'XXX'; Exit; end; Result := ''; // '(' + aValue + ')'; fValueSize := Length(aValue); fValue := PChar(aValue); repeat fPreviousValueSize := fValueSize; if fValueSize = 0 then Break; fCodepoint := aToUnicodeTable.NextUnicodeCodepoint(fValue, fValueSize); if (fCodepoint = cNilCodepoint) or (fPreviousValueSize = fValueSize) then Break; Result := Result + UnicharToUTF8(fCodepoint); until fCodepoint = cNilCodepoint; end; function DebugTable(const aToUnicodeTable : PDF_ToUnicode_maps.IMap) : ANSIString; var fString : array[0..1] of Char; fPString : PChar; fValue : Cardinal; fValueSize : Cardinal; begin fString[0] := #1; fString[1] := #0; fPString := @fString[0]; fValueSize := 1; Result := ''; if aToUnicodeTable <> nil then begin Result := Result + IntToStr(aToUnicodeTable.ID); // + '/'; //fValue := aToUnicodeTable.NextUnicodeCodepoint(fPString, fValueSize); //Result := Result + IntToStr(fValue); //aToUnicodeTable.Dump(); end; end; procedure PrintAttributes(const aAttributes : PDFs.IAttributes; aLevel : Integer); forward; function ExtractContentUsingFont(const aFontMeta : PDFs.IFont; const aResources : PDFs.IAttributes; const aInstruction : PDF_contents.IInstruction; aFont : TSymbolTableKey) : ANSIString; var vArgument : Variant; vIndex : Integer; fFontName : TFileName; fFontFile : PDFs.IFontFile; fEncodingRaw : Variant; fFontCharset : PDF_fonts.ICharset; fFontDescriptor : PDFs.IFontDescriptor; vToUnicodeTable : PDF_ToUnicode_maps.IMap; function ToCharset(const text : ANSIString) : ANSIString; var i : Integer; n : String; begin Result := ''; for i := 1 to Length(text) do begin n := fFontCharset.GetCharName(Ord(text[i])); if n = '' then begin Writeln(StdErr, Format('unknown char %d', [Ord(text[i])])); n := '/unknown'; end; Result := Result + n; // includes slash end; end; // FIXME do that without external program function Decode(text : ANSIString) : ANSIString; var PP : PPchar; a : ANSIString; b : ANSIString; c : ANSIString; status:cint; begin GetMem (PP,5*SizeOf(Pchar)); c := IntToStr(Number); if fpFork() = 0 then begin a := '/home/dannym/source/playground/trunk/PDF/doc/CFF/to_SVG'; PP[0]:=PChar(a); b := fFontName; PP[1]:=PChar(b); PP[2]:=PChar(text); PP[3]:=PChar(c); PP[4]:=nil; { Execute '/bin/ls -l', with current environment } fpExecv (PP[0], PP); Halt(1); end else begin fpWait(status); Result := Format('', [fFontName, Number]); Inc(Number); end; end; begin vToUnicodeTable := fFontUnicodeTables.Get(aFont); fFontName := Copy(aFont, 2, Length(aFont)); // without '/' fEncodingRaw := Dereference(aFontMeta.MetaDataGet('/Encoding')); fFontCharset := PDF_fonts.TCharset.ParseEncoding(fEncodingRaw); if not FileExists(fFontName) then begin fFontDescriptor := InterfaceFromVariant(aFontMeta.MetaDataGet('/FontDescriptor')) as PDFs.IFontDescriptor; fFontFile := InterfaceFromVariant(fFontDescriptor.MetaDataGet('/FontFile3')) as PDFs.IFontFile; if fFontFile = nil then fFontFile := InterfaceFromVariant(fFontDescriptor.MetaDataGet('/FontFile')) as PDFs.IFontFile; DumpToFile(fFontFile, fFontName); end; Result := ''; with aInstruction do begin if Operator_ in [ opShowString {Tj} {non-array}, opNextLineShowString {'}, opNextLineSpacedShowString {"}, //opSetWordSpacing {Tw}, //opSetCharacterSpacing {Tc}, //opMoveCaret {Td}, opShowStringWithVariableSpacing {TJ}] then begin vArgument := GetArgument(0); if VarIsArray(vArgument) then begin for vIndex := VarArrayLowBound(vArgument, 1) to VarArrayHighBound(vArgument, 1) do begin if VarIsStr(vArgument[vIndex]) then begin // esp. not number Result := Result + Decode(ToCharset(ANSIString(vArgument[vIndex]))); //Result := Result + ' '; end else if VarIsNumeric(vArgument[vIndex]) then begin if vArgument[vIndex] < -100 then Result := Result + ' '; end; end; //Result := Copy(Result, 1, Length(Result) - 1); end else Result := Result + Decode(ToCharset(ANSIString(vArgument))); if Operator_ in [opNextLineShowString, opNextLineSpacedShowString] then begin Result := Result + Chr(10); end; end; end; end; function ExtractUsefulContent(const aResources : PDFs.IAttributes; const aInstruction : PDF_contents.IInstruction; var aFont : TSymbolTableKey) : ANSIString; var vArgument : Variant; vIndex : Integer; vXObject : PDFs.IAttributes; vToUnicodeTable : PDF_ToUnicode_maps.IMap; fResourceFont : PDFs.IAttributes; fFontMeta : PDFs.IFont; fBaseFont : string; begin Result := ''; vToUnicodeTable := fFontUnicodeTables.Get(aFont); // Writeln(Format('Font "%s" not found.', [aFont])); Result := Result + (Format('', [Copy(aFont, 2, Length(aFont))])); // skip '/' with aInstruction do begin //Debug(); if Operator_ in [opMoveCaret, opMoveCaretToStartOfNextLineAndOffsetAndSetLeading, opMoveCaretToStartOfNextLine, opSetTextMatrix] then begin if Operator_ = opMoveCaret then begin // TODO //Arg#0:0 Arg#1:-13.555 end; if Operator_ = opMoveCaretToStartOfNextLineAndOffsetAndSetLeading then begin // a little heuristics if (GetArgument(0) >= 10) and (GetArgument(1) = 0) then begin // actually not a new line, just being silly. Result := Result + (''); Exit; end; end; Result := #10; Result := Result + ('
'); Exit; end; if Operator_ = opPaintExternalObject then begin vArgument := GetArgument(0); // "/Im0" symbol name. {/Resources <>} vXObject := InterfaceFromVariant(Dereference(aResources.Get('/XObject'))) as PDFs.IAttributes; if Assigned(vXObject) then begin //Writeln(Format('XObject "%s":', [ANSIString(vArgument)])); // FIXME escape Result := Result + ExtractXObject(aResources, vXObject.Get(vArgument), vArgument); end; { if aResources.ItemCount > 0 then for vIndex := 0 to aResources.ItemCount - 1 do begin Writeln('key:', aResources.GetItemKey(vIndex)); // function GetItemValue(aIndex : Cardinal) : Variant; end; } //aResources.Parser.GetExternalObject(vArgument); //Debug(); end; if Operator_ = opSetFontAndSize then begin vArgument := GetArgument(0); // TODO second argument: font size. aFont := vArgument; //Result := Result + Format('', [aFont]); vToUnicodeTable := fFontUnicodeTables.Get(aFont); //Result := Result + DebugTable(vToUnicodeTable); Result := Result + (Format('', [Copy(aFont, 2, Length(aFont))])); // skip '/' end; if Operator_ in [ opShowString {Tj} {non-array}, opNextLineShowString {'}, opNextLineSpacedShowString {"}, //opSetWordSpacing {Tw}, //opSetCharacterSpacing {Tc}, //opMoveCaret {Td}, opShowStringWithVariableSpacing {TJ}] then begin // if (aFont <> '/F5') and (aFont <> '/F14') and (aFont <> '/F18') and (aFont <> '/F10') and (aFont <> '/F21') then begin // FIXME remove. fResourceFont := InterfaceFromVariant(Dereference(NVL(aResources.Get('/Font'), variants.Null))) as PDFs.IAttributes; fFontMeta := InterfaceFromVariant(NVL(fResourceFont.Get(aFont), variants.Null)) as PDFs.IFont; fBaseFont := NVL(fFontMeta.MetaDataGet('/BaseFont'), ''); if Pos('Minion', fBaseFont) = 0 then begin //need special handling for that. Result := Result + ExtractContentUsingFont(fFontMeta, aResources, aInstruction, aFont); Exit; end; if vToUnicodeTable = nil then begin Result := Result + Format('(Font "%s" unknown)', [aFont]); end; // FIXME escape vArgument := GetArgument(0); if VarIsArray(vArgument) then begin for vIndex := VarArrayLowBound(vArgument, 1) to VarArrayHighBound(vArgument, 1) do begin if VarIsStr(vArgument[vIndex]) then begin // esp. not number Result := Result + ToUTF8String(ANSIString(vArgument[vIndex]), vToUnicodeTable); //Result := Result + ' '; end else if VarIsNumeric(vArgument[vIndex]) then begin if vArgument[vIndex] < -100 then Result := Result + ' '; end; end; //Result := Copy(Result, 1, Length(Result) - 1); end else Result := Result + ToUTF8String(ANSIString(vArgument), vToUnicodeTable); if Operator_ in [opNextLineShowString, opNextLineSpacedShowString] then begin Result := Result + Chr(10); end; //vContent.Instruction[vInstructionIndex].Debug(); { Writeln('STR'); Writeln(ArgumentCount); if ArgumentCount > 0 then Writeln(Str(Argument[0])); // ArgumentCount } end; end; Result := Result + ''; end; procedure WriteHTMLText(aText : ANSIString; aBAttribute : Boolean); var vIndex : Integer; begin for vIndex := 1 to Length(aText) do begin case aText[vIndex] of //#2: Write('fi'); // FIXME I think this is a PDF ligature, it shouldn't be here, thus. //#3: Write('fl'); // FIXME I think this is a PDF ligature, it shouldn't be here, thus. '<': Write('<'); '>': Write('>'); '&': Write('&'); '"': if aBAttribute then Write('"') else Write(aText[vIndex]); #10: Write(''); // eew.. else Write(aText[vIndex]); end; end; end; { or TTextBlock } procedure PrintContent(const vContent : PDF_contents.IContent; const aResources : PDFs.IAttributes; var aFont : TSymbolTableKey); var vInstructionIndex : Cardinal; vArgument : Variant; //vToUnicodeTable : PDF_ToUnicode_maps.IMap; begin if vContent.InstructionCount > 0 then for vInstructionIndex := 0 to vContent.InstructionCount - 1 do begin with vContent.GetInstruction(vInstructionIndex) do begin if Operator_ = opBeginText then begin // recurse. vArgument := GetArgument(0); PrintContent(PDF_contents.IContent(IInterface(vArgument)), aResources, aFont); Continue; end; //vToUnicodeTable := fFontUnicodeTables.Get(aFont); Write(ExtractUsefulContent(aResources, vContent.GetInstruction(vInstructionIndex), aFont)); end; end; Writeln(); // ' - end content'); end; procedure TraverseContents(const aContents : Variant; aLevel : Integer; const aResources : PDFs.IAttributes); var vOutputFile : TStream; vParser : PDF_content_parsers.TParser; vContent : PDF_contents.TContent; fFont : TSymbolTableKey; begin fFont := ''; Writeln(Format('File is ''%s''', [IntToStr(Number)])); { vOutputFile := TFileStream.Create('decoded/content/' + IntToStr(Number), fmCreate); CopyFrom(aContents.Stream, vOutputFile); FreeAndNil(vOutputFile);} {vContents : Variant; vContentIndex : Cardinal;} {FIXME array for vContentIndex := VarArrayLowBound(vContents, 1) to VarArrayHighBound(vContents, 1) do begin Write(Spaces(aLevel + 1) + '#', vContentIndex, ':'); vContent := vContents[vContentIndex]; TraverseContents(vContent, aLevel + 1, vResources); end; } Inc(Number); {if not Assigned(aContents.Stream) then begin Writeln('(no stream found)'); Exit; end;} // aContents.Stream loop until Windowed stream.SourceStartingPosition vParser := PDF_content_parsers.TParser.Create(aContents); try vContent := vParser.Parse(); PrintContent(vContent, aResources, fFont); FreeAndNil(vContent); except on E : EReadError do begin vParser.Error('', ''); end; end; FreeAndNil(vParser); end; procedure PrintAttributes(const aAttributes : PDFs.IAttributes; aLevel : Integer); var vIndex : Integer; vItemKey : TSymbolTableKey; vItemValue : Variant; begin if aAttributes.ItemCount > 0 then for vIndex := 0 to aAttributes.ItemCount - 1 do begin vItemKey := aAttributes.GetItemKey(vIndex); Write(vItemKey); Write(':'); vItemValue := aAttributes.GetItemValue(vIndex); try Write(NVL(vItemValue, '')); except on E : EVariantError do begin if VarIsClear(vItemValue) or VarIsNull(vItemValue) then Write('nothing') else begin Writeln(''); Traverse(vItemValue, aLevel + 1, vItemKey); end; end; end; Writeln(''); end; end; procedure PrintArray(const aArray : Variant; aLevel : Integer); var vIndex : Integer; begin if VarIsClear(aArray) or VarIsNull(aArray) then begin Write('nothing'); Exit; end; Write('['); if VarArrayDimCount(aArray) > 0 then for vIndex := VarArrayLowBound(aArray, 1) to VarArrayHighBound(aArray, 1) do begin Write(aArray[vIndex]); Write(' '); end; Write(']'); end; procedure TraverseResourcesFont(const aFont : PDFs.IAttributes; aLevel : Integer); var i : Integer; vToUnicodeTable : PDF_ToUnicode_maps.IMap; begin //Exit; // FIXME if not Assigned(aFont) then Exit; fFontUnicodeTables := TSymbolTable.Create(); // clear... Writeln(Spaces(aLevel) + 'ResourcesFont <<'); PrintAttributes(aFont, aLevel); Writeln(Spaces(aLevel) + '>>'); with fFontUnicodeTables do for i := 0 to ItemCount - 1 do begin Writeln(Format(''); end; end; procedure TraverseFontFile(const aFontFile : PDFs.IFontFile; aLevel : Integer); var vOutputFile : TStream; begin Exit; if not Assigned(aFontFile) then Exit; Writeln(Spaces(aLevel) + 'font file...'); //DebugStream(aFontFile.Stream); vOutputFile := TFileStream.Create('decoded/font/' + IntToStr(Number), fmCreate); Inc(Number); CopyFrom(aFontFile.Stream, vOutputFile); FreeAndNil(vOutputFile); end; procedure TraverseFontDescriptor(const aFontDescriptor : PDFs.IFontDescriptor; aLevel : Integer); var i : Integer; vAttributes : IAttributes; begin Exit; // FIXME if not Assigned(aFontDescriptor) then Exit; Writeln(Spaces(aLevel) + 'FontDescriptor:'); Writeln(Spaces(aLevel + 1) + 'Name: ', aFontDescriptor.MetaDataGet('/FontName')); Writeln(Spaces(aLevel + 1) + 'Flags: ', aFontDescriptor.MetaDataGet('/Flags')); Write(Spaces(aLevel + 1) + 'FontBBox: '); PrintArray(aFontDescriptor.MetaDataGet('/FontBBox'), aLevel + 1); Writeln(''); Writeln(Spaces(aLevel + 1) + 'Ascent: ', aFontDescriptor.MetaDataGet('/Ascent')); Writeln(Spaces(aLevel + 1) + 'CapHeight: ', aFontDescriptor.MetaDataGet('/CapHeight')); Writeln(Spaces(aLevel + 1) + 'Descent: ', aFontDescriptor.MetaDataGet('/Descent')); Writeln(Spaces(aLevel + 1) + 'ItalicAngle: ', aFontDescriptor.MetaDataGet('/ItalicAngle')); Writeln(Spaces(aLevel + 1) + 'StemV: ', aFontDescriptor.MetaDataGet('/StemV')); Writeln(Spaces(aLevel + 1) + 'XHeight: ', aFontDescriptor.MetaDataGet('/XHeight')); Writeln(Spaces(aLevel + 1) + 'CharSet: ', aFontDescriptor.MetaDataGet('/CharSet')); // '(/A/C/D/E/F/I/M/zero)'. TraverseFontFile(InterfaceFromVariant(aFontDescriptor.MetaDataGet('/FontFile')) as IPDFObject, aLevel + 1); end; procedure TraverseFontWidths(const aFontWidths : Variant{array}; aLevel : Integer); begin Write(Spaces(aLevel) + 'Widths: '); if VarIsClear(aFontWidths) or VarIsNull(aFontWidths) then Write('nothing') else PrintArray(aFontWidths, aLevel); Writeln(''); end; procedure TraverseFont(const aFont : PDFs.IFont; aLevel : Integer; aKey : TSymbolTableKey = ''); var vEncodingV : Variant; vToUnicodeV : Variant; vMap : PDF_ToUnicode_maps.IMap; begin // Writeln(Spaces(aLevel) + 'Font:'); // Writeln(Spaces(aLevel + 1) + 'Subtype: ' + aFont.MetaDataGet('/Subtype')); // Writeln(Spaces(aLevel + 1) + 'BaseFont: ' + aFont.MetaDataGet('/BaseFont')); // Writeln(Spaces(aLevel + 1) + 'FirstChar: ', aFont.MetaDataGet('/FirstChar')); // Writeln(Spaces(aLevel + 1) + 'LastChar: ', aFont.MetaDataGet('/LastChar')); // TraverseFontWidths(aFont.MetaDataGet('/Widths'), aLevel + 1); // TraverseFontDescriptor(aFont.MetaDataGet('/FontDescriptor'), aLevel + 1); vEncodingV := Dereference(aFont.MetaDataGet('/Encoding')); // can be a sub-object! //Writeln(VarType(vEncodingV)); //Writeln(Spaces(aLevel + 1) + 'Encoding: ', NVL(vEncodingV, '')); vToUnicodeV := aFont.MetaDataGet('/ToUnicode'); if not VarIsClear(vToUnicodeV) then begin //Writeln(''); vMap := GetFontToUnicode(InterfaceFromVariant(vToUnicodeV) as PDFs.IToUnicode, aLevel + 1); if aKey <> '' then fFontUnicodeTables.Add(aKey, vMap); end; // Writeln(Spaces(aLevel) + ''); end; procedure TraverseResourcesProcSet(const aProcSet : Variant{array}; aLevel : Integer); begin Write(Spaces(aLevel) + 'ProcSet '); PrintArray(aProcSet, aLevel); Writeln(''); end; procedure TraverseResources(const aResources : PDFs.IAttributes; aLevel : Integer); begin if not Assigned(aResources) then Exit; Writeln(Spaces(aLevel) + ''); end; procedure TraverseMediaBox(const aMediaBox : Variant{array}; aLevel : Integer); begin {Write(Spaces(aLevel) + 'MediaBox '); PrintArray(aMediaBox, aLevel); Writeln('');} end; procedure TraversePage(const aPage : PDFs.IPage; aLevel : Integer); var vResources : PDFs.IAttributes; vContent : Variant; begin Writeln(''); vContent := aPage.MetadataGet('/Contents'); TraverseContents(vContent, aLevel + 1, vResources); end; procedure Traverse(const aObject : PDFs.IPDFObject; aLevel : Integer; aKey : TSymbolTableKey = ''); var vType_ : String; begin vType_ := aObject.Type_; if vType_ = '/Pages' then TraversePages(aObject, aLevel) else if vType_ = '/Page' then TraversePage(aObject, aLevel) else if vType_ = '/Font' then TraverseFont(aObject, aLevel, aKey) else begin Write(Spaces(aLevel) + '???'); Writeln(vType_); end; end; procedure Parse(aInputStream : TStream); var vParser : PDF_parsers.TParser; vPDF : PDFs.IPDF; vPages : PDFs.IPDFObject; begin vParser := PDF_parsers.TParser.Create(aInputStream, True); vPDF := vParser.Parse(); vPDF.Validate(); vPages := IPDFObject(InterfaceFromVariant(vPDF.Root.MetadataGet('/Pages'))); Writeln(''); Writeln(''); {TODO simple document } Writeln(''); TraversePages(vPages, 0); Writeln(''); Writeln(''); FreeAndNil(vParser); end; var vInputStream : TStream; begin fFontUnicodeTables := TSymbolTable.Create(); vInputStream := TFileStream.Create(ParamStr(1), fmOpenRead); Parse(vInputStream); FreeAndNil(vInputStream); end.