unit PDF_content_parsers;
{PDF parser.
 Copyright (C) 2008 Danny Milosavljevic

 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 Free Software Foundation; either version 2 of the License, or (at your
 option) any later version.

 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 for more details.

 You should have received a copy of the GNU General Public License along
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
}
{$MODE OBJFPC}
{$M+}

interface
uses PDF_parsers, PDF_contents, classes;

// TODO make it possible for this to parse across streams.
// The reason this is needed is that /Contents can list [1 0 R 2 0 R] and
// then just half-finish an object in "1 0 obj"'s stream.

type
  { Parser for PDF content streams. It reads operands followed by an
    operator and collects them as PDF_contents.TInstruction objects. }
  TParser = class(PDF_parsers.TParser)
  private
    fStreams : Variant; // pins it down.
    fStreamIndex : Cardinal; // index into fStreams of the next stream to hand out.
  protected
    function Instruction() : PDF_contents.TInstruction;
    function Operator_() : PDF_contents.TOperator; inline;
    function TextBlockBodyE() : PDF_contents.TTextBlock;
    function GoToNextStream(out aBOwnsStream : Boolean) : TStream; override;
  published
    function Parse() : PDF_contents.TContent;
    constructor Create(const aStreams : Variant);
  end;

implementation
uses variants, PDFs;

{ Parses one instruction: zero or more operands followed by one operator.

  Returns a newly created TInstruction; the caller takes ownership.
  If parsing fails with an exception, the partially built instruction is
  freed before the exception is re-raised, so nothing leaks on the error
  path. }
function TParser.Instruction() : TInstruction;
var
  vArgument : Variant;
begin
  Result := TInstruction.Create();
  try
    Result.Operator_ := opInvalid;
    OptionalWhitespace();
    while Result.Operator_ = opInvalid do begin
      if Input in ['a'..'z', 'A'..'Z', '"', ''''] then begin
        // operator: it terminates the instruction.
        Result.Operator_ := Operator_();
        Break;
      end else begin
        // argument.
        //FPC 2.2.4 debugger BPrintConsumedInput := True;
        vArgument := ValueOrBracedValueList();
        Result.AddArgument(vArgument);
        //FPC 2.2.4 debugger BPrintConsumedInput := False;
        //FPC 2.2.4 debugger Write(' ');
      end;
      OptionalWhitespace(); // yeah, really optional.
    end;
  except
    // Do not leak the half-built instruction when the input is malformed.
    Result.Free;
    Result := nil;
    raise;
  end;
end;

{ Consumes one operator token from the input and returns the matching
  TOperator value.

  The dispatch is on the first character; where several operators share a
  prefix (for example S, SC, SCN) the following characters are peeked at
  and consumed as far as they match. Unknown operators are reported via
  Error(); should Error() return, the result is opInvalid. }
function TParser.Operator_() : TOperator; inline;
begin
  Result := opInvalid;
  //Writeln('OP', Input);
  case Input of
    'B': begin
      Consume();
      case Input of
        '*': begin
          Consume();
          Result := opPaintFillAndStrokeEvenOdd {B*};
        end;
        'M': begin
          Consume();
          Consume('C');
          Result := opBeginMarkedContentBlock {BMC};
        end;
        'D': begin
          Consume();
          Consume('C');
          Result := opBeginMarkedContentBlockWithAttributes {BDC};
        end;
        'T': begin
          Consume();
          Result := opBeginText {BT};
        end;
        'X': begin
          Consume();
          Result := opBeginCompabilitySection {BX};
        end;
      else
        // 'B' alone.
        Result := opPaintFillAndStroke {B, like construct, f, construct, S};
        //Error('B[MT*]', 'B' + Input);
      end;
    end;
    'C': begin
      Consume();
      Consume('S');
      Result := opColorSetStrokingColorspace {CS};
    end;
    'D': begin
      Consume();
      case Input of
        'o': begin
          Consume();
          Result := opPaintExternalObject {Do};
        end;
        'P': begin
          Consume();
          Result := opSetMarkedContentPointWithAttributes {DP};
        end;
      else
        Error('D[oP]', 'D' + Input);
      end;
    end;
    'E': begin
      Consume();
      case Input of
        'T': begin
          Consume();
          Result := opEndText {ET};
        end;
        'M': begin
          Consume();
          Consume('C');
          Result := opEndMarkedContentBlock {EMC};
        end;
        'X': begin
          Consume();
          Result := opEndCompabilitySection {EX};
        end;
      else
        // The expected set lists every suffix handled above, EX included.
        Error('E[TMX]', 'E' + Input);
      end;
    end;
    'M': begin
      Consume();
      if Input = 'P' then begin
        Consume();
        Result := opSetMarkedContentPoint {MP};
      end else begin
        // 'M' alone; it has already been consumed.
        Result := opSetMiterLimit {M};
      end;
    end;
    'T': begin
      Consume();
      case Input of
        'j': begin
          Consume();
          Result := opShowString {Tj};
        end;
        'w': begin
          Consume();
          Result := opSetWordSpacing {Tw};
        end;
        'c': begin
          Consume();
          if Input = 'm' then begin
            // FIXME is that right?
            // NOTE(review): "Tcm" does not look like a standard operator;
            // kept unchanged for backward compatibility - confirm.
            Consume();
            Result := opTransformationMatrixAppend;
          end else begin
            Result := opSetCharacterSpacing {Tc};
          end;
        end;
        'J': begin
          Consume();
          Result := opShowStringWithVariableSpacing {TJ};
        end;
        'f': begin
          Consume();
          Result := opSetFontAndSize {Tf};
        end;
        'd': begin
          Consume();
          Result := opMoveCaret {Td};
        end;
        'm': begin
          Consume();
          Result := opSetTextMatrix {Tm};
        end;
        'L': begin
          Consume();
          Result := opSetTextLeading {TL} {for T*, ', "};
        end;
        '*': begin
          Consume();
          Result := opMoveCaretToStartOfNextLine {T*};
        end;
        'r': begin
          Consume();
          Result := opSetTextRenderingMode {Tr};
        end;
        's': begin
          Consume();
          Result := opSetTextRise {Ts};
        end;
        'D': begin
          Consume();
          Result := opMoveCaretToStartOfNextLineAndOffsetAndSetLeading {TD};
        end;
        'z': begin
          Consume();
          Result := opSetHorizontalScaling {Tz}; // in percent of normal width.
        end;
      else
        // Name the accepted suffixes, as the sibling branches do.
        Error('T[*DJLcdfjmrswz]', 'T' + Input);
      end;
    end;
    '''': begin
      Consume();
      Result := opNextLineShowString {'};
    end;
    '"': begin
      Consume();
      Result := opNextLineSpacedShowString {"};
    end;
    'w': begin
      Consume();
      Result := opSetLineWidth {w};
    end;
    'J': begin
      Consume();
      Result := opSetLineCap {J};
    end;
    'j': begin
      Consume();
      Result := opSetLineJoin {j};
    end;
    'd': begin
      Consume();
      Result := opSetDashPattern {d};
    end;
    'i': begin
      Consume();
      Result := opFlatness {i};
    end;
    'g': begin
      Consume();
      if Input = 's' then begin
        Consume();
        Result := opSetParameterValue {gs};
      end else begin
        Result := opColorSetNonstrokingGrayColor {g};
      end;
    end;
    'G': begin
      Consume();
      Result := opColorSetStrokingGrayColor {G};
    end;
    'k': begin
      Consume();
      Result := opColorSetNonstrokingCMYKColor {k}; // colorspace := CMYK, Color := .
    end;
    'K': begin
      Consume();
      Result := opColorSetStrokingCMYKColor {K}; // colorspace := CMYK, Color := .
    end;
    'q': begin
      Consume();
      Result := opPushGraphicsState {q};
    end;
    'Q': begin
      Consume();
      Result := opPopGraphicsState {Q};
    end;
    'm': begin
      Consume();
      Result := opPathBegin {m};
    end;
    'l': begin
      Consume();
      Result := opPathAddLine {l};
    end;
    'c': begin
      Consume();
      case Input of
        's': begin
          Consume();
          Result := opColorSetNonstrokingColorspace {cs};
        end;
        'm': begin
          Consume();
          Result := opTransformationMatrixAppend {cm};
        end;
      else
        Result := opPathAddCubicBezier123 {c};
      end;
    end;
    'v': begin
      Consume();
      Result := opPathAddCubicBezier23 {v};
    end;
    'y': begin
      Consume();
      Result := opPathAddCubicBezier13 {y};
    end;
    'h': begin
      Consume();
      Result := opPathClose {h};
    end;
    'R': begin
      Consume();
      Consume('G');
      Result := opColorSetStrokingRGBColor {RG}; // colorspace := RGB, Color := .
    end;
    'r': begin
      Consume();
      case Input of
        'i': begin
          Consume();
          Result := opSetColorIntent {ri};
        end;
        'g': begin
          Consume();
          Result := opColorSetNonstrokingRGBColor {rg}; // colorspace := RGB, Color := .
        end;
      else
        Consume('e');
        Result := opPathRectangle {re};
      end;
    end;
    'S': begin
      Consume();
      if Input = 'C' then begin
        Consume();
        if Input = 'N' then begin
          Consume();
          Result := opColorSetStrokingColor {SCN};
        end else begin
          Result := opColorSetStrokingColorLimited {SC}; // in current color space.
        end;
      end else begin
        Result := opPaintStroke {S};
      end;
    end;
    's': begin
      Consume();
      if Input = 'c' then begin
        Consume();
        if Input = 'n' then begin
          Consume();
          Result := opColorSetNonstrokingColor {scn};
        end else begin
          Result := opColorSetNonstrokingColorLimited {sc};
        end;
      end else begin
        Result := opPaintCloseAndStroke {s}; // = h S.
      end;
    end;
    'f': begin
      Consume();
      if Input = '*' then begin
        Consume();
        Result := opPaintFillEvenOdd {f*};
      end else begin
        Result := opPaintCloseAndFill {f, F};
      end;
    end;
    'F': begin
      Consume();
      Result := opPaintCloseAndFill {f, F};
    end;
    'b': begin
      Consume();
      if Input = '*' then begin
        Consume();
        Result := opPaintCloseAndFillAndStrokeEvenOdd {b*};
      end else begin
        Result := opPaintCloseAndFillAndStroke {b};
      end;
    end;
    'n': begin
      Consume();
      Result := opPaintNoPaint {n};
    end;
    'W': begin
      Consume();
      if Input = '*' then begin
        Consume();
        Result := opClipIntersectEvenOdd {W*};
      end else begin
        Result := opClipIntersect {W};
      end;
    end;
  else
    Error('');
  end;
end;

{ Parses the instructions of a text block up to and including the
  instruction whose operator is opEndText, or until the input runs out.

  Returns a newly created TTextBlock holding those instructions. The block
  is freed again if parsing fails with an exception. }
function TParser.TextBlockBodyE() : PDF_contents.TTextBlock;
var
  vOperator : TOperator;
  vInstruction : TInstruction;
begin
  Result := TTextBlock.Create();
  try
    vOperator := opInvalid;
    while (vOperator <> opEndText) and (BInput) do begin
      vInstruction := Instruction();
      vOperator := vInstruction.Operator_;
      Result.AddInstruction(vInstruction); // FIXME use.
    end;
  except
    // Do not leak the half-filled block on a parse error.
    Result.Free;
    Result := nil;
    raise;
  end;
end;

{ Parses the whole input into a TContent.

  Every instruction is appended to the result. For an opBeginText
  instruction, the text block that follows is parsed as well and attached
  to that instruction as an extra argument.

  On an exception the objects built so far are freed and the exception is
  re-raised. }
function TParser.Parse() : PDF_contents.TContent;
var
  vInstruction : TInstruction;
begin
  Result := TContent.Create();
  try
    while BInput do begin
      vInstruction := Instruction();
      try
        if vInstruction.Operator_ = opBeginText then begin
          Whitespace();
          //Result.AddTextBlock(TextBlockBodyE()); // FIXME use.
          vInstruction.AddArgument(IInterface(TextBlockBodyE()));
          //Consume('E');
        end;
      except
        // The instruction has not been handed to Result yet, so it must be
        // released here.
        vInstruction.Free;
        raise;
      end;
      Result.AddInstruction(vInstruction);
      OptionalWhitespace();
    end;
  except
    Result.Free;
    Result := nil;
    raise;
  end;
end;

{ Hands out the next entry of fStreams as the stream to read from.

  While entries remain, aBOwnsStream is set to False, the Stream of the
  entry's IContents is returned and fStreamIndex advances. Once the array
  is exhausted the call is delegated to the inherited implementation. }
function TParser.GoToNextStream(out aBOwnsStream : Boolean) : TStream;
begin
  if fStreamIndex <= VarArrayHighBound(fStreams, 1) then begin
    aBOwnsStream := False;
    Result := IContents(InterfaceFromVariant(fStreams[fStreamIndex])).Stream;
    Inc(fStreamIndex);
  end else begin
    Result := inherited GoToNextStream(aBOwnsStream);
  end;
end;

{ Creates a parser over one or several content streams.

  aStreams is either a variant array of entries or a single variant that
  holds an IContents; a single IContents is wrapped into a one-element
  array so the rest of the class only deals with arrays.

  The fields are initialised before the inherited constructor runs
  (presumably because it may already ask for the first stream - confirm). }
constructor TParser.Create(const aStreams : Variant);
var
  vContents : PDFs.IContents;
begin
  fStreams := aStreams;
  // Only probe for a single IContents when the argument is not already an
  // array; an array cannot be meaningfully converted to an interface.
  if not VarIsArray(aStreams) then begin
    vContents := InterfaceFromVariant(aStreams) as IContents; // FIXME does that crash on nil?
    if Assigned(vContents) then begin
      fStreams := VarArrayCreate([0,0], varVariant);
      fStreams[0] := aStreams;
    end;
  end;
  //assert(VarType(aStreams) and mask = vtArray);
  fStreamIndex := VarArrayLowBound(fStreams, 1);
  inherited Create(TMemoryStream.Create(), True, nil);
end;

end.