// I, Danny Milosavljevic, hereby release this code into the public domain. unit scanners; {$MODE OBJFPC} {$M+} interface uses sysutils, classes, ring_buffers; type EParseError = class(Exception); // TODO line number, offset etc. TScanner = class(TInterfacedObject, IInterface) private fInput : Char; fBInput : Boolean; fBPrintConsumedInput : Boolean; fStream : TStream; fBOwnsStream : Boolean; fOffset : Int64; fLineNumber : Cardinal; fColumnNumber : Cardinal; fEchoChamber : TStream; fEOFInputMark : Char; fBuffer : TRingBuffer; private procedure IncreaseOffset(); inline; public procedure CalculatePosition(); // this will calculate LineNumber and ColumnNumber by actually reading the stream. protected procedure Read(); inline; overload; procedure Echo(); inline; public destructor Destroy(); override; published constructor Create(aStream : TStream; aBOwnsStream : Boolean); property Input : Char read fInput; property BInput : Boolean read fBInput; property EOFInputMark : Char read fEOFInputMark write fEOFInputMark; { this field is there because: - some streams have multiple users and hence Stream.Position is not retained for our use. Hence use this field instead. - the scanner reads ahead and buffers some data in order to cut down on function call count. If you want to know where in the Stream the Input was, read Offset, NOT the Stream Position.} property Offset : Int64 read fOffset write fOffset { use setter sparingly (LineNumber and ColumnNumber won't be correct), and the stream's Position should match. }; property LineNumber : Cardinal read fLineNumber write fLineNumber; property ColumnNumber : Cardinal read fColumnNumber write fColumnNumber; property Stream : TStream read fStream; property BPrintConsumedInput : Boolean read fBPrintConsumedInput write fBPrintConsumedInput; procedure Consume(aInput : Char); //inline; function Consume() : Char; //inline; procedure ConsumeBlock(aCount : Cardinal); procedure Consume(aInput : String); procedure Unload(aInput : Char); inline; // TODO remove this? procedure Error(aExpected : String = ''; aGot : String = ''); protected function GoToNextStream(out aBOwnsStream : Boolean) : TStream; virtual; end; implementation destructor TScanner.Destroy(); begin FreeAndNil(fBuffer); inherited Destroy(); end; constructor TScanner.Create(aStream : TStream; aBOwnsStream : Boolean); begin //assert(Assigned(aStream)); fBuffer := TRingBuffer.Create(); fInput := fEOFInputMark; fLineNumber := 1; fColumnNumber := 1; fOffset := aStream.Position; if fOffset > 0 then fLineNumber := 0; // unknown. fStream := aStream; fBOwnsStream := aBOwnsStream; Read(); end; procedure TScanner.Echo(); inline; begin if fBInput and Assigned(fEchoChamber) then fEchoChamber.WriteByte(Ord(fInput)); end; procedure TScanner.IncreaseOffset(); inline; begin if fBInput then begin Echo(); if fInput = #10 then begin if fLineNumber <> 0 then begin // not disabled Inc(fLineNumber); fColumnNumber := 0; end; end; Inc(fOffset); Inc(fColumnNumber); end; end; procedure TScanner.Read(); inline; var vCount : Cardinal; vBuffer : array[0..511] of Char; begin IncreaseOffset(); //fInput := Chr(fStream.ReadByte()); // fStream.Read(fInput, Sizeof(fInput)) > 0; //fBInput := True; if Assigned(fStream) and (fBuffer.FillCount = 0) then begin // buffer some more data to amortize virtual function call overhead (and an eventual trap overhead)... vCount := fStream.Read(vBuffer[0], Length(vBuffer)); while vCount = 0 do begin // EOF fStream := GoToNextStream(fBOwnsStream); if not Assigned(fStream) then Break; vCount := fStream.Read(vBuffer[0], Length(vBuffer)); end; if vCount > 0 then fBuffer.Write(vBuffer[0], vCount); end; fBInput := fBuffer.Read(fInput, 1) > 0; if not fBInput then fInput := fEOFInputMark; // TODO configurable? end; procedure TScanner.Unload(aInput : Char); inline; begin if (not fBInput) or (Self.Input <> aInput) then Self.Error(aInput); IncreaseOffset(); fInput := fEOFInputMark; fBInput := False; fLineNumber := 0; end; procedure TScanner.ConsumeBlock(aCount : Cardinal); begin // FIXME optimize this (block-wise reading out of fBuffer, block-wise consume string call). while aCount > 0 do begin Consume(); Dec(aCount); end; end; procedure TScanner.Consume(aInput : Char); //inline; begin if (Self.Input <> aInput) then Self.Error(aInput); //Writeln('consumed', Self.Input); if fBPrintConsumedInput then Write(fInput); Self.Read(); end; procedure TScanner.Consume(aInput : String); var i : Integer; begin for i := 1 to Length(aInput) do begin if aInput[i] <> Self.Input then Self.Error(aInput, Copy(aInput, 1, i - 1) + Self.Input); Self.Consume(); end; end; function TScanner.Consume() : Char; {inline;} begin //Writeln('consumed', Self.Input); if fBPrintConsumedInput then Write(fInput); Result := fInput; Self.Read(); end; function Repr(aString : String) : String; var vOldIndex : Integer; vNewIndex : Integer; vChar : Char; begin SetLength(Result, 2 + 7 * Length(aString)); Result[1] := ''''; vNewIndex := 2; for vOldIndex := 1 to Length(aString) do begin vChar := aString[vOldIndex]; if ord(vChar) < 32 then begin Result[vNewIndex] := ''''; Result[vNewIndex + 1] := '#'; Result[vNewIndex + 2] := chr(ord('0') + ((ord(vChar) div 100) mod 10)); Result[vNewIndex + 3] := chr(ord('0') + ((ord(vChar) div 10) mod 10)); Result[vNewIndex + 4] := chr(ord('0') + (ord(vChar) mod 10)); Result[vNewIndex + 5] := ''''; Inc(vNewIndex, 6 - 1); end else if vChar = '''' then begin Result[vNewIndex] := ''''; Result[vNewIndex + 1] := ''''; Inc(vNewIndex, 2 - 1); end else Result[vNewIndex] := vChar; Inc(vNewIndex); end; Result[vNewIndex] := ''''; SetLength(Result, vNewIndex); end; procedure TScanner.CalculatePosition(); var vOldPosition : Int64; vPosition : Cardinal; vItem : Byte; begin // debugging helper, not that useful otherwise. vOldPosition := fStream.Position; fStream.Position := 0; fLineNumber := 1; fColumnNumber := 1; for vPosition := 1 to vOldPosition do begin vItem := fStream.ReadByte(); if vItem = 10 then begin Inc(fLineNumber); fColumnNumber := 0; end; Inc(fColumnNumber); end; // should already be there... assert(fStream.Position = vOldPosition); end; procedure TScanner.Error(aExpected : String = ''; aGot : String = ''); var vInfo : String; begin vInfo := ''; if aExpected <> '' then begin if aGot = '' then aGot := Self.Input; vInfo := ': expected ' + Repr(aExpected) + ' but got ' + Repr(aGot); end; if Self.LineNumber = 0 then begin CalculatePosition(); end; raise EParseError.Create(Format('error: invalid syntax near offset %d (line %d, column %d)%s.', [Integer(Self.Offset), Self.LineNumber, Self.ColumnNumber, vInfo])); end; function TScanner.GoToNextStream(out aBOwnsStream : Boolean) : TStream; begin aBOwnsStream := False; Result := nil; end; end.