unit lexer_interfaces;

{ Declarations shared by lexer implementations and their clients:
  reserved state/token constants, the lexer exception classes, basic type
  aliases, and the ISourcePosition and ILexer interfaces.

  This unit contains declarations only; the implementation section is empty.

  Formatting note: "//" comments run to the end of the physical line, so every
  declaration must stay on its own line.  Joining lines makes a preceding "//"
  comment swallow the declarations that follow it. }

interface

uses
  classes, sysutils, typinfo;

{ Emit run-time type information for the types declared below. }
{$M+}

const
  { Reserved lexer state numbers (see TLexerState). }
  StateInvalid = 0;
  StateInitial = 1;

  { Reserved token numbers (see TToken). }
  TokenNone = 0;
  TokenInvalid = 1;

type
  { Raised on lexer load failures (raise sites are outside this unit). }
  ELexerLoadError = class(Exception)
  end;

  { Raised on end-of-input errors (raise sites are outside this unit). }
  ELexerEofError = class(Exception)
  end;

  TToken = Integer; (* 0: none, 1: invalid, < 10: reserved *)
  TLexerState = Integer; (* 0: invalid; 1: initial *)
  //TLexerConstArray = array of string;
  //TLexerStateMatches = array of TLexerState; // set of states which matched something searched for.

  // TODO use "Byte" to make clear that we don't really care _what_ the input is.
  TLexerChar = Char;

  { Read/write access to a source file name and a position within it.
    NOTE(review): the unit of SourcePosition (byte offset, character index, ...)
    is not established by this unit - confirm against the implementations.
    TODO just have THE STREAM support this. }
  ISourcePosition = interface
    ['{f02de8f2-b319-11dd-b13b-2724b9d46feb}']
    function GetSourceFile : TFileName;
    //function GetSourceLine : Cardinal;
    procedure SetSourceFile(aValue : TFileName);
    //procedure SetSourceLine(aValue : Cardinal);
    property SourceFile : TFileName read GetSourceFile write SetSourceFile;
    //property SourceLine : Cardinal read GetSourceLine write SetSourceLine;
    function GetSourcePosition : Cardinal;
    procedure SetSourcePosition(aValue : Cardinal);
    property SourcePosition : Cardinal read GetSourcePosition write SetSourcePosition;
  end;

  { Callback type for the ILexer.InputConsumed property.
    aSender : the object issuing the notification.
    aInput  : the input character the notification is about. }
  TLexerInputConsumed = procedure(aSender : TObject; aInput: TLexerChar) of object;

  { Contract of a lexer reading its input from a TStream.

    Use the properties, not the getters/setters behind them.

    The order of the method declarations is part of the interface layout;
    do not reorder them. }
  ILexer = interface
    ['{06d7c29e-b31a-11dd-9b5e-f7f16c1695de}']
    { use the properties, not the getters! }
    function GetSourceStream : TStream;
    procedure SetSourceStream(stream : TStream);
    function GetEOF : Boolean;
    // NOTE(review): the State accessors and the State property below are taken
    // to be disabled ("BAD TYPE") - confirm against the implementing classes.
    // BAD TYPE function GetState : TLexerState;
    // BAD TYPE procedure SetState(aValue : TLexerState);
    function GetToken : TToken;
    procedure SetToken(aValue : TToken);
    function GetInputChar : TLexerChar;
    procedure SetInputChar(aValue : TLexerChar);
    //function GetMatchStart : Cardinal; // inclusive Position <--- better? optional?
    //function GetMatchEnd : Cardinal; // exclusive Position <--- better? optional?
    //function GetMatchedText : string; // only use this when you need to, wastes memory.
    //procedure SetMatchedText(aValue : string); // ditto.
    function GetMatchBeginning : Cardinal;
    procedure SetMatchBeginning(aValue : Cardinal);
    function GetMatchEnd : Cardinal;
    procedure SetMatchEnd(aValue : Cardinal);

    // property State : TLexerState read GetState write SetState;
    property Token : TToken read GetToken write SetToken;
    //property UTF8Mode : Boolean read fUTF8Mode write SetUTF8Mode;
    property SourceStream : TStream read GetSourceStream write SetSourceStream; // stored False;
    { Read-only. }
    property EOF : Boolean read GetEOF stored False;
    property InputChar : TLexerChar read GetInputChar write SetInputChar; // stored False;
    // TODO use positions, only, f. e. (matchStart, matchEndExclusive)
    //property MatchedText : string read GetMatchedText write SetMatchedText; // stored False; // TODO remove this? Annoying...
    property MatchBeginning : Cardinal read GetMatchBeginning write SetMatchBeginning; // Position, inclusive.
    property MatchEnd : Cardinal read GetMatchEnd write SetMatchEnd; // Position, exclusive.
    // property LexerStates: TLexerStates; for streaming

    // TODO: pass token, ensure that it's there?
    // TODO: only update #"fToken", no return.
    function Consume : TToken;
    { returns: 0 if no token matched yet; usually you will just use Consume. }
    function ConsumeOne : TToken;

    //function StringTransitionsInternal(statenr: Integer; var trans2: TLexerStateTrans; forceDetail: Boolean): string;
    //function StringTransitions(statenr: Integer; forceDetail: Boolean): string;
    //procedure PrintStates(forceDetail: Boolean);
    ////procedure SetUTF8Mode(value: Boolean);
    //procedure LoadLexer(stream: TStream; tokenConsts: TLexerConstArray; caseInsensitive: Boolean);
    // TODO Tokens : map( key :: , value :: ) or something, maybe some ID object.
    // or GetTokens : List(TToken), GetTokenNames : List(String) eeew.

    function GetInputConsumed : TLexerInputConsumed;
    procedure SetInputConsumed(aValue : TLexerInputConsumed);
    property InputConsumed : TLexerInputConsumed read GetInputConsumed write SetInputConsumed;
  end;

// TODO store lexer version to avoid restoring state from another version.

//function TokensFromEnum(enum1: PTypeInfo): TLexerConstArray; // call TokensFromEnum(TypeInfo(TToken));

{ an actual lexer has in a state transition: false-or fallback_token; map transitions; }

implementation

end.