unit encodings;

{$M+}

interface

uses
  classes, lexer_interfaces, sysutils;

type
  { Position of the incremental UTF-8 decoder inside a byte sequence.
    The name says which byte is expected next; upBeginning means "expecting a lead byte". }
  TUTF8ParseState = (
    upInvalid,
    upBeginning,
    upSecondOf2Bytes,
    upSecondOf3Bytes,
    upThirdOf3Bytes,
    upSecondOf4Bytes,
    upThirdOf4Bytes,
    upFourthOf4Bytes
  );

  TUnicodeCodepoint = Cardinal;

  { Callback signature for "one codepoint has been fully decoded". }
  TUTF8ToUnicodeCodepointCompleted = procedure(aSender : TObject; aCodepoint : TUnicodeCodepoint) of object;

  // TODO use normal #lexer_interfaces
  { Read-only access to the codepoint a decoder currently holds. }
  ICharacterDecoder = interface
    function GetCodepoint : TUnicodeCodepoint;
    property Codepoint : TUnicodeCodepoint read GetCodepoint;
  end;

  // len: see first byte. make sure to 0-pad in order to be able to detect broken code.
  TUTF8EncodedCharacter = array[0..3] of Char;

  // this is also used for runs of characters right now, so do yourself a favour
  // and don't think it's just 4 bytes.
  TUnicodeCharacter = array of TUnicodeCodepoint;

  { Incremental UTF-8 decoder that reads bytes from a TStream and hands out
    Unicode codepoints as tokens (ILexer), while tracking the source position
    (ISourcePosition). }
  TUTF8ToUnicode = class(TInterfacedObject, ILexer, ISourcePosition, ICharacterDecoder)
  private
    fState : TUTF8ParseState;                                 // which byte of a sequence is expected next.
    fUnicode : TUnicodeCodepoint;                             // codepoint assembled so far.
    fCharacterCompleted : TUTF8ToUnicodeCodepointCompleted;   // NOTE(review): no setter for this is declared in this class.
    fSourceStream : TStream;                                  // where the bytes come from.
    fSourceFile : TFileName;
    fSourcePosition : Cardinal;                               // incremented once per consumed byte.
    fMatchBeginning : Cardinal;
    fMatchEnd : Cardinal;
    fClearUnicodeNextP : Boolean;                             // when set, fUnicode is zeroed at the start of the next ConsumeOne.
    // TODO fSourceColumn : Integer;
    fInputChar : Char;                                        // one byte of look-ahead.
    fInputCharAvailableP : Boolean;                           // whether fInputChar currently holds an unconsumed byte.
    fInputConsumed : TLexerInputConsumed;                     // optional callback, called for every consumed byte.
  protected
    { decoder internals. }
    function EmitCharacterCompleted : TToken;
    procedure EmitInvalid;
    procedure LLConsume;
    procedure CacheInput;

    { property accessors, grouped by property. }
    function GetSourceStream : TStream;
    procedure SetSourceStream(aSourceStream : TStream);

    function GetSourceFile : TFileName;
    procedure SetSourceFile(aValue : TFileName);

    function GetSourcePosition : Cardinal;
    procedure SetSourcePosition(aValue : Cardinal);

    function GetInputChar : Char;
    procedure SetInputChar(aInputChar : Char);

    function GetMatchBeginning : Cardinal;
    procedure SetMatchBeginning(aValue : Cardinal);

    function GetMatchEnd : Cardinal;
    procedure SetMatchEnd(aValue : Cardinal);

    function GetState : TLexerState;
    procedure SetState(aValue : TLexerState);

    function GetToken : TToken;
    procedure SetToken(aValue : TToken);

    function GetInputConsumed : TLexerInputConsumed;
    procedure SetInputConsumed(aValue : TLexerInputConsumed);

    function GetEOF : Boolean;
    function GetCodepoint : TUnicodeCodepoint;
  published
    function ConsumeOne : TToken;
    function Consume : TToken;

    constructor Create; overload;
    constructor Create(aSourceStream : TStream); overload;

    property SourceStream : TStream read GetSourceStream write SetSourceStream;
    property InputChar : Char read GetInputChar write SetInputChar;
    property SourcePosition : Cardinal read GetSourcePosition write SetSourcePosition;
    property MatchBeginning : Cardinal read GetMatchBeginning write SetMatchBeginning;
    property MatchEnd : Cardinal read GetMatchEnd write SetMatchEnd;
    property State : TLexerState read GetState write SetState;
    property EOF : Boolean read GetEOF;
    property Codepoint : TUnicodeCodepoint read GetCodepoint;
    property InputConsumed : TLexerInputConsumed read GetInputConsumed write SetInputConsumed;
  public
    property SourceFile : TFileName read GetSourceFile write SetSourceFile;
    property Token : TToken read GetToken write SetToken;
  end;

{ Length in bytes of the UTF-8 sequence stored in aCharacter, judged by its first byte. }
function UTF8Length(const aCharacter : TUTF8EncodedCharacter) : Byte; overload; // in bytes.
{ Length in bytes of a UTF-8 sequence, judged by its lead byte alone. }
function UTF8Length(aFirstItem : Char) : Byte; overload;

{ Whether aCodepoint starts a new user-visible character (as opposed to a combining mark). }
function IsBeginningOfNewCharacter(aCodepoint : TUnicodeCodepoint) : Boolean; inline;

{ Decodes the UTF-8 sequence that starts at aCharacter[aOffset]. }
function DecodeUTF8(const aCharacter : TUTF8EncodedCharacter; aOffset : Cardinal = 0) : TUnicodeCodepoint;

implementation

uses
  debug;

{ ---- construction ---- }

{ Creates a decoder that has no source stream yet. }
constructor TUTF8ToUnicode.Create; overload;
begin
  Create(nil);
end;

{ Creates a decoder reading from aSourceStream, waiting for a lead byte. }
constructor TUTF8ToUnicode.Create(aSourceStream : TStream); overload;
begin
  fState := upBeginning;
  fSourceStream := aSourceStream;
end;

{ ---- plain property accessors ---- }

function TUTF8ToUnicode.GetSourceStream : TStream;
begin
  Result := fSourceStream;
end;

procedure TUTF8ToUnicode.SetSourceStream(aSourceStream : TStream);
begin
  fSourceStream := aSourceStream;
end;

function TUTF8ToUnicode.GetSourceFile : TFileName;
begin
  Result := fSourceFile;
end;

procedure TUTF8ToUnicode.SetSourceFile(aValue : TFileName);
begin
  fSourceFile := aValue;
end;

function TUTF8ToUnicode.GetSourcePosition : Cardinal;
begin
  Result := fSourcePosition;
end;

procedure TUTF8ToUnicode.SetSourcePosition(aValue : Cardinal);
begin
  fSourcePosition := aValue;
end;

function TUTF8ToUnicode.GetInputChar : Char;
begin
  Result := fInputChar;
end;

{ Overwrites the cached look-ahead byte; the "byte available" flag is not touched. }
procedure TUTF8ToUnicode.SetInputChar(aInputChar : Char);
begin
  fInputChar := aInputChar;
end;

function TUTF8ToUnicode.GetMatchBeginning : Cardinal;
begin
  Result := fMatchBeginning;
end;

procedure TUTF8ToUnicode.SetMatchBeginning(aValue : Cardinal);
begin
  fMatchBeginning := aValue;
end;

function TUTF8ToUnicode.GetMatchEnd : Cardinal;
begin
  Result := fMatchEnd;
end;

procedure TUTF8ToUnicode.SetMatchEnd(aValue : Cardinal);
begin
  fMatchEnd := aValue;
end;

{ ---- decoder internals ---- }

{ Consumes the cached look-ahead byte: notifies the InputConsumed callback
  (if one is assigned), then advances the source position by one byte and
  marks the look-ahead as empty. A byte must be cached when this is called. }
procedure TUTF8ToUnicode.LLConsume;
begin
  Assert(fInputCharAvailableP);

  // the callback runs before the position moves.
  if Assigned(fInputConsumed) then
    fInputConsumed(Self, fInputChar);

  Inc(fSourcePosition);
  fInputCharAvailableP := False;
end;

{ Reports the codepoint assembled in fUnicode as complete.
  Calls the completion callback (if one is assigned), returns the codepoint as
  the token, schedules fUnicode to be cleared on the next ConsumeOne and puts
  the decoder back into "expecting a lead byte". }
function TUTF8ToUnicode.EmitCharacterCompleted : TToken;
begin
  if Assigned(fCharacterCompleted) then
    fCharacterCompleted(Self, fUnicode);

  fState := upBeginning;
  fClearUnicodeNextP := True;
  Result := fUnicode;
end;
{ Abandons the sequence currently being assembled and goes back to
  "expecting a lead byte".
  The current look-ahead byte is NOT consumed here. For a bad continuation byte
  that lets ConsumeOne re-examine the same byte as the start of a new sequence;
  invalid lead bytes are consumed by ConsumeOne itself before it calls this.
  NOTE(review): the original carried a "// ??? Self.LLConsume;" remark here; it
  is read as commented-out code - confirm that was the intent. }
procedure TUTF8ToUnicode.EmitInvalid;
begin
  //fClearUnicodeNextP := True;
  fUnicode := 0;
  fState := upBeginning;
end;

{ Makes sure one byte of look-ahead is cached in fInputChar, reading it from the
  source stream if necessary. If the read fails with EReadError (presumably: end
  of stream - verify against the TStream in use) the look-ahead simply stays
  empty, which the rest of the class treats as end of input. }
procedure TUTF8ToUnicode.CacheInput;
begin
  // TODO buffer.
  if fInputCharAvailableP then
    Exit;

  try
    fInputChar := Chr(fSourceStream.ReadByte);
    fInputCharAvailableP := True;
  except
    on Exception : EReadError do
    begin
      // deliberately swallowed: "no more input" is signalled by fInputCharAvailableP = False.
    end;
  end;
end;

{ True whenever no look-ahead byte is currently cached.
  NOTE(review): this does not try to read ahead, so it is also True right after
  a byte has been consumed even if the stream has more data - confirm callers
  expect that. }
function TUTF8ToUnicode.GetEOF : Boolean;
begin
  Result := not fInputCharAvailableP;
end;

{ Feeds at most one input byte into the decoder.
  Returns the completed codepoint when this byte finished a character, and
  TokenNone otherwise (end of input, middle of a sequence, or invalid byte).

  Invalid input handling:
  - a byte that cannot start a sequence ($80..$C1, $F5..$FF) is consumed and
    dropped, so that repeated calls always make progress;
  - a byte that cannot continue the current sequence aborts that sequence and
    is left in the look-ahead, to be re-examined as a lead byte by the next call.

  TODO: overlong 3/4-byte forms, surrogates (D800-DFFF) and values above 10FFFF
  are not rejected here. }
function TUTF8ToUnicode.ConsumeOne : TToken;
var
  tInput : Byte;
begin
  //Dump('TUTF8ToUnicode.ConsumeOne...');
  if fClearUnicodeNextP then
  begin
    // the previous call completed a character; start the next one from scratch.
    fClearUnicodeNextP := False;
    fUnicode := 0;
  end;

  if not fInputCharAvailableP then
    Self.CacheInput;

  if not fInputCharAvailableP then
  begin
    Dump('TUTF8ToUnicode: EOF');
    Result := TokenNone;
    Exit;
  end;
  //raise EEOFError.Create('Unexpected End of Source Stream.'); // ???

  Result := TokenNone;
  tInput := Ord(fInputChar);
  //Dump(Format('fState: %d tInput: %d', [fState, tInput]));

  case fState of
    upBeginning:
      begin
        if tInput < $80 then
        begin
          // ASCII.
          fUnicode := tInput;
          Self.LLConsume;
          Result := Self.EmitCharacterCompleted;
          // TODO line counting on tInput = 10 ("CR" ?): Inc(fSourceLine);
        end
        else if (tInput >= $C2) and (tInput <= $DF) then
        begin
          // 2 bytes total.
          fUnicode := tInput and $1F; // 5 bits.
          Self.LLConsume;
          fState := upSecondOf2Bytes;
        end
        else if (tInput >= $E0) and (tInput <= $EF) then
        begin
          // 3 bytes total.
          fUnicode := tInput and $0F; // 4 bits.
          Self.LLConsume;
          fState := upSecondOf3Bytes;
        end
        else if (tInput >= $F0) and (tInput <= $F4) then
        begin
          // 4 bytes total.
          fUnicode := tInput and $7; // 3 bits.
          Self.LLConsume;
          fState := upSecondOf4Bytes;
        end
        else
        begin
          // Not a valid lead byte. It has to be consumed here: nothing else
          // would ever remove it from the look-ahead, and every further call
          // would see the same byte in the same state.
          Self.LLConsume;
          Self.EmitInvalid;
        end;
      end;

    upSecondOf2Bytes, upSecondOf3Bytes, upThirdOf3Bytes,
    upSecondOf4Bytes, upThirdOf4Bytes, upFourthOf4Bytes:
      begin
        if (tInput >= $80) and (tInput < $C0) then
        begin
          // "10xxxxxx" binary: six more data bits.
          fUnicode := (fUnicode shl 6) or (tInput and $3F);
          Self.LLConsume;

          case fState of
            upSecondOf3Bytes: fState := upThirdOf3Bytes;
            upSecondOf4Bytes: fState := upThirdOf4Bytes;
            upThirdOf4Bytes: fState := upFourthOf4Bytes;
          else
            // that was the last byte of the sequence.
            Result := Self.EmitCharacterCompleted;
          end;
        end
        else
          Self.EmitInvalid;
      end;

    // upInvalid: nothing is done and nothing is consumed.
  end;
end;

function TUTF8ToUnicode.GetState : TLexerState;
begin
  Result := TLexerState(fState);
end;

procedure TUTF8ToUnicode.SetState(aValue : TLexerState);
begin
  fState := TUTF8ParseState(aValue);
end;

{ Decodes and returns the next complete codepoint.
  Keeps feeding bytes to ConsumeOne until a character has been completed;
  invalid bytes are skipped on the way. At end of input the held codepoint is
  reset and 0 is returned (also when the input ends in the middle of a sequence).

  Unlike before, the partially assembled codepoint in fUnicode is no longer
  overwritten between the bytes of a multi-byte sequence, so sequences longer
  than one byte decode to the right value. }
function TUTF8ToUnicode.Consume : TToken;
begin
  repeat
    if not fInputCharAvailableP then
      Self.CacheInput;

    if not fInputCharAvailableP then
    begin
      // end of input.
      fUnicode := 0;
      Result := fUnicode;
      Exit;
    end;

    Result := Self.ConsumeOne;

    // fClearUnicodeNextP is cleared at the start of ConsumeOne and only set
    // again by EmitCharacterCompleted, so here it means "this call completed a
    // character" - which also works for the codepoint 0.
    // ConsumeOne never leaves upInvalid by itself; bail out instead of spinning.
  until fClearUnicodeNextP or (fState = upInvalid);
end;

function TUTF8ToUnicode.GetToken : TToken;
begin
  Result := TToken(fUnicode);
end;

procedure TUTF8ToUnicode.SetToken(aValue : TToken);
begin
  fUnicode := TUnicodeCodepoint(aValue);
end;

function TUTF8ToUnicode.GetCodepoint : TUnicodeCodepoint;
begin
  Result := fUnicode;
end;

function TUTF8ToUnicode.GetInputConsumed : TLexerInputConsumed;
begin
  Result := fInputConsumed;
end;

procedure TUTF8ToUnicode.SetInputConsumed(aValue : TLexerInputConsumed);
begin
  fInputConsumed := aValue;
end;

{
  Width by first byte:

    Binary             Hexadecimal  Decimal  Width
    00000000-01111111  00-7F        0-127    1 byte  (7 data bits remaining here)
    11000010-11011111  C2-DF        194-223  2 bytes (5 data bits remaining here)
    11100000-11101111  E0-EF        224-239  3 bytes (4 data bits remaining here)
    11110000-11110100  F0-F4        240-244  4 bytes (3 data bits remaining here)

  The remaining bits of the first byte are used for data.
  The next bytes, if available, always start with "10..." and always have 6 data bits.

    1100000x           C0, C1                  Overlong encoding: lead byte of a 2-byte sequence, but code point <= 127
    11110101 1111011x  F5, F6, F7              Restricted by RFC 3629: lead byte of 4-byte sequence for codepoint above 10FFFF
    111110xx 1111110x  F8, F9, FA, FB, FC, FD  Restricted by RFC 3629: lead byte of a sequence 5 or 6 bytes long
    1111111x           FE, FF                  Invalid: lead byte of a sequence 7 or 8 bytes long

  TODO: Note 1: The range D800-DFFF is disallowed by Unicode. The encoding scheme
  reliably transforms values in that range, but they are not valid scalar values
  in Unicode. See Table 3-7 in the Unicode 5.0 standard.
}

{ Returns the total length in bytes (1..4) of the UTF-8 sequence introduced by
  the lead byte aFirstItem.
  Raises EConvertError if aFirstItem is a continuation byte or C0/C1 ($80..$C1),
  or a byte that can never start a sequence ($F5..$FF). }
function UTF8Length(aFirstItem : Char) : Byte; overload;
begin
  case Ord(aFirstItem) of
    $00..$7F:
      Result := 1;
    $80..$C1: // oops...
      raise EConvertError.Create('block was cut in the middle of an UTF-8 sequence');
    $C2..$DF: // what about C0? unused?
      Result := 2;
    $E0..$EF:
      Result := 3;
    $F0..$F4:
      Result := 4;
  else
    raise EConvertError.Create('invalid UTF-8 sequence'); // EParserError
  end;
end;

{ Returns the length in bytes of the UTF-8 sequence stored at the start of
  aCharacter. With assertions enabled, also checks that the bytes following the
  lead byte are continuation bytes ($80..$BF), the same test DecodeUTF8 applies.
  Raises EConvertError for an invalid lead byte (see the Char overload). }
function UTF8Length(const aCharacter : TUTF8EncodedCharacter) : Byte; overload; // in bytes.
var
  tIndex : Integer;
begin
  Result := UTF8Length(aCharacter[0]);

  // runs zero times for a 1-byte sequence.
  for tIndex := 1 to Result - 1 do
    assert((Ord(aCharacter[tIndex]) >= $80) and (Ord(aCharacter[tIndex]) < $C0));
end;

{ Decodes the UTF-8 sequence that starts at aCharacter[aOffset] and returns its
  codepoint.
  Raises EConvertError if aOffset lies outside the buffer, if the lead byte is
  invalid, or if the sequence would run past the end of the buffer.
  With assertions enabled, continuation bytes are checked to be in $80..$BF. }
function DecodeUTF8(const aCharacter : TUTF8EncodedCharacter; aOffset : Cardinal = 0) : TUnicodeCodepoint;
var
  tLength : Byte;
  tIndex : Byte;
begin
  if aOffset > High(aCharacter) then
    raise EConvertError.Create('offset is outside of the UTF-8 buffer');

  tLength := UTF8Length(aCharacter[aOffset]);

  // aOffset is at most High(aCharacter) here, so the sum cannot overflow.
  if aOffset + tLength > Length(aCharacter) then
    raise EConvertError.Create('block was cut in the middle of an UTF-8 sequence');

  // data bits carried by the lead byte.
  case tLength of
    1: Result := Ord(aCharacter[aOffset]);
    2: Result := Ord(aCharacter[aOffset]) and $1F; // 5 bits (was $15, which dropped two of them).
    3: Result := Ord(aCharacter[aOffset]) and $0F; // 4 bits.
    4: Result := Ord(aCharacter[aOffset]) and $7;  // 3 bits.
  else
    Result := 0; // not reached: UTF8Length only returns 1..4.
  end;

  // every following byte is "10xxxxxx" binary and adds 6 bits.
  for tIndex := 1 to tLength - 1 do
  begin
    assert((Ord(aCharacter[aOffset + tIndex]) >= $80) and (Ord(aCharacter[aOffset + tIndex]) < $C0));
    Result := (Result shl 6) or (Ord(aCharacter[aOffset + tIndex]) and $3F);
  end;
end;

{procedure UTF8ExtractNext(var aPointer : PChar; var aCount : Cardinal; out aOutput : TUTF8EncodedCharacter);
begin
end;}

{ True for codepoints below $20. }
function IsControlCodepoint(aCodepoint : TUnicodeCodepoint) : Boolean; inline;
begin
  Result := aCodepoint < $20;
end;

{ Returns False when aCodepoint is one of the listed combining / nonspacing
  marks (it belongs to the character whose codepoint came before it), and True
  for everything else.
  The list is the same set of ranges as before, written as one case statement;
  the former separate test for $034F (combining grapheme joiner) was already
  covered by $0300..$036F. The block names in the comments are the Unicode
  blocks the ranges fall into. }
function IsBeginningOfNewCharacter(aCodepoint : TUnicodeCodepoint) : Boolean; inline;
begin
  // common case first: control codes (a unit of themselves) and ASCII.
  if IsControlCodepoint(aCodepoint) or ((aCodepoint >= $20) and (aCodepoint < $80)) then
  begin
    Result := True;
    Exit;
  end;

  case aCodepoint of
    // Combining Diacritical Marks, FOR a character (whose codepoint came before).
    $0300..$036F,
    // Combining Cyrillic.
    $0483..$0486,
    // Hebrew accents and nonspacing points.
    $0591..$05B9, $05BB..$05BD, $05BF, $05C1..$05C2, $05C4,
    // Arabic block.
    $0610..$0615, $064B..$0658, $0670,
    $06D6..$06DC, $06DF..$06E4, $06E7..$06E8, $06EA..$06ED,
    // Syriac block.
    $0711, $0730..$074A,
    // Thaana block.
    $07A6..$07B0,
    // Devanagari block.
    $0901..$0902, $093C, $0941..$0948, $094D, $0951..$0954, $0962..$0963,
    // Bengali block.
    $0981, $09BC, $09C1..$09C4, $09CD, $09E2..$09E3,
    // Gurmukhi block.
    $0A01..$0A02, $0A3C, $0A41..$0A42, $0A47..$0A48, $0A4B..$0A4D, $0A70..$0A71,
    // Gujarati block.
    $0A81..$0A82, $0ABC, $0AC1..$0AC5, $0AC7..$0AC8, $0ACD, $0AE2..$0AE3,
    // Oriya block.
    $0B01, $0B3C, $0B3F, $0B41..$0B43, $0B4D, $0B56,
    // Tamil block.
    $0B82, $0BC0, $0BCD,
    // Telugu block.
    $0C3E..$0C40, $0C46..$0C48, $0C4A..$0C4D, $0C55..$0C56,
    // Kannada block.
    $0CBC, $0CBF, $0CC6, $0CCC..$0CCD,
    // Malayalam block.
    $0D41..$0D43, $0D4D,
    // Sinhala block.
    $0DCA, $0DD2..$0DD4, $0DD6,
    // Thai block.
    $0E31, $0E34..$0E3A, $0E47..$0E4E,
    // Lao block.
    $0EB1, $0EB4..$0EB9, $0EBB..$0EBC, $0EC8..$0ECD,
    // Tibetan block.
    $0F18..$0F19, $0F35, $0F37, $0F39, $0F71..$0F7E, $0F80..$0F84,
    $0F86..$0F87, $0F90..$0F97, $0F99..$0FBC, $0FC6,
    // Myanmar block.
    $102D..$1030, $1032, $1036..$1037, $1039, $1058..$1059,
    // Tagalog, Hanunoo, Buhid and Tagbanwa blocks.
    $1712..$1714, $1732..$1734, $1752..$1753, $1772..$1773,
    // Khmer block.
    $17B7..$17BD, $17C6, $17C9..$17D3, $17DD,
    // Mongolian block.
    $180B..$180D, $18A9,
    // Limbu block.
    $1920..$1922, $1927..$1928, $1932, $1939..$193B,
    // Combining Diacritical Marks Supplement.
    $1DC0..$1DFF,
    // Combining Diacritical Marks for Symbols.
    $20D0..$20FF,
    // CJK Symbols and Punctuation block (tone marks).
    $302A..$302F,
    // Combining Katakana-Hiragana (semi-)voiced sound marks. FIXME use more of this range.
    // http://www.sql-und-xml.de/unicode-database/mn.html
    $3099, $309A,
    // Alphabetic Presentation Forms block (Hebrew point).
    $FB1E,
    // Variation Selectors.
    $FE00..$FE0F,
    // Combining Half Marks.
    $FE20..$FE2F:
      Result := False;
  else
    Result := True; // TODO more?
  end;
end;

end.