unit PDF_ToUnicode_maps; {PDF parser. Copyright (C) 2008 Danny Milosavljevic This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA } {$MODE OBJFPC} {$M+} interface type TUnicodeCharacter = Cardinal; // don't blame me, most programming languages don't have a builtin type to actually represent a unicode codepoint. //TUnicodeString = array of TUnicodeCharacter; const cNilCodepoint : TUnicodeCharacter = $FFFD; type TRangeEntryP = ^TRangeEntry; TRangeEntry = record NativePrefix : array[0..2] of Char; NativePrefixSize : Byte; NativeBeginning : Char; NativeEnd : Char; UnicodeBeginning : TUnicodeCharacter; end; TRangeEntryArray = array of TRangeEntry; IMap = interface ['{602cb294-f574-11de-8c93-473784b0d928}'] function NextUnicodeCodepoint(var aNativeString : PChar; var aNativeStringSize : Cardinal) : TUnicodeCharacter; function GetID() : Cardinal; // for debugging. procedure SetID(aValue : Cardinal); // for debugging. property ID : Cardinal read GetID write SetID; procedure Dump(); end; TMap = class(TInterfacedObject, IMap) private fRanges : TRangeEntryArray; fID : Cardinal; public constructor Create(); destructor Destroy(); override; published function NextUnicodeCodepoint(var aNativeString : PChar; var aNativeStringSize : Cardinal) : TUnicodeCharacter; inline; //function GetUnicodeCodepoints(aNativeString : String) : TUnicodeString; procedure AddRange(aNativeBeginning : String; aNativeEnd : String; aUnicodeBeginning : TUnicodeCharacter); inline; function GetID() : Cardinal; // for debugging. procedure SetID(aValue : Cardinal); // for debugging. property ID : Cardinal read GetID write SetID; procedure Dump(); end; implementation uses sysutils; { TMap } constructor TMap.Create(); begin SetLength(fRanges, 0); end; destructor TMap.Destroy(); begin inherited Destroy(); end; function TMap.NextUnicodeCodepoint(var aNativeString : PChar; var aNativeStringSize : Cardinal) : TUnicodeCharacter; inline; var vIndex : Integer; vNativeCharacter : Char; vPrefixCMP : Integer; begin Result := cNilCodepoint; assert(aNativeStringSize > 0); for vIndex := Low(fRanges) to High(fRanges) do begin with fRanges[vIndex] do begin if (NativePrefixSize > 0) and (aNativeStringSize >= NativePrefixSize) then begin vPrefixCMP := CompareMemRange(@NativePrefix[0], aNativeString, NativePrefixSize); end else if (NativePrefixSize > 0) { and not (aNativeStringSize >= NativePrefixSize)} then vPrefixCMP := 1 else vPrefixCMP := 0; if (vPrefixCMP = 0) and (aNativeStringSize > NativePrefixSize) then begin vNativeCharacter := (aNativeString + NativePrefixSize)^; if (vNativeCharacter >= NativeBeginning) and (vNativeCharacter <= NativeEnd) then begin Inc(aNativeString, NativePrefixSize + 1); Dec(aNativeStringSize, NativePrefixSize + 1); Result := UnicodeBeginning + Ord(vNativeCharacter) - Ord(NativeBeginning); Break; end; end; end; end; end; procedure TMap.AddRange(aNativeBeginning : String; aNativeEnd : String; aUnicodeBeginning : TUnicodeCharacter); var vPrefixIndex : Integer; vPrefixLength : Integer; begin // common prefix? vPrefixLength := Length(aNativeBeginning) - 1; assert(vPrefixLength = Length(aNativeEnd) - 1); assert(Copy(aNativeBeginning, 1, vPrefixLength) = Copy(aNativeEnd, 1, vPrefixLength)); SetLength(fRanges, Length(fRanges) + 1); with fRanges[High(fRanges)] do begin assert(vPrefixLength <= Length(NativePrefix)); for vPrefixIndex := 0 to vPrefixLength - 1 do NativePrefix[vPrefixIndex] := aNativeBeginning[1 + vPrefixIndex]; NativePrefixSize := vPrefixLength; NativeBeginning := aNativeBeginning[1 + vPrefixLength]; NativeEnd := aNativeEnd[1 + vPrefixLength]; UnicodeBeginning := aUnicodeBeginning; end; end; function TMap.GetID() : Cardinal; // for debugging. begin Result := fID; end; procedure TMap.SetID(aValue : Cardinal); // for debugging. begin fID := aValue; end; procedure TMap.Dump(); var i : Integer; begin Writeln(Format('Map %d is :
', [ID])); for i := Low(fRanges) to High(fRanges) do begin with fRanges[i] do Writeln(Format('%d %d..%d => %d
', [NativePrefixSize, Ord(NativeBeginning), Ord(NativeEnd), UnicodeBeginning])); end; Writeln('End of map.
'); end; end.