unit XMLParser;
{$MODE OBJFPC}
{$M+}
interface
uses scanners, DOM, type_fixes, contnrs, classes;
// .
// TODO UTF-16, other encodings?
// TODO XML namespaces.
// TODO \x0D removal.
// FIXME put all the Result statements into a try...except to free it in the error case.
{ TODO Parameter-entity references use percent-sign (%) and semicolon (;) as delimiters.]
// TODO Namespaces .
Example of a parameter-entity reference:
%ISOLat2;
Entities in attribute values?
}
{ TODO includes:
]]>
]]>
}
type
TXMLDecl = record // private.
Encoding : TDOMString;
Version : TDOMString;
BStandalone : Boolean;
end;
TBase = 8..16;
// TODO rename to "Occurences" or something...
TDocumentTypeElementBodyFactorRepetition = (reOne, reZeroOrOne, reZeroOrMore, reOneOrMore); // public.
TDocumentTypeElementBodyFactor = class(TDocumentTypeBodyItem)
private
fItem : TDocumentTypeBodyItem;
fRepetition : TDocumentTypeElementBodyFactorRepetition;
published
property Item : TDocumentTypeBodyItem read fItem;
property Repetition : TDocumentTypeElementBodyFactorRepetition read fRepetition write fRepetition;
constructor Create(aItem : TDocumentTypeBodyItem; aRepetition : TDocumentTypeElementBodyFactorRepetition = reOne);
end;
TDocumentTypeElementBodyElementReference = class(TDocumentTypeBodyItem)
private
fTagName : TDOMString;
published
property TagName : TDOMString read fTagName;
constructor Create(aTagName : TDOMString);
end;
type
TDocumentTypeElementBodyChoice = class(TDocumentTypeBodyItem)
private
fList : TObjectList;
public
destructor Destroy(); override;
published
constructor Create();
procedure Add(vItem : TDocumentTypeBodyItem);
end;
TDocumentTypeElementBodySequence = class(TDocumentTypeBodyItem)
private
fList : TObjectList;
public
destructor Destroy(); override;
published
constructor Create();
published
procedure Append(vItem : TDocumentTypeBodyItem);
end;
TDocumentTypeElementBodyEmpty = class(TDocumentTypeBodyItem)
end;
TDocumentTypeElementBodyAny = class(TDocumentTypeBodyItem)
end;
TDocumentTypePCData = class(TDocumentTypeBodyItem)
end;
TDocumentTypeAttributeDeclarations = TObjectList{TDocumentTypeAttributeDeclaration};
TExternalID = record
SystemID : TDOMString;
PublicID : TDOMString; // ='' => not public.
end;
TParser = class(TScanner, IInterface)
private
fDocument : TDocument;
property Document : TDocument read fDocument;
published
//constructor Create();
function Parse() : TDocument;
protected
procedure AddDefaultEntities(aDocumentType { IO } : TDocumentType); virtual; // override for XHTML / HTML?
protected
function Comment() : TComment;
function CDATASection() : TCDATASection;
function Element() : TElement;
function EndElement() : TDOMString;
function Text() : TText;
function ProcessingInstruction() : TProcessingInstruction;
function Attribute() : TAttr; inline;
function Attributes() : TNamedNodeMap;
function QuotedAttributeValue() : TDOMString;
//function EntityReference() : TEntityReference;
function ChildNodes(out aPending : Char) : TNodeList;
function ToplevelChildNodes() : TNodeList;
function OptionalChildNode(out aPending : Char) : TNode;
function NMToken() : TDOMString;
function OptionalNameStartChar() : TDOMString; inline;
function OptionalNameChar() : TDOMString; inline;
function Name() : TDOMString;
function PositiveInteger(aBase : TBase) : TCardinal; inline;
procedure Whitespace(); inline;
function WhitespaceP() : Boolean; inline;
procedure OptionalWhitespace(); inline;
function EncName() : TDOMString;
function XMLDecl() : TXMLDecl;
procedure EQ(); inline;
function DocumentType() : TDocumentType;
function EntityReference() : TDOMString; inline;
// meta (doctype) layer:
function DocumentTypeSubset() : TNodeList; // not self-recursive.
function DocumentTypeSubsetElement() : TObjectList{TNode}; // not self-recursive.
function DocumentTypePEReference() : TDOMString;
function DocumentTypeElementBody() : TDocumentTypeElementDeclaration;
function DocumentTypeEntityBody() : TNode; // PE or not PE.
function DocumentTypeATTListBody() : TDocumentTypeAttributeDeclarations;
function DocumentTypeNotationBody() : TNotation;
function ExternalID() : TExternalID;
end;
TDocumentTypeBodyPEEntity = class(TNode)
private
fPublicID : TDOMString;
fSystemID : TDOMString;
fContents : TDOMString;
public
constructor Create(aOwnerDocument : TDocument; aNodeName, aPublicID, aSystemID : TDOMString; aContents : TDOMString);
published
property Contents : TDOMString read fContents;
published // official DOM Level 1 core interface:
property publicId : TDOMString read fPublicID;
property systemId : TDOMString read fSystemID;
end;
implementation
uses sysutils;
{ TParser }
procedure TParser.AddDefaultEntities(aDocumentType { IO } : TDocumentType);
begin
with aDocumentType do begin
if not HasEntityP('lt') then
// constructor Create(aOwnerDocument : TDocument; aNodeName, aPublicID, aSystemID, aNotationName : TDOMString);
AddEntity(TEntity.Create(Document, 'lt', {public ID FIXME}'', '', '', #60)); // FIXME #38 ??
if not HasEntityP('gt') then
AddEntity(TEntity.Create(Document, 'gt', '', '', '', #62));
if not HasEntityP('amp') then
AddEntity(TEntity.Create(Document, 'amp', '', '', '', #38));
if not HasEntityP('apos') then
AddEntity(TEntity.Create(Document, 'apos', '', '', '', #39));
if not HasEntityP('quot') then
AddEntity(TEntity.Create(Document, 'quot', '', '', '', #34));
{
}
end;
end;
{ for the document; Like "ChildNodes", but also handles DOCTYPE. }
function TParser.ToplevelChildNodes() : TNodeList;
var
vDocumentType : TDocumentType;
vDocumentElement : TElement;
begin
Result := TNodeList.Create(True);
try
repeat
OptionalWhitespace();
if Input = '<' then begin
Consume();
if Input = '!' then begin
Consume();
if Input = 'D' then begin
vDocumentType := DocumentType();
if Document.doctype <> nil then
Error('', '');
Document.SetDocumentType(vDocumentType);
Result.Append(vDocumentType);
end else
Result.Append(Comment());
end else if Input = '?' then begin
Consume();
Result.Append(ProcessingInstruction());
end else begin
if Assigned(Document.documentElement) then begin
Error('', '');
end;
vDocumentElement := Element();
Document.SetDocumentElement(vDocumentElement);
Result.Append(vDocumentElement);
end;
end else
Break;
until False;
except
FreeAndNil(Result);
raise;
end;
end;
{ aPending will be #0 if nothing is pending, otherwise the pending char ('<'). }
function TParser.OptionalChildNode(out aPending : Char) : TNode;
begin
aPending := #0;
if Input = '<' then begin
Consume();
if Input = '/' then begin // whooops. this is the end tag of the parent's Element and we just threw the '<' away.
aPending := '<';
Result := nil;
Exit;
end;
if Input = '!' then begin
Consume();
if Input = '[' then
Result := CDATASection()
else
Result := Comment();
end else if Input = '?' then
Result := ProcessingInstruction() // do not process entity references!
else
Result := Element();
end else if Input = '&' then
Result := Document.createTextNode(EntityReference()) // TODO merge?
else
Result := Text();
end;
{ for elements. }
function TParser.ChildNodes(out aPending : Char) : TNodeList;
var
vNode : TNode;
begin
aPending := #0;
// content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
Result := TNodeList.Create(True);
try
repeat
vNode := OptionalChildNode(aPending);
if Assigned(vNode) then
Result.Append(vNode);
until vNode = nil;
// note that because of the infinite wisdom *cough* of XML, we just consumed the '<' of ''.
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.Comment() : TComment;
var
vValue : TDOMString;
begin
vValue := '';
Consume('--');
// FIXME lookahead more.
while BInput and (Input <> '-') do begin
vValue := vValue + Input;
Consume();
end;
// FIXME find wrong in: ''
Consume('-->');
Result := Document.createComment(vValue);
end;
function TParser.CDATASection() : TCDATASection;
var
vValue : TDOMString;
begin
Consume('[CDATA[');
vValue := '';
// FIXME lookahead more.
while BInput and (Input <> ']') do begin
vValue := vValue + Input;
Consume();
end;
Consume(']]>');
Result := Document.createCDATASection(vValue);
end;
function TParser.EndElement() : TDOMString;
begin // after the "<".
Result := '';
Consume('/');
// FIXME whitespace?
while BInput and (Input <> '>') and not WhitespaceP() do begin
Result := Result + Consume();
end;
OptionalWhitespace();
Consume('>');
end;
function TParser.Element() : TElement;
var
vBEmpty : Boolean;
vName : TDOMString;
vEndName : TDOMString;
vPending : Char;
begin // after the "<".
vName := Name();
Result := Document.createElement(vName);
try
Result.SetAttributes(Attributes());
OptionalWhitespace();
vBEmpty := (Input = '/');
if vBEmpty then begin
Consume();
end;
Consume('>');
if not vBEmpty then begin
Result.SetChildNodes(ChildNodes(vPending));
if vPending <> '<' then begin // 'ChildNodes will return whether it accidentially consumed something it shouldn't have.
Consume('<');
end;
vEndName := EndElement();
if vEndName <> vName then
Error(Format('%s>', [vName]), Format('%s>', [vEndName]));
end;
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.OptionalNameStartChar() : TDOMString; inline;
begin
case Input of
':', 'A'..'Z', '_', 'a'..'z':
// FIXME: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
begin
Result := Consume();
end;
else
Result := '';
end;
end;
function TParser.OptionalNameChar() : TDOMString; inline;
begin
if Input in [':', 'A'..'Z', '_', 'a'..'z', '-', '.', '0'..'9']
// FIXME: #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
// FIXME: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
then
Result := Consume()
else
Result := OptionalNameStartChar();
end;
function TParser.Name() : TDOMString;
var
vChar : TDOMString;
begin
Result := OptionalNameStartChar();
if Result = '' then
Error('');
repeat
vChar := OptionalNameChar();
if Length(vChar) > 0 then
Result := Result + vChar;
until vChar = '';
end;
function TParser.NMToken() : TDOMString; // enumeration?
var
vChar : TDOMString;
begin
Result := OptionalNameChar();
if Result = '' then
Error('');
repeat
vChar := OptionalNameChar();
if Length(vChar) > 0 then
Result := Result + vChar;
until vChar = '';
end;
function TParser.Text() : TText;
var
vValue : TDOMString;
begin
vValue := '';
while BInput and ((Input in [#9, #10, #13]) or ((Input >= #32) and (Input <> '<') and (Input <> '>') and (Input <> '&'))) do begin
vValue := vValue + Consume();
end;
Result := Document.createTextNode(vValue);
// TODO Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
end;
function TParser.ProcessingInstruction() : TProcessingInstruction;
var
vValue : TDOMString;
vTarget : TDOMString;
begin
// do not process entity references!
Consume('?');
vTarget := Name();
Whitespace();
vValue := '';
while BInput and (Input <> '?') do begin
vValue := vValue + Consume();
end;
Consume('?>');
Result := Document.createProcessingInstruction(vTarget, vValue);
end;
function TParser.WhitespaceP() : Boolean; inline;
begin
Result := Input in [#32, #9, #13, #10];
end;
procedure TParser.Whitespace(); inline;
begin
if Input in [#32, #9, #13, #10] then // #13 isn't really supposed to be there.
Consume()
else
Consume(#32);
OptionalWhitespace();
end;
procedure TParser.OptionalWhitespace(); inline;
begin
while Input in [#32, #9, #13, #10] do // #13 isn't really supposed to be there.
Consume();
end;
function TParser.QuotedAttributeValue() : TDOMString;
var
vExpectedEndingQuote : Char;
begin
//AttValue ::= '"' ([^<&"] | Reference)* '"'
Result := '';
if Input in ['''', '"'] then begin
vExpectedEndingQuote := Input;
Consume();
while BInput and not (Input in [vExpectedEndingQuote, '<']) do begin
if Input = '&' then
Result := Result + EntityReference() // implies charref.
else
Result := Result + Consume();
end;
Consume(vExpectedEndingQuote);
end else
Error('');
end;
function TParser.Attributes() : TNamedNodeMap;
begin
Result := TNamedNodeMap.Create();
try
repeat
OptionalWhitespace();
if (Input = '>') or (Input = '/') then
Exit;
// FIXME handle duplicates?
Result.setNamedItem(Attribute());
until (Input = '>') or (Input = '/');
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.Attribute() : TAttr; inline;
var
vName : TDOMString;
vValue : TDOMString;
begin
vName := Name();
EQ();
vValue := QuotedAttributeValue();
Result := Document.createAttribute(vName);
try
Result.NodeValue := vValue;
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.PositiveInteger(aBase : TBase) : TCardinal; inline;
const
ValidCharacters : String = '0123456789ABCDEF';
ValidCharacters2 : String = '0123456789abcdef';
var
vDigit : Integer;
begin
vDigit := Pos(Input, ValidCharacters);
if vDigit = 0 then
vDigit := Pos(Input, ValidCharacters2);
if vDigit = 0 then
Error('');
Result := vDigit;
repeat
vDigit := Pos(Input, ValidCharacters);
if vDigit = 0 then
vDigit := Pos(Input, ValidCharacters2);
if vDigit <> 0 then
Result := Result * aBase + Cardinal(vDigit - 1);
until vDigit = 0;
end;
function TParser.EncName() : TDOMString;
begin
if Input in ['a'..'z', 'A'..'Z'] then
Result := Consume()
else
Error('');
while Input in ['a'..'z', 'A'..'Z', '0'..'9', '.', '_', '-'] do
Result := Result + Consume();
end;
procedure TParser.EQ(); inline;
begin
OptionalWhitespace();
Consume('=');
OptionalWhitespace();
end;
function TParser.XMLDecl() : TXMLDecl;
begin
Consume(' '1.0' then
Error('1.0');
if Input in [#32, #9, #13, #10] then begin
Whitespace();
Consume('encoding');
EQ();
Consume('"');
Result.Encoding := EncName();
Consume('"');
end;
if Input in [#32, #9, #13, #10] then begin
Whitespace();
Consume('standalone');
EQ();
if QuotedAttributeValue() <> 'no' then
Result.BStandalone := True;
end;
OptionalWhitespace();
Consume('?>');
OptionalWhitespace();
end;
function TParser.EntityReference() : TDOMString; inline;
//function TParser.EntityReference() : TEntityReference;
var
vCode : TCardinal;
vName : TDOMString;
vEntity : TEntity;
begin
Consume('&');
if Input = '#' then begin // Char ref.
if Input = 'x' then begin
Consume();
vCode := PositiveInteger(16);
end else
vCode := PositiveInteger(10);
// FIXME encoding...
Result := Chr(vCode);
Exit;
//vName := Format('#%u', [vCode]); // make sure there are no heading zeroes and it actually is a number.
// TODO handle them right here?
end else begin
vName := Name();
end;
Consume(';');
// TODO validate: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
//Result := Document.createEntityReference(vName);
assert(Assigned(Document.doctype)); // FIXME is that always there?
vEntity := Document.doctype.GetEntity(vName);
if Assigned(vEntity) then
Result := vEntity.Contents
else
Error('', '&' + Result + ';');
end;
// inside DTD only.
function TParser.DocumentTypePEReference() : TDOMString;
begin
Consume('%');
Result := Name();
Consume(';');
// FIXME dereference.
end;
function TParser.DocumentTypeSubsetElement() : TObjectList;
begin
Result := TObjectList.Create(True);
try
if Input = '<' then begin
Consume();
if Input = '?' then begin
Result.Add(ProcessingInstruction());
end else begin
Consume('!');
if Input = '-' then
Result.Add(Comment()) // after '!'.
else begin // the interesting part is here:
if Input = 'E' then begin
Consume();
if Input = 'L' then begin
Consume('LEMENT');
Result.Add(DocumentTypeElementBody());
end else if input = 'N' then begin
Consume('NTITY');
Result.Add(DocumentTypeEntityBody());
end else begin
Consume('');
end;
end else if Input = 'A' then begin
Consume('ATTLIST');
FreeAndNil(Result);
Result := DocumentTypeATTListBody();
end else if Input = 'N' then begin
Consume('NOTATION');
Result.Add(DocumentTypeNotationBody());
end;
end;
end;
end else
Consume('<'); // error.
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.DocumentTypeSubset() : TNodeList;
var
vItemSequence : TObjectList; // NOT subitems.
vItemIndex : Integer;
begin
Result := TNodeList.Create(True);
try
while BInput and (Input <> ']') do begin
OptionalWhitespace();
while Input = '%' do begin
{FIXME handle parameter-entity references} DocumentTypePEReference();
OptionalWhitespace();
end;
// PEReference() like whitespace?
if Input = ']' then
Break;
vItemSequence := DocumentTypeSubsetElement();
vItemSequence.OwnsObjects := False;
if vItemSequence.Count > 0 then
for vItemIndex := 0 to vItemSequence.Count - 1 do
Result.Append(TNode(vItemSequence[vItemIndex]));
//=Result.Append(DocumentTypeSubsetElement());
end;
except
FreeAndNil(Result);
raise;
end;
end;
// TODO support ''
function TParser.DocumentTypeElementBody() : TDocumentTypeElementDeclaration;
function OptionalRepetition() : TDocumentTypeElementBodyFactorRepetition; inline;
begin
Result := reOne;
if Input = '?' then begin
Consume();
Result := reZeroOrOne
end else if Input = '*' then begin
Consume();
Result := reZeroOrMore;
end else if Input = '+' then begin
Consume();
Result := reOneOrMore;
end;
// else ok, something else: keep it.
end;
function children() : TDocumentTypeElementBodyFactor; forward;
function cp() : TDocumentTypeElementBodyFactor;
var
vItem : TDocumentTypeBodyItem;
vRepetition : TDocumentTypeElementBodyFactorRepetition;
begin
// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
if Input = '(' then
Result := children()
else begin
vItem := TDocumentTypeElementBodyElementReference.Create(Name());
try
vRepetition := OptionalRepetition();
Result := TDocumentTypeElementBodyFactor.Create(vItem, vRepetition);
vItem := nil;
except
FreeAndNil(vItem);
raise;
end;
end;
end;
function children() : TDocumentTypeElementBodyFactor;
var
vFirstItem : TDocumentTypeElementBodyFactor;
vItem : TDocumentTypeElementBodyFactor;
vBPCData : Boolean;
begin
// we don't know whether it's supposed to be Choice or Sequence here, so keep temp variables until we know...
Consume('(');
OptionalWhitespace();
vBPCData := False;
if Input = '#' then begin // TODO toplevel only.
Consume('#PCDATA');
vBPCData := True;
vFirstItem := TDocumentTypeElementBodyFactor.Create(TDocumentTypePCData.Create(), reOne);
//OptionalWhitespace();
{Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
| '(' S? '#PCDATA' S? ')'}
end else
vFirstItem := cp();
try
OptionalWhitespace();
// finally decide, adding the first item...
if Input = '|' then begin // choice:
Result := TDocumentTypeElementBodyFactor.Create(TDocumentTypeElementBodyChoice.Create());
TDocumentTypeElementBodyChoice(Result.Item).Add(vFirstItem);
vFirstItem := nil;
while Input = '|' do begin
Consume();
OptionalWhitespace();
vItem := cp();
TDocumentTypeElementBodyChoice(Result.Item).Add(vItem);
OptionalWhitespace();
end;
end else begin // sequence (or single element).
Result := TDocumentTypeElementBodyFactor.Create(TDocumentTypeElementBodySequence.Create());
TDocumentTypeElementBodySequence(Result.Item).Append(vFirstItem);
vFirstItem := nil;
if (Input = ',') and (vBPCData) then begin
Error(')', '');
end;
try
while Input = ',' do begin
Consume();
OptionalWhitespace();
TDocumentTypeElementBodySequence(Result.Item).Append(cp());
OptionalWhitespace();
end;
except
FreeAndNil(Result.Item);
raise;
end;
end;
//[47] children ::= (choice | seq) ('?' | '*' | '+')?
//[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' [VC: Proper Group/PE Nesting]
//[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' [VC: Proper Group/PE Nesting]
OptionalWhitespace();
Consume(')');
Result.Repetition := OptionalRepetition();
except
FreeAndNil(vFirstItem);
raise;
end;
end;
function Content() : TDocumentTypeBodyItem;
begin
if Input = 'E' then begin
Consume('EMPTY');
Result := TDocumentTypeElementBodyEmpty.Create(); // TODO singleton.
end else if Input = 'A' then begin
Consume('ANY');
Result := TDocumentTypeElementBodyAny.Create(); // TODO singleton.
end else
Result := children();
end;
var
vName : TDOMString;
vContent : TDocumentTypeBodyItem;
begin
Whitespace();
vName := Name();
Whitespace();
vContent := Content();
Result := TDocumentTypeElementDeclaration.Create(Document, vName, vContent);
try
OptionalWhitespace();
Consume('>');
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.DocumentTypeEntityBody() : TNode;
function EntityValue() : TDOMString;
var
vExpectedEndingQuote : Char;
begin
if Input = '"' then
vExpectedEndingQuote := Consume()
else if Input = '''' then
vExpectedEndingQuote := Consume()
else
Consume('"');
while (BInput) and (Input <> vExpectedEndingQuote) do begin
if Input = '&' then
Result := Result + EntityReference()
else if Input = '%' then
Result := Result + DocumentTypePEReference()
else
Result := Result + Consume();
end;
Consume(vExpectedEndingQuote);
end;
function NDATADecl() : TDOMString;
begin
Consume('NDATA');
Whitespace();
Result := Name();
end;
function EntityDef(aName : TDOMString) : TEntity;
var
vID : TExternalID;
vNDATA : TDOMString;
vContents : TDOMString;
begin
if Input in [ '"', '''' ] then
Result := TEntity.Create(Document, aName, ''{FIXME}, ''{FIXME}, ''{FIXME}, EntityValue())
else begin
vID := ExternalID();
vNDATA := '';
if Input in [#32, #9, #13, #10] then begin
Whitespace();
if Input = 'N' then begin
vNDATA := NDATADecl();
end;
end;
vContents := ''; // FIXME.
Result := TEntity.Create(Document, aName, vID.PublicID, vID.SystemID, vNDATA, vContents);
end;
end;
function PEDef(aName : TDOMString) : TDocumentTypeBodyPEEntity;
var
vID : TExternalID;
vContents : TDOMString;
begin
if Input in [ '"', '''' ] then
Result := TDocumentTypeBodyPEEntity.Create(Document, aName, ''{FIXME}, ''{FIXME}, EntityValue())
else begin // external.
vID := ExternalID();
end;
vContents := ''; // FIXME.
Result := TDocumentTypeBodyPEEntity.Create(Document, aName, vID.PublicID, vID.SystemID, vContents);
end;
var
vName : TDOMString;
begin
Whitespace();
if Input = '%' then begin // PEDecl:
Consume();
Whitespace();
vName := Name();
Whitespace();
Result := PEDef(vName);
end else begin // GEDecl:
vName := Name();
Whitespace();
Result := EntityDef(vName);
end;
OptionalWhitespace();
Consume('>');
end;
function TParser.DocumentTypeATTListBody() : TDocumentTypeAttributeDeclarations;
//
function TokenizedType(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeDeclaration; inline;
var
vBaseType : TDocumentTypeBodyAttributeBaseType;
begin
if Input = 'I' then begin
Consume('D');
if Input = 'R' then begin
Consume('REF');
if Input = 'S' then begin
Consume();
vBaseType := atIDREFS;
end else begin // ok, just 'IDREF'.
vBaseType := atIDREF;
end;
end else begin // ok, just "ID".
vBaseType := atID;
end;
end else if Input = 'E' then begin
Consume('ENTIT');
if Input = 'Y' then begin
Consume();
vBaseType := atEntity;
end else begin
Consume('IES');
vBaseType := atEntities;
end;
end else if Input = 'N' then begin
Consume('NMTOKEN');
if Input = 'S' then begin
Consume();
vBaseType := atNMTOKENS;
end else begin // ok, just "NMTOKEN".
vBaseType := atNMTOKEN;
end;
end else
Error('');
Result := TDocumentTypeAttributeDeclaration.Create(Document, aAttributeName, aElementName, vBaseType);
end;
function Enumeration(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeBodyEnumeration;
var
vFirstItem : TDOMString;
vItem : TDOMString;
begin
Consume('(');
OptionalWhitespace();
vFirstItem := NMToken();
Result := TDocumentTypeAttributeBodyEnumeration.Create(Document, aAttributeName, aElementName);
try
Result.Append(vFirstItem);
OptionalWhitespace();
while Input = '|' do begin
Consume();
OptionalWhitespace();
vItem := NMToken();
Result.Append(vItem);
OptionalWhitespace();
end;
Consume(')');
// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration]
// [VC: No Duplicate Tokens]
except
FreeAndNil(Result);
FreeAndNil(vFirstItem);
raise;
end;
end;
function NotationType(aAttributeName, aElementName : TDOMString) : TDocumentTypeBodyNotationChoice;
function GetNotation(const aName : TDOMString) : TNotation;
begin
assert(Assigned(Document.doctype));
Result := Document.doctype.GetNotation(aName);
if Result = nil then
Error('', aName);
end;
var
vFirstItem : TDOMString;
vItem : TDOMString;
begin
Consume('NOTATION');
Whitespace();
Consume('(');
OptionalWhitespace();
vFirstItem := Name();
try
OptionalWhitespace();
Result := TDocumentTypeBodyNotationChoice.Create(Document, aAttributeName, aElementName);
Result.Add(GetNotation(vFirstItem));
try
while Input = '|' do begin
Consume();
OptionalWhitespace();
vItem := Name();
Result.Add(GetNotation(vItem)); // no dupes.
OptionalWhitespace();
end;
Consume(')');
except
FreeAndNil(Result);
raise;
end;
except
raise;
end;
//[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
{ [VC: Notation Attributes]
[VC: One Notation Per Element Type]
[VC: No Notation on Empty Element]
[VC: No Duplicate Tokens]}
end;
function EnumeratedType(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeDeclaration;
begin
if Input = 'N' then
Result := NotationType(aAttributeName, aElementName)
else
Result := Enumeration(aAttributeName, aElementName);
end;
function DefaultDecl() : TDocumentTypeBodyAttributeDefault;
var
vImportance : TDocumentTypeBodyAttributeDefaultImportance;
vValue : TDOMString;
begin
vValue := '';
vImportance := imOptional;
if Input = '#' then begin
Consume();
if Input = 'R' then begin
Consume('REQUIRED');
vImportance := imRequired;
end else if Input = 'I' then begin
Consume('IMPLIED');
vImportance := imImplied;
end else begin
vImportance := imFixed;
if Input = 'F' then begin
Consume('FIXED');
Whitespace();
end else
Error('');
vValue := QuotedAttributeValue();
end;
end else begin
vValue := QuotedAttributeValue();
end;
Result := TDocumentTypeBodyAttributeDefault.Create(vImportance, vValue);
end;
function ATTType(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeDeclaration; inline;
var
vDefault : TDocumentTypeBodyAttributeDefault;
begin
if Input = 'C' then begin
Consume('CDATA');
Result := TDocumentTypeAttributeDeclaration.Create(Document, aAttributeName, aElementName, atString);
end else if Input in [ 'I', 'E', 'N' ] then begin // tokenized type.
Result := TokenizedType(aAttributeName, aElementName);
end else
Result := EnumeratedType(aAttributeName, aElementName);
// Result := TDocumentTypeAttributeDeclaration.Create(Document, vAttributeName, aElementName, vType, vDefault);
try
try
Whitespace();
vDefault := DefaultDecl();
Result.Default_ := vDefault;
vDefault := nil;
except
FreeAndNil(vDefault);
raise;
end;
except
FreeAndNil(vDefault);
FreeAndNil(Result);
raise;
end;
end;
function ATTDEF(aElementName : TDOMString) : TDocumentTypeAttributeDeclaration{s};
var
vAttributeName : TDOMString;
begin
vAttributeName := Name();
Whitespace();
Result := ATTType(vAttributeName, aElementName);
end;
var
vElementName : TDOMString;
begin
Whitespace();
vElementName := Name();
Whitespace();
Result := TDocumentTypeAttributeDeclarations.Create(True);
try
while BInput and (Input <> '>') do begin
Result.Add(ATTDEF(vElementName));
if not WhitespaceP() then
Break;
Whitespace();
end;
Consume('>');
except
FreeAndNil(Result);
raise;
end;
end;
function TParser.ExternalID() : TExternalID;
function SystemLiteral() : TDOMString;
var
vExpectedEndingQuote : Char;
begin
if Input = '"' then
vExpectedEndingQuote := Consume()
else if Input = '''' then
vExpectedEndingQuote := Consume()
else
Consume('"');
Result := '';
while BInput and (Input <> vExpectedEndingQuote) do begin
Result := Result + Consume();
end;
Consume(vExpectedEndingQuote);
end;
// returns: #0: not found.
function OptionalPubidChar(aNot : Char) : Char; inline;
begin
if (Input <> aNot) and (Input in [#20, #13, #10, 'a'..'z', 'A'..'Z', '0'..'9', '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*', '#', '@', '$', '_', '%']) then
Result := Consume()
else
Result := #0;
end;
function PubidLiteral() : TDOMString;
var
vExpectedEndingQuote : Char;
vChar : Char;
begin
if Input = '"' then
vExpectedEndingQuote := Consume()
else if Input = '''' then
vExpectedEndingQuote := Consume()
else
Consume('"');
Result := '';
repeat
vChar := OptionalPubidChar(vExpectedEndingQuote);
if vChar <> #0 then
Result := Result + vChar;
until vChar = #0;
Consume(vExpectedEndingQuote);
end;
begin
if Input = 'S' then begin
Consume('SYSTEM');
Whitespace();
Result.SystemID := SystemLiteral();
Result.PublicID := '';
end else if Input = 'P' then begin
Consume('PUBLIC');
Whitespace();
Result.PublicID := PubidLiteral();
Whitespace();
Result.SystemID := SystemLiteral();
end else
Error('');
end;
function TParser.DocumentTypeNotationBody() : TNotation;
var
vID : TExternalID;
vName : TDOMString;
begin
Whitespace();
vName := Name();
Whitespace();
vID := ExternalID();
//(ExternalID | PublicID FIXME )
//[83] PublicID ::= 'PUBLIC' S PubidLiteral // ExternalID looks similar.
OptionalWhitespace();
Consume('>');
Result := TNotation.Create(Document, vName, vID.PublicID, vID.SystemID);
end;
function TParser.DocumentType() : TDocumentType;
var
vName : TDOMString;
vNodeList : TNodeList;
vID : TExternalID;
begin
Consume('DOCTYPE');
Whitespace();
vName := Name();
// '[' then begin
vID := ExternalID();
// FIXME use.
end; // else misdetected, will handle later (just below).
end;
OptionalWhitespace();
if Input = '[' then begin
Consume('[');
vNodeList := DocumentTypeSubset(); // FIXME.
try
Consume(']');
OptionalWhitespace();
except
FreeAndNil(vNodeList);
raise;
end;
end else
vNodeList := nil;
try
Consume('>');
// FIXME error handling.
Result := TDocumentType.Create(Document, vName, vNodeList);
vNodeList := nil;
except
FreeAndNil(vNodeList);
raise;
end;
AddDefaultEntities(Result);
end;
function TParser.Parse() : TDocument;
var
vXMLDecl : TXMLDecl;
begin
vXMLDecl := XMLDecl(); // TODO make optional.
{
Prolog
[22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
[27] Misc ::= Comment | PI | S
Document Type Definition
[28] doctypedecl ::= '' [VC: Root Element Type]
[WFC: External Subset]
[28a] DeclSep ::= PEReference | S [WFC: PE Between Declarations]
[28b] DocumentTypeSubset ::= (markupdecl | DeclSep)*
[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment [VC: Proper Declaration/PE Nesting]
The document type declaration MUST appear before the first element in the document. [WFC: PEs in Internal Subset]
}
assert(fDocument = nil);
Result := TDocument.Create();
fDocument := Result;
// magic: OptionalWhitespace() '<' ProcessingInstruction()
try
Result.SetChildNodes(ToplevelChildNodes());
except
FreeAndNil(Result);
raise;
end;
// assert PI.
// assert single document element.
// assert doctype?
// assert DTD / schema?
end;
{ TDocumentTypeElementBodyChoice }
destructor TDocumentTypeElementBodyChoice.Destroy();
begin
FreeAndNil(fList);
inherited Destroy();
end;
constructor TDocumentTypeElementBodyChoice.Create();
begin
fList := TObjectList.Create(True);
end;
procedure TDocumentTypeElementBodyChoice.Add(vItem : TDocumentTypeBodyItem);
begin
fList.Add(vItem);
end;
{ TDocumentTypeElementBodySequence }
destructor TDocumentTypeElementBodySequence.Destroy();
begin
FreeAndNil(fList);
inherited Destroy();
end;
constructor TDocumentTypeElementBodySequence.Create();
begin
fList := TObjectList.Create(True);
end;
procedure TDocumentTypeElementBodySequence.Append(vItem : TDocumentTypeBodyItem);
begin
fList.Add(vItem);
end;
{ TDocumentTypeElementBodyElementReference }
constructor TDocumentTypeElementBodyElementReference.Create(aTagName : TDOMString);
begin
// FIXME inherited Create...
fTagName := aTagName;
end;
{ TDocumentTypeBodyPEEntity }
constructor TDocumentTypeBodyPEEntity.Create(aOwnerDocument : TDocument; aNodeName, aPublicID, aSystemID : TDOMString; aContents : TDOMString);
begin
inherited Create(aOwnerDocument, ntEntity, aNodeName);
fPublicID := aPublicID;
fSystemID := aSystemID;
//fNDATAName := aNotationName;
fContents := aContents;
end;
{ TDocumentTypeElementBodyFactor }
constructor TDocumentTypeElementBodyFactor.Create(aItem : TDocumentTypeBodyItem; aRepetition : TDocumentTypeElementBodyFactorRepetition = reOne);
begin
fRepetition := aRepetition;
fItem := aItem;
end;
end.