unit XMLParser; {$MODE OBJFPC} {$M+} interface uses scanners, DOM, type_fixes, contnrs, classes; // . // TODO UTF-16, other encodings? // TODO XML namespaces. // TODO \x0D removal. // FIXME put all the Result statements into a try...except to free it in the error case. { TODO Parameter-entity references use percent-sign (%) and semicolon (;) as delimiters.] // TODO Namespaces . Example of a parameter-entity reference: %ISOLat2; Entities in attribute values? } { TODO includes: ]]> ]]> } type TXMLDecl = record // private. Encoding : TDOMString; Version : TDOMString; BStandalone : Boolean; end; TBase = 8..16; // TODO rename to "Occurences" or something... TDocumentTypeElementBodyFactorRepetition = (reOne, reZeroOrOne, reZeroOrMore, reOneOrMore); // public. TDocumentTypeElementBodyFactor = class(TDocumentTypeBodyItem) private fItem : TDocumentTypeBodyItem; fRepetition : TDocumentTypeElementBodyFactorRepetition; published property Item : TDocumentTypeBodyItem read fItem; property Repetition : TDocumentTypeElementBodyFactorRepetition read fRepetition write fRepetition; constructor Create(aItem : TDocumentTypeBodyItem; aRepetition : TDocumentTypeElementBodyFactorRepetition = reOne); end; TDocumentTypeElementBodyElementReference = class(TDocumentTypeBodyItem) private fTagName : TDOMString; published property TagName : TDOMString read fTagName; constructor Create(aTagName : TDOMString); end; type TDocumentTypeElementBodyChoice = class(TDocumentTypeBodyItem) private fList : TObjectList; public destructor Destroy(); override; published constructor Create(); procedure Add(vItem : TDocumentTypeBodyItem); end; TDocumentTypeElementBodySequence = class(TDocumentTypeBodyItem) private fList : TObjectList; public destructor Destroy(); override; published constructor Create(); published procedure Append(vItem : TDocumentTypeBodyItem); end; TDocumentTypeElementBodyEmpty = class(TDocumentTypeBodyItem) end; TDocumentTypeElementBodyAny = class(TDocumentTypeBodyItem) end; TDocumentTypePCData = class(TDocumentTypeBodyItem) end; TDocumentTypeAttributeDeclarations = TObjectList{TDocumentTypeAttributeDeclaration}; TExternalID = record SystemID : TDOMString; PublicID : TDOMString; // ='' => not public. end; TParser = class(TScanner, IInterface) private fDocument : TDocument; property Document : TDocument read fDocument; published //constructor Create(); function Parse() : TDocument; protected procedure AddDefaultEntities(aDocumentType { IO } : TDocumentType); virtual; // override for XHTML / HTML? protected function Comment() : TComment; function CDATASection() : TCDATASection; function Element() : TElement; function EndElement() : TDOMString; function Text() : TText; function ProcessingInstruction() : TProcessingInstruction; function Attribute() : TAttr; inline; function Attributes() : TNamedNodeMap; function QuotedAttributeValue() : TDOMString; //function EntityReference() : TEntityReference; function ChildNodes(out aPending : Char) : TNodeList; function ToplevelChildNodes() : TNodeList; function OptionalChildNode(out aPending : Char) : TNode; function NMToken() : TDOMString; function OptionalNameStartChar() : TDOMString; inline; function OptionalNameChar() : TDOMString; inline; function Name() : TDOMString; function PositiveInteger(aBase : TBase) : TCardinal; inline; procedure Whitespace(); inline; function WhitespaceP() : Boolean; inline; procedure OptionalWhitespace(); inline; function EncName() : TDOMString; function XMLDecl() : TXMLDecl; procedure EQ(); inline; function DocumentType() : TDocumentType; function EntityReference() : TDOMString; inline; // meta (doctype) layer: function DocumentTypeSubset() : TNodeList; // not self-recursive. function DocumentTypeSubsetElement() : TObjectList{TNode}; // not self-recursive. function DocumentTypePEReference() : TDOMString; function DocumentTypeElementBody() : TDocumentTypeElementDeclaration; function DocumentTypeEntityBody() : TNode; // PE or not PE. function DocumentTypeATTListBody() : TDocumentTypeAttributeDeclarations; function DocumentTypeNotationBody() : TNotation; function ExternalID() : TExternalID; end; TDocumentTypeBodyPEEntity = class(TNode) private fPublicID : TDOMString; fSystemID : TDOMString; fContents : TDOMString; public constructor Create(aOwnerDocument : TDocument; aNodeName, aPublicID, aSystemID : TDOMString; aContents : TDOMString); published property Contents : TDOMString read fContents; published // official DOM Level 1 core interface: property publicId : TDOMString read fPublicID; property systemId : TDOMString read fSystemID; end; implementation uses sysutils; { TParser } procedure TParser.AddDefaultEntities(aDocumentType { IO } : TDocumentType); begin with aDocumentType do begin if not HasEntityP('lt') then // constructor Create(aOwnerDocument : TDocument; aNodeName, aPublicID, aSystemID, aNotationName : TDOMString); AddEntity(TEntity.Create(Document, 'lt', {public ID FIXME}'', '', '', #60)); // FIXME #38 ?? if not HasEntityP('gt') then AddEntity(TEntity.Create(Document, 'gt', '', '', '', #62)); if not HasEntityP('amp') then AddEntity(TEntity.Create(Document, 'amp', '', '', '', #38)); if not HasEntityP('apos') then AddEntity(TEntity.Create(Document, 'apos', '', '', '', #39)); if not HasEntityP('quot') then AddEntity(TEntity.Create(Document, 'quot', '', '', '', #34)); { } end; end; { for the document; Like "ChildNodes", but also handles DOCTYPE. } function TParser.ToplevelChildNodes() : TNodeList; var vDocumentType : TDocumentType; vDocumentElement : TElement; begin Result := TNodeList.Create(True); try repeat OptionalWhitespace(); if Input = '<' then begin Consume(); if Input = '!' then begin Consume(); if Input = 'D' then begin vDocumentType := DocumentType(); if Document.doctype <> nil then Error('', ''); Document.SetDocumentType(vDocumentType); Result.Append(vDocumentType); end else Result.Append(Comment()); end else if Input = '?' then begin Consume(); Result.Append(ProcessingInstruction()); end else begin if Assigned(Document.documentElement) then begin Error('', ''); end; vDocumentElement := Element(); Document.SetDocumentElement(vDocumentElement); Result.Append(vDocumentElement); end; end else Break; until False; except FreeAndNil(Result); raise; end; end; { aPending will be #0 if nothing is pending, otherwise the pending char ('<'). } function TParser.OptionalChildNode(out aPending : Char) : TNode; begin aPending := #0; if Input = '<' then begin Consume(); if Input = '/' then begin // whooops. this is the end tag of the parent's Element and we just threw the '<' away. aPending := '<'; Result := nil; Exit; end; if Input = '!' then begin Consume(); if Input = '[' then Result := CDATASection() else Result := Comment(); end else if Input = '?' then Result := ProcessingInstruction() // do not process entity references! else Result := Element(); end else if Input = '&' then Result := Document.createTextNode(EntityReference()) // TODO merge? else Result := Text(); end; { for elements. } function TParser.ChildNodes(out aPending : Char) : TNodeList; var vNode : TNode; begin aPending := #0; // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* Result := TNodeList.Create(True); try repeat vNode := OptionalChildNode(aPending); if Assigned(vNode) then Result.Append(vNode); until vNode = nil; // note that because of the infinite wisdom *cough* of XML, we just consumed the '<' of ''. except FreeAndNil(Result); raise; end; end; function TParser.Comment() : TComment; var vValue : TDOMString; begin vValue := ''; Consume('--'); // FIXME lookahead more. while BInput and (Input <> '-') do begin vValue := vValue + Input; Consume(); end; // FIXME find wrong in: '' Consume('-->'); Result := Document.createComment(vValue); end; function TParser.CDATASection() : TCDATASection; var vValue : TDOMString; begin Consume('[CDATA['); vValue := ''; // FIXME lookahead more. while BInput and (Input <> ']') do begin vValue := vValue + Input; Consume(); end; Consume(']]>'); Result := Document.createCDATASection(vValue); end; function TParser.EndElement() : TDOMString; begin // after the "<". Result := ''; Consume('/'); // FIXME whitespace? while BInput and (Input <> '>') and not WhitespaceP() do begin Result := Result + Consume(); end; OptionalWhitespace(); Consume('>'); end; function TParser.Element() : TElement; var vBEmpty : Boolean; vName : TDOMString; vEndName : TDOMString; vPending : Char; begin // after the "<". vName := Name(); Result := Document.createElement(vName); try Result.SetAttributes(Attributes()); OptionalWhitespace(); vBEmpty := (Input = '/'); if vBEmpty then begin Consume(); end; Consume('>'); if not vBEmpty then begin Result.SetChildNodes(ChildNodes(vPending)); if vPending <> '<' then begin // 'ChildNodes will return whether it accidentially consumed something it shouldn't have. Consume('<'); end; vEndName := EndElement(); if vEndName <> vName then Error(Format('', [vName]), Format('', [vEndName])); end; except FreeAndNil(Result); raise; end; end; function TParser.OptionalNameStartChar() : TDOMString; inline; begin case Input of ':', 'A'..'Z', '_', 'a'..'z': // FIXME: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] begin Result := Consume(); end; else Result := ''; end; end; function TParser.OptionalNameChar() : TDOMString; inline; begin if Input in [':', 'A'..'Z', '_', 'a'..'z', '-', '.', '0'..'9'] // FIXME: #xB7 | [#x0300-#x036F] | [#x203F-#x2040] // FIXME: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] then Result := Consume() else Result := OptionalNameStartChar(); end; function TParser.Name() : TDOMString; var vChar : TDOMString; begin Result := OptionalNameStartChar(); if Result = '' then Error(''); repeat vChar := OptionalNameChar(); if Length(vChar) > 0 then Result := Result + vChar; until vChar = ''; end; function TParser.NMToken() : TDOMString; // enumeration? var vChar : TDOMString; begin Result := OptionalNameChar(); if Result = '' then Error(''); repeat vChar := OptionalNameChar(); if Length(vChar) > 0 then Result := Result + vChar; until vChar = ''; end; function TParser.Text() : TText; var vValue : TDOMString; begin vValue := ''; while BInput and ((Input in [#9, #10, #13]) or ((Input >= #32) and (Input <> '<') and (Input <> '>') and (Input <> '&'))) do begin vValue := vValue + Consume(); end; Result := Document.createTextNode(vValue); // TODO Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ end; function TParser.ProcessingInstruction() : TProcessingInstruction; var vValue : TDOMString; vTarget : TDOMString; begin // do not process entity references! Consume('?'); vTarget := Name(); Whitespace(); vValue := ''; while BInput and (Input <> '?') do begin vValue := vValue + Consume(); end; Consume('?>'); Result := Document.createProcessingInstruction(vTarget, vValue); end; function TParser.WhitespaceP() : Boolean; inline; begin Result := Input in [#32, #9, #13, #10]; end; procedure TParser.Whitespace(); inline; begin if Input in [#32, #9, #13, #10] then // #13 isn't really supposed to be there. Consume() else Consume(#32); OptionalWhitespace(); end; procedure TParser.OptionalWhitespace(); inline; begin while Input in [#32, #9, #13, #10] do // #13 isn't really supposed to be there. Consume(); end; function TParser.QuotedAttributeValue() : TDOMString; var vExpectedEndingQuote : Char; begin //AttValue ::= '"' ([^<&"] | Reference)* '"' Result := ''; if Input in ['''', '"'] then begin vExpectedEndingQuote := Input; Consume(); while BInput and not (Input in [vExpectedEndingQuote, '<']) do begin if Input = '&' then Result := Result + EntityReference() // implies charref. else Result := Result + Consume(); end; Consume(vExpectedEndingQuote); end else Error(''); end; function TParser.Attributes() : TNamedNodeMap; begin Result := TNamedNodeMap.Create(); try repeat OptionalWhitespace(); if (Input = '>') or (Input = '/') then Exit; // FIXME handle duplicates? Result.setNamedItem(Attribute()); until (Input = '>') or (Input = '/'); except FreeAndNil(Result); raise; end; end; function TParser.Attribute() : TAttr; inline; var vName : TDOMString; vValue : TDOMString; begin vName := Name(); EQ(); vValue := QuotedAttributeValue(); Result := Document.createAttribute(vName); try Result.NodeValue := vValue; except FreeAndNil(Result); raise; end; end; function TParser.PositiveInteger(aBase : TBase) : TCardinal; inline; const ValidCharacters : String = '0123456789ABCDEF'; ValidCharacters2 : String = '0123456789abcdef'; var vDigit : Integer; begin vDigit := Pos(Input, ValidCharacters); if vDigit = 0 then vDigit := Pos(Input, ValidCharacters2); if vDigit = 0 then Error(''); Result := vDigit; repeat vDigit := Pos(Input, ValidCharacters); if vDigit = 0 then vDigit := Pos(Input, ValidCharacters2); if vDigit <> 0 then Result := Result * aBase + Cardinal(vDigit - 1); until vDigit = 0; end; function TParser.EncName() : TDOMString; begin if Input in ['a'..'z', 'A'..'Z'] then Result := Consume() else Error(''); while Input in ['a'..'z', 'A'..'Z', '0'..'9', '.', '_', '-'] do Result := Result + Consume(); end; procedure TParser.EQ(); inline; begin OptionalWhitespace(); Consume('='); OptionalWhitespace(); end; function TParser.XMLDecl() : TXMLDecl; begin Consume(' '1.0' then Error('1.0'); if Input in [#32, #9, #13, #10] then begin Whitespace(); Consume('encoding'); EQ(); Consume('"'); Result.Encoding := EncName(); Consume('"'); end; if Input in [#32, #9, #13, #10] then begin Whitespace(); Consume('standalone'); EQ(); if QuotedAttributeValue() <> 'no' then Result.BStandalone := True; end; OptionalWhitespace(); Consume('?>'); OptionalWhitespace(); end; function TParser.EntityReference() : TDOMString; inline; //function TParser.EntityReference() : TEntityReference; var vCode : TCardinal; vName : TDOMString; vEntity : TEntity; begin Consume('&'); if Input = '#' then begin // Char ref. if Input = 'x' then begin Consume(); vCode := PositiveInteger(16); end else vCode := PositiveInteger(10); // FIXME encoding... Result := Chr(vCode); Exit; //vName := Format('#%u', [vCode]); // make sure there are no heading zeroes and it actually is a number. // TODO handle them right here? end else begin vName := Name(); end; Consume(';'); // TODO validate: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ //Result := Document.createEntityReference(vName); assert(Assigned(Document.doctype)); // FIXME is that always there? vEntity := Document.doctype.GetEntity(vName); if Assigned(vEntity) then Result := vEntity.Contents else Error('', '&' + Result + ';'); end; // inside DTD only. function TParser.DocumentTypePEReference() : TDOMString; begin Consume('%'); Result := Name(); Consume(';'); // FIXME dereference. end; function TParser.DocumentTypeSubsetElement() : TObjectList; begin Result := TObjectList.Create(True); try if Input = '<' then begin Consume(); if Input = '?' then begin Result.Add(ProcessingInstruction()); end else begin Consume('!'); if Input = '-' then Result.Add(Comment()) // after '!'. else begin // the interesting part is here: if Input = 'E' then begin Consume(); if Input = 'L' then begin Consume('LEMENT'); Result.Add(DocumentTypeElementBody()); end else if input = 'N' then begin Consume('NTITY'); Result.Add(DocumentTypeEntityBody()); end else begin Consume(''); end; end else if Input = 'A' then begin Consume('ATTLIST'); FreeAndNil(Result); Result := DocumentTypeATTListBody(); end else if Input = 'N' then begin Consume('NOTATION'); Result.Add(DocumentTypeNotationBody()); end; end; end; end else Consume('<'); // error. except FreeAndNil(Result); raise; end; end; function TParser.DocumentTypeSubset() : TNodeList; var vItemSequence : TObjectList; // NOT subitems. vItemIndex : Integer; begin Result := TNodeList.Create(True); try while BInput and (Input <> ']') do begin OptionalWhitespace(); while Input = '%' do begin {FIXME handle parameter-entity references} DocumentTypePEReference(); OptionalWhitespace(); end; // PEReference() like whitespace? if Input = ']' then Break; vItemSequence := DocumentTypeSubsetElement(); vItemSequence.OwnsObjects := False; if vItemSequence.Count > 0 then for vItemIndex := 0 to vItemSequence.Count - 1 do Result.Append(TNode(vItemSequence[vItemIndex])); //=Result.Append(DocumentTypeSubsetElement()); end; except FreeAndNil(Result); raise; end; end; // TODO support '' function TParser.DocumentTypeElementBody() : TDocumentTypeElementDeclaration; function OptionalRepetition() : TDocumentTypeElementBodyFactorRepetition; inline; begin Result := reOne; if Input = '?' then begin Consume(); Result := reZeroOrOne end else if Input = '*' then begin Consume(); Result := reZeroOrMore; end else if Input = '+' then begin Consume(); Result := reOneOrMore; end; // else ok, something else: keep it. end; function children() : TDocumentTypeElementBodyFactor; forward; function cp() : TDocumentTypeElementBodyFactor; var vItem : TDocumentTypeBodyItem; vRepetition : TDocumentTypeElementBodyFactorRepetition; begin // [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? if Input = '(' then Result := children() else begin vItem := TDocumentTypeElementBodyElementReference.Create(Name()); try vRepetition := OptionalRepetition(); Result := TDocumentTypeElementBodyFactor.Create(vItem, vRepetition); vItem := nil; except FreeAndNil(vItem); raise; end; end; end; function children() : TDocumentTypeElementBodyFactor; var vFirstItem : TDocumentTypeElementBodyFactor; vItem : TDocumentTypeElementBodyFactor; vBPCData : Boolean; begin // we don't know whether it's supposed to be Choice or Sequence here, so keep temp variables until we know... Consume('('); OptionalWhitespace(); vBPCData := False; if Input = '#' then begin // TODO toplevel only. Consume('#PCDATA'); vBPCData := True; vFirstItem := TDocumentTypeElementBodyFactor.Create(TDocumentTypePCData.Create(), reOne); //OptionalWhitespace(); {Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'} end else vFirstItem := cp(); try OptionalWhitespace(); // finally decide, adding the first item... if Input = '|' then begin // choice: Result := TDocumentTypeElementBodyFactor.Create(TDocumentTypeElementBodyChoice.Create()); TDocumentTypeElementBodyChoice(Result.Item).Add(vFirstItem); vFirstItem := nil; while Input = '|' do begin Consume(); OptionalWhitespace(); vItem := cp(); TDocumentTypeElementBodyChoice(Result.Item).Add(vItem); OptionalWhitespace(); end; end else begin // sequence (or single element). Result := TDocumentTypeElementBodyFactor.Create(TDocumentTypeElementBodySequence.Create()); TDocumentTypeElementBodySequence(Result.Item).Append(vFirstItem); vFirstItem := nil; if (Input = ',') and (vBPCData) then begin Error(')', ''); end; try while Input = ',' do begin Consume(); OptionalWhitespace(); TDocumentTypeElementBodySequence(Result.Item).Append(cp()); OptionalWhitespace(); end; except FreeAndNil(Result.Item); raise; end; end; //[47] children ::= (choice | seq) ('?' | '*' | '+')? //[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' [VC: Proper Group/PE Nesting] //[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' [VC: Proper Group/PE Nesting] OptionalWhitespace(); Consume(')'); Result.Repetition := OptionalRepetition(); except FreeAndNil(vFirstItem); raise; end; end; function Content() : TDocumentTypeBodyItem; begin if Input = 'E' then begin Consume('EMPTY'); Result := TDocumentTypeElementBodyEmpty.Create(); // TODO singleton. end else if Input = 'A' then begin Consume('ANY'); Result := TDocumentTypeElementBodyAny.Create(); // TODO singleton. end else Result := children(); end; var vName : TDOMString; vContent : TDocumentTypeBodyItem; begin Whitespace(); vName := Name(); Whitespace(); vContent := Content(); Result := TDocumentTypeElementDeclaration.Create(Document, vName, vContent); try OptionalWhitespace(); Consume('>'); except FreeAndNil(Result); raise; end; end; function TParser.DocumentTypeEntityBody() : TNode; function EntityValue() : TDOMString; var vExpectedEndingQuote : Char; begin if Input = '"' then vExpectedEndingQuote := Consume() else if Input = '''' then vExpectedEndingQuote := Consume() else Consume('"'); while (BInput) and (Input <> vExpectedEndingQuote) do begin if Input = '&' then Result := Result + EntityReference() else if Input = '%' then Result := Result + DocumentTypePEReference() else Result := Result + Consume(); end; Consume(vExpectedEndingQuote); end; function NDATADecl() : TDOMString; begin Consume('NDATA'); Whitespace(); Result := Name(); end; function EntityDef(aName : TDOMString) : TEntity; var vID : TExternalID; vNDATA : TDOMString; vContents : TDOMString; begin if Input in [ '"', '''' ] then Result := TEntity.Create(Document, aName, ''{FIXME}, ''{FIXME}, ''{FIXME}, EntityValue()) else begin vID := ExternalID(); vNDATA := ''; if Input in [#32, #9, #13, #10] then begin Whitespace(); if Input = 'N' then begin vNDATA := NDATADecl(); end; end; vContents := ''; // FIXME. Result := TEntity.Create(Document, aName, vID.PublicID, vID.SystemID, vNDATA, vContents); end; end; function PEDef(aName : TDOMString) : TDocumentTypeBodyPEEntity; var vID : TExternalID; vContents : TDOMString; begin if Input in [ '"', '''' ] then Result := TDocumentTypeBodyPEEntity.Create(Document, aName, ''{FIXME}, ''{FIXME}, EntityValue()) else begin // external. vID := ExternalID(); end; vContents := ''; // FIXME. Result := TDocumentTypeBodyPEEntity.Create(Document, aName, vID.PublicID, vID.SystemID, vContents); end; var vName : TDOMString; begin Whitespace(); if Input = '%' then begin // PEDecl: Consume(); Whitespace(); vName := Name(); Whitespace(); Result := PEDef(vName); end else begin // GEDecl: vName := Name(); Whitespace(); Result := EntityDef(vName); end; OptionalWhitespace(); Consume('>'); end; function TParser.DocumentTypeATTListBody() : TDocumentTypeAttributeDeclarations; // function TokenizedType(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeDeclaration; inline; var vBaseType : TDocumentTypeBodyAttributeBaseType; begin if Input = 'I' then begin Consume('D'); if Input = 'R' then begin Consume('REF'); if Input = 'S' then begin Consume(); vBaseType := atIDREFS; end else begin // ok, just 'IDREF'. vBaseType := atIDREF; end; end else begin // ok, just "ID". vBaseType := atID; end; end else if Input = 'E' then begin Consume('ENTIT'); if Input = 'Y' then begin Consume(); vBaseType := atEntity; end else begin Consume('IES'); vBaseType := atEntities; end; end else if Input = 'N' then begin Consume('NMTOKEN'); if Input = 'S' then begin Consume(); vBaseType := atNMTOKENS; end else begin // ok, just "NMTOKEN". vBaseType := atNMTOKEN; end; end else Error(''); Result := TDocumentTypeAttributeDeclaration.Create(Document, aAttributeName, aElementName, vBaseType); end; function Enumeration(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeBodyEnumeration; var vFirstItem : TDOMString; vItem : TDOMString; begin Consume('('); OptionalWhitespace(); vFirstItem := NMToken(); Result := TDocumentTypeAttributeBodyEnumeration.Create(Document, aAttributeName, aElementName); try Result.Append(vFirstItem); OptionalWhitespace(); while Input = '|' do begin Consume(); OptionalWhitespace(); vItem := NMToken(); Result.Append(vItem); OptionalWhitespace(); end; Consume(')'); // [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] // [VC: No Duplicate Tokens] except FreeAndNil(Result); FreeAndNil(vFirstItem); raise; end; end; function NotationType(aAttributeName, aElementName : TDOMString) : TDocumentTypeBodyNotationChoice; function GetNotation(const aName : TDOMString) : TNotation; begin assert(Assigned(Document.doctype)); Result := Document.doctype.GetNotation(aName); if Result = nil then Error('', aName); end; var vFirstItem : TDOMString; vItem : TDOMString; begin Consume('NOTATION'); Whitespace(); Consume('('); OptionalWhitespace(); vFirstItem := Name(); try OptionalWhitespace(); Result := TDocumentTypeBodyNotationChoice.Create(Document, aAttributeName, aElementName); Result.Add(GetNotation(vFirstItem)); try while Input = '|' do begin Consume(); OptionalWhitespace(); vItem := Name(); Result.Add(GetNotation(vItem)); // no dupes. OptionalWhitespace(); end; Consume(')'); except FreeAndNil(Result); raise; end; except raise; end; //[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' { [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]} end; function EnumeratedType(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeDeclaration; begin if Input = 'N' then Result := NotationType(aAttributeName, aElementName) else Result := Enumeration(aAttributeName, aElementName); end; function DefaultDecl() : TDocumentTypeBodyAttributeDefault; var vImportance : TDocumentTypeBodyAttributeDefaultImportance; vValue : TDOMString; begin vValue := ''; vImportance := imOptional; if Input = '#' then begin Consume(); if Input = 'R' then begin Consume('REQUIRED'); vImportance := imRequired; end else if Input = 'I' then begin Consume('IMPLIED'); vImportance := imImplied; end else begin vImportance := imFixed; if Input = 'F' then begin Consume('FIXED'); Whitespace(); end else Error(''); vValue := QuotedAttributeValue(); end; end else begin vValue := QuotedAttributeValue(); end; Result := TDocumentTypeBodyAttributeDefault.Create(vImportance, vValue); end; function ATTType(aAttributeName, aElementName : TDOMString) : TDocumentTypeAttributeDeclaration; inline; var vDefault : TDocumentTypeBodyAttributeDefault; begin if Input = 'C' then begin Consume('CDATA'); Result := TDocumentTypeAttributeDeclaration.Create(Document, aAttributeName, aElementName, atString); end else if Input in [ 'I', 'E', 'N' ] then begin // tokenized type. Result := TokenizedType(aAttributeName, aElementName); end else Result := EnumeratedType(aAttributeName, aElementName); // Result := TDocumentTypeAttributeDeclaration.Create(Document, vAttributeName, aElementName, vType, vDefault); try try Whitespace(); vDefault := DefaultDecl(); Result.Default_ := vDefault; vDefault := nil; except FreeAndNil(vDefault); raise; end; except FreeAndNil(vDefault); FreeAndNil(Result); raise; end; end; function ATTDEF(aElementName : TDOMString) : TDocumentTypeAttributeDeclaration{s}; var vAttributeName : TDOMString; begin vAttributeName := Name(); Whitespace(); Result := ATTType(vAttributeName, aElementName); end; var vElementName : TDOMString; begin Whitespace(); vElementName := Name(); Whitespace(); Result := TDocumentTypeAttributeDeclarations.Create(True); try while BInput and (Input <> '>') do begin Result.Add(ATTDEF(vElementName)); if not WhitespaceP() then Break; Whitespace(); end; Consume('>'); except FreeAndNil(Result); raise; end; end; function TParser.ExternalID() : TExternalID; function SystemLiteral() : TDOMString; var vExpectedEndingQuote : Char; begin if Input = '"' then vExpectedEndingQuote := Consume() else if Input = '''' then vExpectedEndingQuote := Consume() else Consume('"'); Result := ''; while BInput and (Input <> vExpectedEndingQuote) do begin Result := Result + Consume(); end; Consume(vExpectedEndingQuote); end; // returns: #0: not found. function OptionalPubidChar(aNot : Char) : Char; inline; begin if (Input <> aNot) and (Input in [#20, #13, #10, 'a'..'z', 'A'..'Z', '0'..'9', '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*', '#', '@', '$', '_', '%']) then Result := Consume() else Result := #0; end; function PubidLiteral() : TDOMString; var vExpectedEndingQuote : Char; vChar : Char; begin if Input = '"' then vExpectedEndingQuote := Consume() else if Input = '''' then vExpectedEndingQuote := Consume() else Consume('"'); Result := ''; repeat vChar := OptionalPubidChar(vExpectedEndingQuote); if vChar <> #0 then Result := Result + vChar; until vChar = #0; Consume(vExpectedEndingQuote); end; begin if Input = 'S' then begin Consume('SYSTEM'); Whitespace(); Result.SystemID := SystemLiteral(); Result.PublicID := ''; end else if Input = 'P' then begin Consume('PUBLIC'); Whitespace(); Result.PublicID := PubidLiteral(); Whitespace(); Result.SystemID := SystemLiteral(); end else Error(''); end; function TParser.DocumentTypeNotationBody() : TNotation; var vID : TExternalID; vName : TDOMString; begin Whitespace(); vName := Name(); Whitespace(); vID := ExternalID(); //(ExternalID | PublicID FIXME ) //[83] PublicID ::= 'PUBLIC' S PubidLiteral // ExternalID looks similar. OptionalWhitespace(); Consume('>'); Result := TNotation.Create(Document, vName, vID.PublicID, vID.SystemID); end; function TParser.DocumentType() : TDocumentType; var vName : TDOMString; vNodeList : TNodeList; vID : TExternalID; begin Consume('DOCTYPE'); Whitespace(); vName := Name(); // '[' then begin vID := ExternalID(); // FIXME use. end; // else misdetected, will handle later (just below). end; OptionalWhitespace(); if Input = '[' then begin Consume('['); vNodeList := DocumentTypeSubset(); // FIXME. try Consume(']'); OptionalWhitespace(); except FreeAndNil(vNodeList); raise; end; end else vNodeList := nil; try Consume('>'); // FIXME error handling. Result := TDocumentType.Create(Document, vName, vNodeList); vNodeList := nil; except FreeAndNil(vNodeList); raise; end; AddDefaultEntities(Result); end; function TParser.Parse() : TDocument; var vXMLDecl : TXMLDecl; begin vXMLDecl := XMLDecl(); // TODO make optional. { Prolog [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? [27] Misc ::= Comment | PI | S Document Type Definition [28] doctypedecl ::= '' [VC: Root Element Type] [WFC: External Subset] [28a] DeclSep ::= PEReference | S [WFC: PE Between Declarations] [28b] DocumentTypeSubset ::= (markupdecl | DeclSep)* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment [VC: Proper Declaration/PE Nesting] The document type declaration MUST appear before the first element in the document. [WFC: PEs in Internal Subset] } assert(fDocument = nil); Result := TDocument.Create(); fDocument := Result; // magic: OptionalWhitespace() '<' ProcessingInstruction() try Result.SetChildNodes(ToplevelChildNodes()); except FreeAndNil(Result); raise; end; // assert PI. // assert single document element. // assert doctype? // assert DTD / schema? end; { TDocumentTypeElementBodyChoice } destructor TDocumentTypeElementBodyChoice.Destroy(); begin FreeAndNil(fList); inherited Destroy(); end; constructor TDocumentTypeElementBodyChoice.Create(); begin fList := TObjectList.Create(True); end; procedure TDocumentTypeElementBodyChoice.Add(vItem : TDocumentTypeBodyItem); begin fList.Add(vItem); end; { TDocumentTypeElementBodySequence } destructor TDocumentTypeElementBodySequence.Destroy(); begin FreeAndNil(fList); inherited Destroy(); end; constructor TDocumentTypeElementBodySequence.Create(); begin fList := TObjectList.Create(True); end; procedure TDocumentTypeElementBodySequence.Append(vItem : TDocumentTypeBodyItem); begin fList.Add(vItem); end; { TDocumentTypeElementBodyElementReference } constructor TDocumentTypeElementBodyElementReference.Create(aTagName : TDOMString); begin // FIXME inherited Create... fTagName := aTagName; end; { TDocumentTypeBodyPEEntity } constructor TDocumentTypeBodyPEEntity.Create(aOwnerDocument : TDocument; aNodeName, aPublicID, aSystemID : TDOMString; aContents : TDOMString); begin inherited Create(aOwnerDocument, ntEntity, aNodeName); fPublicID := aPublicID; fSystemID := aSystemID; //fNDATAName := aNotationName; fContents := aContents; end; { TDocumentTypeElementBodyFactor } constructor TDocumentTypeElementBodyFactor.Create(aItem : TDocumentTypeBodyItem; aRepetition : TDocumentTypeElementBodyFactorRepetition = reOne); begin fRepetition := aRepetition; fItem := aItem; end; end.