%{
/*
 * Lexer specification for XML documents and DTD declarations.
 *
 * NOTE(review): this specification was reviewed from a copy in which line
 * breaks were collapsed and text between angle brackets appears to have been
 * dropped.  Start-condition prefixes on the rules and a few rule heads are
 * therefore missing; the affected places are flagged below and must be
 * restored from the original file before this will build.
 */

/* NOTE(review): the two system header names were missing in the reviewed
 * text; restored from the functions used here (sscanf, strdup, strlen) --
 * confirm against the original. */
#include <stdio.h>
#include <string.h>
#include "y.tab.h"
#include "element_path.h"

/*
 * XML name character classes, kept for reference (the head of the
 * NameStartChar production appears to be missing from this note):
 *
 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
 * [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
 * [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
 * [#x10000-#xEFFFF]
 *
 * [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
 *                   [#x0300-#x036F] | [#x203F-#x2040]
 */

/* FIXME more */
/* TODO NotationDecl := ''; */
/* FIXME where to use INCLUDE and IGNORE? */
/* FIXME namechar in ENTITY_REFERENCE */
/* FIXME elementdecl '*' '(' ')' */

/*[^< \t\015\012]* { yylval.text = strdup(yytext); return BLURB; }*/

/*
 * Puts the scanner into the XML_UNDECIDED start condition.
 *
 * XML_UNDECIDED is defined locally as a workaround (see the #warning): the
 * value 17 is hard-coded here and has to agree with the number the lexer
 * generator assigns to that start condition.
 */
void lex_init ()
{
#warning workaround
#define XML_UNDECIDED 17
    BEGIN XML_UNDECIDED;
}

/*stop*/ /*printf("Stop command received\n")*/; /*start {}*/

/*
 * Decodes a numeric character reference.
 *
 * format: sscanf format string containing exactly one integer conversion
 *         that receives the code point.
 * str:    text to decode.
 *
 * Returns the decoded value, or -1 when the text does not match the format
 * or the decoded value is negative.
 */
static int decode_character_reference(const char* format, const char* str)
{
    int result;

    /* Validate the value that was just parsed.  The previous code tested
     * yylval.symbol here, which is unrelated to this conversion and let
     * negative results through.  result is only read when sscanf stored it. */
    if(sscanf(str, format, &result) != 1 || result < 0) {
        return -1;
    }

    return result;
}
%}

%Start XML_HEADER XML_ATTLIST_DECLARATION XML_ELEMENT_DECLARATION XML_ATTRIBUTE XML_DOCTYPE XML_NOTATION_DECLARATION XML_GE_ENTITY_DECLARATION XML_INCLUDE XML_IGNORE XML_ATTRIBUTE_VALUE XML_ATTRIBUTE_VALUE_S XML_ENDTAG XML_PI XML_CDATA XML_BODY XML_ATTRIBUTE_VALUE_HTML3 XML_UNDECIDED XML_COMMENT XML_PI_ATTRIBUTE_VALUE

%%

    /* NOTE(review): identical patterns occur more than once below, which
       suggests each rule originally carried a start-condition prefix.  The
       rules are kept in the order found. */

"<" {
    BEGIN XML_ATTRIBUTE;
    return '<';
    /* TODO XML comment which is ignored? */
}

    /* NOTE(review): the next two lines are damaged in the reviewed text (rule
       heads missing); kept verbatim so nothing is lost. */
""? */ BEGIN XML_HEADER; return BEGIN_XML; }
"""version" return VERSION;
"=" return '=';
"encoding" return ENCODING;
"\""[^\"]*"\"" {
    /*assert(yytext[0]);*/
    /* Copy the text after the opening quote, then cut off the closing one. */
    yylval.text = strdup(&yytext[1]);
    yylval.text[strlen(yylval.text) - 1] = 0;
    return QUOTED_STRING;
}
"standalone" return STANDALONE;
[ \t\015\012]* {
    /* TODO: move that to attribute and element definitions? */
    return S;
}
"?>" {
    BEGIN XML_UNDECIDED;
    return END_PI;
}
[^?]* {
    /*[#a-zA-Z:_-][a-zA-Z:_0-9-]* {*/
    yylval.text = strdup(yytext);
    return NAME;
    /* "=" { BEGIN XML_PI_ATTRIBUTE_VALUE; return '='; } */
}
"?>" {
    BEGIN XML_UNDECIDED;
    return END_PI;
}
[^<&\"]* {
    yylval.text = strdup(yytext);
    return ATTRIBUTE_VALUE;
}
"\"" {
    BEGIN XML_PI;
    return '"';
}
[^<&]* {
    /* FIXME what about PIs? */
    /* modified for whitespacelessness. */
    /* pattern contains workaround for people who do " */
    yylval.text = strdup(yytext);
    return BLURB;
}

    /* NOTE(review): the head of the next rule is damaged in the reviewed
       text; kept verbatim. */
""?" { return '?'; }
"*" { return '*'; }
    /* Was written '+': single quotes are ordinary characters in lex, so that
       pattern matched a run of apostrophes instead of a plus sign. */
"+" { return '+'; }
"(" { return '('; }
")" { return ')'; }
"|" { return '|'; }
"," { return ','; }
"#PCDATA" { return PCDATA; }
"EMPTY" { return EMPTY; }
"ANY" { return ANY; }
">" {
    BEGIN INITIAL;
    return '>';
}

    /* NOTE(review): the head of the next rule is damaged in the reviewed
       text; kept verbatim. */
""NOTATION" { return NOTATION; }
"(" { return '('; }
"ID" { return ID; }
"IDREF" { return IDREF; }
"IDREFS" { return IDREFS; }
"ENTITY" { return ENTITY; }
"ENTITIES" { return ENTITIES; }
"NMTOKEN" { return NMTOKEN; }
"NMTOKENS" { return NMTOKENS; }
"CDATA" { return CDATA; }
")" { return ')'; }
"|" { return '|'; }
"ordered" { return ORDERED; }
    /* Was written '=': with single quotes the pattern matched the three
       characters '=' rather than a bare equals sign. */
"=" { return '='; }
[:_A-Za-z.0-9]+ {
    yylval.text = strdup(yytext);
    return NAME;
    /* FIXME MORE CHARACTERS (NameChar|NameStartChar) */
}
"#REQUIRED" { return REQUIRED; }
"#IMPLIED" { return IMPLIED; }
"#FIXED" {
    /* FIXME and then the attribute value. */
    return FIXED;
}
[^<&\"]* {
    yylval.text = strdup(yytext);
    return ATTRIBUTE_VALUE;
}
"&&" {
    /* technically, this is wrong. But everyone does it. */
    yylval.text = strdup("&&");
    return ENTITY_REFERENCE;
}
"&"[a-zA-Z_:][a-zA-Z_:0-9]*";" {
    yylval.text = strdup(yytext);
    return ENTITY_REFERENCE;
}
"\"" {
    BEGIN XML_ATTRIBUTE;
    return '"';
}
[^&<\']* {
    yylval.text = strdup(yytext);
    return ATTRIBUTE_VALUE;
}
"&"[a-zA-Z_][a-zA-Z_0-9]*";" {
    yylval.text = strdup(yytext);
    return ENTITY_REFERENCE;
}
"\'" {
    /* A closing apostrophe is reported with the same '"' token as a
       closing double quote. */
    BEGIN XML_ATTRIBUTE;
    return '"';
}
">" {
    BEGIN XML_BODY;
    return '>';
}
"/" { return '/'; }
[#a-zA-Z:_0-9-][a-zA-Z:_0-9-]* {
    yylval.text = strdup(yytext);
    /* moooore chars */
    return NAME;
    /* FIXME more chars */
}
"=" {
    BEGIN XML_ATTRIBUTE_VALUE_HTML3;
    return '=';
}
[#a-zA-Z:_/?0-9=().%-][a-zA-Z:_/?0-9=().%-]* {
    /* this is also used for HTML-style attribute values. */
    yylval.text = strdup(yytext);
    BEGIN XML_ATTRIBUTE;
    return NAME; /*ATTRIBUTE_VALUE;*/
    /* FIXME more chars */
}
"\"" {
    BEGIN XML_ATTRIBUTE_VALUE;
    return '"';
}
"\'" {
    /* An opening apostrophe is also reported as a '"' token. */
    BEGIN XML_ATTRIBUTE_VALUE_S;
    return '"';
}
"\""[^\"]*"\"" {
    /* Copy the text after the opening quote, then cut off the closing one. */
    yylval.text = strdup(&yytext[1]);
    yylval.text[strlen(yylval.text) - 1] = 0;
    return QUOTED_STRING;
}
"'"[^\']*"'" {
    /* Same quote stripping for apostrophe-delimited strings. */
    yylval.text = strdup(&yytext[1]);
    yylval.text[strlen(yylval.text) - 1] = 0;
    return QUOTED_STRING;
}
"-->" {
    BEGIN XML_BODY;
    return END_COMMENT;
}
[^-]* {
    yylval.text = strdup(yytext);
    return BLURB;
}
"-" {
    /* NOTE(review): a lone '-' is handed on as "." -- confirm this
       substitution is intended. */
    yylval.text = strdup(".");
    return BLURB;
}
"