ARTICLE AD BOX
I want to embed a DTD into a C++ application, parse the DTD and validate an XML document against it. However, when the DTD becomes large enough, the DTD parser fails. If I dump the same DTD into a file and then use xmlstarlet val -d my.dtd my.xml, it works correctly. Therefore I suspect an issue with buffer management. What am I doing wrong?
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>

#include <cstring>
#include <iostream>
#include <memory>

/*
 * DTD as a string.
 * Disable any of the ATTLIST entries by setting the #if 0
 * and the parser error goes away
 */
const char* dtdstr =
    "<!ELEMENT recording (monitor|processor|remote)*>\n"
#if 1
    "<!ATTLIST recording\n"
    " label CDATA #IMPLIED\n"
    " logfile CDATA #IMPLIED\n"
    " stop (halt|ready) 'halt'\n"
    " wait (halt|ready) 'ready'\n"
    " warmup_seconds CDATA '0'\n"
    ">\n"
#endif
    "<!ELEMENT monitor (instrument)*>\n"
#if 1
    "<!ATTLIST monitor\n"
    " address CDATA #REQUIRED\n"
    " port CDATA #REQUIRED\n"
    " failure (critical|ignore|startup) 'critical'\n"
    ">\n"
#endif
    "<!ELEMENT instrument EMPTY>\n"
#if 1
    "<!ATTLIST instrument\n"
    " address CDATA #REQUIRED\n"
    " mask CDATA #IMPLIED\n"
    " sync (external|internal) 'internal'\n"
    ">\n"
#endif
    "<!ELEMENT processor (interface)*>\n"
#if 1
    "<!ATTLIST processor\n"
    " label CDATA #IMPLIED\n"
    " logfile CDATA #IMPLIED\n"
    " stop (halt|ready) 'halt'\n"
    " wait (halt|ready) 'ready'\n"
    " warmup_seconds CDATA '0'\n"
    ">\n"
#endif
    "<!ELEMENT interface ANY>\n"
    "\n";

/* XML document that is validated against the DTD above. */
const char* xmlstr =
    "<recording>\n"
    " <monitor address=\"127.0.0.1\" port=\"0\">\n"
    " <instrument address=\"127.0.0.1\"/>\n"
    " </monitor>\n"
    " <processor>\n"
    " <interface/>\n"
    " </processor>\n"
    "</recording>\n";

namespace {

/* Deleter so a std::unique_ptr can release a parsed document with xmlFreeDoc. */
struct XmlDocDeleter {
    void operator()(xmlDoc* doc) const noexcept { xmlFreeDoc(doc); }
};

/* Deleter so a std::unique_ptr can release a stand-alone DTD with xmlFreeDtd. */
struct XmlDtdDeleter {
    void operator()(xmlDtd* dtd) const noexcept { xmlFreeDtd(dtd); }
};

using XmlDocHandle = std::unique_ptr<xmlDoc, XmlDocDeleter>;
using XmlDtdHandle = std::unique_ptr<xmlDtd, XmlDtdDeleter>;

} // namespace

/*
 * Prints the embedded DTD and XML, parses both from memory and reports
 * whether the document validates against the DTD.
 *
 * Exit status:
 *   0  validation ran ("valid" or "invalid" was printed)
 *   1  the XML document could not be parsed
 *   2  the input buffer for the DTD could not be created
 *   3  the DTD could not be parsed
 */
int main()
{
    std::cout << "DTD:\n" << dtdstr << "\n\nXML:\n" << xmlstr << "\n\n";

    // Owning handle: the document is released on every exit path below.
    const XmlDocHandle doc{xmlParseDoc(reinterpret_cast<const xmlChar*>(xmlstr))};
    if (!doc)
        return 1;

    // NOTE(review): this static (non-copying) buffer is later handed to
    // xmlIOParseDTD together with an explicit XML_CHAR_ENCODING_ASCII. That
    // combination is presumably what makes larger DTDs fail on some libxml2
    // releases; xmlParserInputBufferCreateMem and/or XML_CHAR_ENCODING_NONE
    // are worth trying. Unverified, confirm against the installed libxml2.
    xmlParserInputBufferPtr dtdbuf = xmlParserInputBufferCreateStatic(
        dtdstr, static_cast<int>(std::strlen(dtdstr)), XML_CHAR_ENCODING_NONE);
    if (!dtdbuf)
        return 2;

    // Per the libxml2 documentation xmlIOParseDTD frees its input buffer in
    // every case, so dtdbuf must not be freed here.
    const XmlDtdHandle dtd{xmlIOParseDTD(nullptr, dtdbuf, XML_CHAR_ENCODING_ASCII)};
    if (!dtd)
        return 3;

    // Zero-initialised context: no error or warning callbacks are installed.
    xmlValidCtxt vctxt{};
    if (xmlValidateDtd(&vctxt, doc.get(), dtd.get()))
        std::cout << "valid\n";
    else
        std::cout << "invalid\n";

    // dtd is destroyed before doc (reverse declaration order).
    return 0;
}
// Compile with g++ -O1 -I/usr/include/libxml2 xmldtd.cpp -lxml2. Operating system: Ubuntu 24.04.
Parser error:
Entity: line 21: parser error : ContentDecl : ',' '|' or ')' expected
 label CDATA #IMPLIED
 ^
Entity: line 21: parser error : expected '>'
 label CDATA #IMPLIED
 ^
Entity: line 21: parser error : Content error in the external subset
 label CDATA #IMPLIED