diff options
Diffstat (limited to '')
| -rwxr-xr-x | src/dom/xmlreader.cpp | 985 |
1 files changed, 0 insertions, 985 deletions
diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp deleted file mode 100755 index dd88c2bb8..000000000 --- a/src/dom/xmlreader.cpp +++ /dev/null @@ -1,985 +0,0 @@ -/** - * Phoebe DOM Implementation. - * - * This is a C++ approximation of the W3C DOM model, which follows - * fairly closely the specifications in the various .idl files, copies of - * which are provided for reference. Most important is this one: - * - * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html - * - * Authors: - * Bob Jamison - * - * Copyright (C) 2005 Bob Jamison - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - - - -#include "xmlreader.h" -#include "charclass.h" -#include "svgimpl.h" - -#include <stdarg.h> - -namespace org -{ -namespace w3c -{ -namespace dom -{ - - -//######################################################################### -//# E N T I T Y T A B L E -//######################################################################### -struct EntityInfo -{ - char *escape; - int escapeLength; - char *value; -}; - - -static EntityInfo entityTable[] = -{ - { "&" , 5 , "&" }, - { "<" , 4 , "<" }, - { ">" , 4 , ">" }, - { "'" , 6 , "'" }, - { """ , 6 , "\"" }, - { NULL , 0 , "\0" } -}; - - - -//######################################################################### -//# M E S S A G E S -//######################################################################### - - -/** - * - */ -void XmlReader::error(char *fmt, ...) -{ - va_list args; - fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args) ; - fprintf(stderr, "\n"); -} - - - -//######################################################################### -//# U T I L I T Y -//######################################################################### - -static void trim(DOMString &str) -{ - int len = str.size(); - if (len<1) - return; - - int start = 0; - int end = 0; - for (start=0 ; start<len ; start++) - { - int ch = str[start]; - if (ch<=' ' || ch>126) - break; - } - for (end=len-1 ; end>=0 ; end--) - { - int ch = str[end]; - if (ch<=' ' || ch>126) - break; - } - if (start<end) - { - str = str.substr(start, end+1); - } -} - -//######################################################################### -//# P A R S I N G -//######################################################################### - -/** - * Get the character at the position and record the fact - */ -int XmlReader::get(int p) -{ - if (p >= len) - return -1; - int ch = parsebuf[p]; - //printf("%c", ch); - if (ch == '\n' || ch == '\r') - { - colNr = 0; - lineNr++; - } - else - colNr++; - return ch; -} - -/** - * Look at the character at the position, but don't note the fact - */ -int XmlReader::peek(int p) -{ - if (p >= len) - return -1; - int ch = parsebuf[p]; - return ch; -} - - -/** - * Test if the given substring exists at the given position - * in parsebuf. Use peek() in case of out-of-bounds - */ -bool XmlReader::match(int pos, char *str) -{ - while (*str) - { - if (peek(pos++) != *str++) - return false; - } - return true; -} - - - -/** - * Test if the given substring exists at the given position - * in a given buffer - */ -/* -static bool bufMatch(const DOMString &buf, int pos, char *str) -{ - while (*str) - { - if (buf[pos++] != *str++) - return false; - } - return true; -} -*/ - - -/** - * - */ -int XmlReader::skipwhite(int p) -{ - while (p < len) - { - int b = get(p); - if (!isWhitespace(b)) - break; - p++; - } - return p; -} - -/** - * modify this to allow all chars for an element or attribute name - */ -int XmlReader::getWord(int p, DOMString &result) -{ - while (p<len) - { - int b = get(p); - if (b<=' ' || b=='/' || b=='>' || b=='=') - break; - result.push_back(b); - p++; - } - return p; -} - -/** - * get a name and prefix, if any - */ -int XmlReader::getPrefixedWord(int p, DOMString &prefix, - DOMString &shortWord, DOMString &fullWord) -{ - while (p<len) - { - int b = get(p); - if (b<=' ' || b=='/' || b=='>' || b=='=') - break; - else if (b == ':') - { - prefix = shortWord; - shortWord = ""; - } - else - shortWord.push_back(b); - p++; - } - if (prefix.size() > 0) - fullWord = prefix + ":" + shortWord; - else - fullWord = shortWord; - return p; -} - - -/** - * Assume that we are starting on a quote. Ends on the char - * after the final '"' - */ -int XmlReader::getQuoted(int p0, DOMString &result) -{ - - int p = p0; - - if (peek(p)!='"' && peek(p)!='\'') - return p0; - - int b = get(p++); //go to next char - - DOMString buf; - - while (p<len ) - { - b = get(p++); - if (b=='"' || b=='\'') - break; - else if (b=='&') - { - p = parseEntity(p, result); - if (p < 0) - return p0; - } - else - { - buf.push_back(b); - } - } - - //printf("quoted text:'%s'\n", buf.c_str()); - - result.append(buf); - - return p; -} - - - -/** - * Parse a <!xml> tag. Node may be null. Assumes current char is '<' - * ends on char after '>' - */ -int XmlReader::parseVersion(int p0) -{ - int p = p0; - - if (!match(p, "<?xml")) - return p0; - - p += 5; - colNr += 5; - - bool quickCloseDummy; - Node *node = new NodeImpl(); - int p2 = parseAttributes(p, node, &quickCloseDummy); - if (p2 < p) - { - delete node; - return p0; - } - p = p2; - - //get the attributes that we need - NamedNodeMap attributes = node->getAttributes(); - Node *attr = attributes.getNamedItem("version"); - if (attr) - document->setXmlVersion(attr->getNodeValue()); - attr = attributes.getNamedItem("encoding"); - if (attr) - { /*document->setXmlEncoding(attr->getNodeValue());*/ } - attr = attributes.getNamedItem("standalone"); - if (attr) - document->setXmlStandalone((attr->getNodeValue() == "yes")); - delete node; - - //#now we should be pointing at '?>' - if (!match(p, "?>")) - { - return p0; - } - - //skip over '?>' - get(p++); - get(p++); - - return p; -} - - -/** - * Parse a <!DOCTYPE> tag. doctype may be null. Expects '<' - * on start. Ends pointing at char after '>' - */ -int XmlReader::parseDoctype(int p0) -{ - int p = p0; - - if (!match(p, "<!DOCTYPE")) - return p0; - - p += 9; - colNr += 9; - - DocumentType *doctype = document->getDoctype(); - if (!doctype) - return p0; - - - //### get the root name of the document - p = skipwhite(p); - DOMString rootName; - int p2 = getWord(p, rootName); - if (p2 <= p) - return p0; - p = p2; - //printf("doctype root '%s'\n", rootName.c_str()); - - - while (p < len) - { - p = skipwhite(p); - if (peek(p) == '>') - break; - else if (peek(p) == '[') //just ignore 'internal' [] stuff - { - while (p < len) - { - int ch = get(p++); - if (ch == ']') - break; - } - p++; - } - else if (match(p, "PUBLIC")) - { - p += 6; - colNr += 6; - p = skipwhite(p); - DOMString pubIdLiteral; - int p2 = getQuoted(p, pubIdLiteral); - if (p2 <= p) - return p0; - p = p2; - p = skipwhite(p); - DOMString systemLiteral; - p2 = getQuoted(p, systemLiteral); - if (p2 <= p) - return p0; - p = p2; - //printf("PUBLIC \"%s\" \"%s\" \n", - // pubIdLiteral.c_str(), systemLiteral.c_str()); - } - else if (match(p, "SYSTEM")) - { - p += 6; - colNr += 6; - p = skipwhite(p); - DOMString systemLiteral; - int p2 = getQuoted(p, systemLiteral); - if (p2 <= p) - return p0; - p = p2; - //printf("SYSTEM \"%s\" \n", systemLiteral.c_str()); - } - } - - - //skip over '>' - get(p++); - - return p; -} - - - -/** - * Expects '<' on startup, ends on char after '>' - */ -int XmlReader::parseComment(int p0, Comment *comment) -{ - int p = p0; - - if (!match(p, "<!--")) - return p0; - - colNr += 4; - p += 4; - - DOMString buf; - - while (p<len-3) - { - if (match(p, "-->")) - { - p += 3; - colNr += 3; - break; - } - int ch = get(p++); - buf.push_back(ch); - } - - comment->setNodeValue(buf); - - return p; -} - - - -/** - * - */ -int XmlReader::parseCDATA(int p0, CDATASection *cdata) -{ - - int p = p0; - - if (!match(p, "<![CDATA[")) - return p0; - - colNr += 9; - p += 9; - - DOMString buf; - - while (p<len) - { - if (match(p, "]]>")) - { - p +=3; - colNr += 3; - break; - } - int ch = get(p++); - buf.push_back(ch); - } - - /*printf("Got CDATA:%s\n",buf.c_str());*/ - cdata->setNodeValue(buf); - - return p; -} - - - -/** - * - */ -int XmlReader::parseText(int p0, Text *text) -{ - - int p = p0; - - DOMString buf; - - while (p<len) - { - if (peek(p) == '&') - { - p = parseEntity(p, buf); - if (p < 0) //error? - return p0; - } - else if (peek(p) == '<') - { - break; - } - else - { - int ch = get(p++); - buf.push_back(ch); - } - } - - /*printf("Got Text:%s\n",buf.c_str());*/ - text->setNodeValue(buf); - - return p; -} - - - - - -/** - * Parses attributes of a node. Should end pointing at either the - * '?' of a version or doctype tag, or a '>' of a normal tag - */ -int XmlReader::parseAttributes(int p0, Node *node, bool *quickClose) -{ - *quickClose = false; - - int p = p0; - - NamedNodeMap attributes; - - while (p<len) - { - /*printf("ch:%c\n",ch);*/ - p = skipwhite(p); - int ch = get(p); - - /*printf("ch:%c\n",ch);*/ - if (ch == '?' || ch == '>')//done - break; - else if (ch=='/' && p<len+1) - { - p++; - p = skipwhite(p); - ch = peek(p); - if (ch == '>') - { - p++; - *quickClose = true; - /*printf("quick close\n");*/ - return p; - } - } - DOMString shortName; - DOMString prefix; - DOMString qualifiedName; - int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName); - if (p2 <= p) - break; - - /*printf("name:%s",buf);*/ - p = p2; - p = skipwhite(p); - ch = get(p); - /*printf("ch:%c\n",ch);*/ - if (ch != '=') - break; - p++; - p = skipwhite(p); - /*ch = parsebuf[p];*/ - /*printf("ch:%c\n",ch);*/ - DOMString attrValue; - p2 = getQuoted(p, attrValue); - p = p2; - /*printf("name:'%s' value:'%s'\n",buf,buf2);*/ - - DOMString namespaceURI = ""; - if (prefix == "xmlns" || shortName == "xmlns") - namespaceURI = XMLNSNAME; - - //## Now let us make the attribute and give it to the node - Attr *attr = document->createAttributeNS(namespaceURI, qualifiedName); - attr->setValue(attrValue); - node->getAttributes().setNamedItemNS(attr); - - }//while p<len - - return p; -} - -/** - * Appends the value of an entity to the buffer - */ -int XmlReader::parseEntity(int p0, DOMString &buf) -{ - int p = p0; - for (EntityInfo *info = entityTable ; info->escape ; info++) - { - if (match(p, info->escape)) - { - p += info->escapeLength; - colNr += info->escapeLength; - buf += info->value; - return p; - } - } - - error("unterminated entity"); - return -1; -} - - -//######################################################################### -//# P A R S E A N O D E -//######################################################################### - -/** - * Parse as a document, preserving the original structure as much as - * possible - */ -int XmlReader::parseNode(int p0, Node *node, int depth) -{ - - int p = p0; - - - //### OPEN TAG - int ch = get(p++); - if (ch != '<') - return p0; - - p = skipwhite(p); - DOMString openTagName; - DOMString openTagNamePrefix; - DOMString openTagQualifiedName; - int p2 = getPrefixedWord(p,openTagNamePrefix, - openTagName, openTagQualifiedName); - if (p2 <= p) - return p0; - p = p2; - p = skipwhite(p); - - //printf("qualifiedName:%s\n", openTagQualifiedName.c_str()); - DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix); - document->renameNode(node, namespaceURI, openTagQualifiedName); - - //### ATTRIBUTES - bool quickClose; - p = parseAttributes(p, node, &quickClose); - if (quickClose) //trivial tag: <name/> - return p; - - p++; //skip over '>' - - - DOMString nodeValue; - - /* ### Get intervening data ### */ - while (p<len && keepGoing) - { - //### COMMENT - if (match(p, "<!--")) - { - Comment *comment = document->createComment(""); - p2 = parseComment(p, comment); - if (p2 <= p) - return p0; - p = p2; - if (parseAsData) - { //throw away - delete comment; - } - else - { - node->appendChild(comment); - } - } - //### VERSION - else if (match(p, "<?xml")) - { - p2 = parseVersion(p); - if (p2 <= p) - return p0; - } - //### DOCTYPE - else if (match(p, "<!DOCTYPE")) - { - p2 = parseDoctype(p); - if (p2 <= p) - return p0; - } - //### CDATA - else if (match(p, "<![CDATA[")) - { - CDATASection *cdata = document->createCDATASection(""); - p2 = parseCDATA(p, cdata); - if (p2 <= p) - return p0; - p = p2; - if (parseAsData) - { - nodeValue += cdata->getNodeValue(); - delete cdata; - } - else - { - node->appendChild(cdata); - } - } - //### OPEN OR CLOSE TAG - else if (peek(p) == '<') - { - p2 = skipwhite(p+1); - if (peek(p2) =='/') - { - p = p2; - break; - } - else - { - /*Add element to tree*/ - Element *elem = document->createElement(""); //fill in name later - node->appendChild(elem); - p2 = parseNode(p, elem, depth+1); - if (p2 <= p) - { - /*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/ - return p0; - } - p = p2; - } - } - //### TEXT - else - { - Text *text = document->createTextNode(""); - p2 = parseText(p, text); - if (p2 <= p) - return p0; - p = p2; - if (parseAsData) - { - nodeValue += text->getNodeValue(); - delete text; - } - else - { - node->appendChild(text); - } - } - - }//while (p<len) - - //printf("%d : nodeValue:'%s'\n", p, nodeValue.c_str()); - trim(nodeValue); - node->setNodeValue(nodeValue); - - //### get close tag. we should be pointing at '/' - p = skipwhite(p); - ch = get(p); - if (ch != '/') - { - error("no / on end tag"); - return p0; - } - p++; - - //### get word after '/' - p = skipwhite(p); - DOMString closeTagName; - DOMString closeTagNamePrefix; - DOMString closeTagQualifiedName; - p = getPrefixedWord(p, closeTagNamePrefix, closeTagName, - closeTagQualifiedName); - if (openTagQualifiedName != closeTagQualifiedName) - { - error("Mismatched closing tag. Expected </%S>. Got '%S'.", - openTagQualifiedName.c_str(), closeTagQualifiedName.c_str()); - return p0; - } - p = skipwhite(p); - if (parsebuf[p] != '>') - { - error("no > on end tag"); - return p0; - } - p++; - /*printf("close element:%ls\n",buf);*/ - return p; -} - - -/** - * - */ -org::w3c::dom::Document * -XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen) -{ - len = parseLen; - parsebuf = buf; - - DOMImplementationSourceImpl source; - DOMImplementation *domImpl = source.getDOMImplementation(""); - - keepGoing = true; - - document = domImpl->createDocument("", "", NULL); - //document = new svg::SVGDocumentImpl(domImpl, "", "", NULL); - - int p = bufferOffset; - int p2 = 0; - - while (p<len && keepGoing) - { - p = skipwhite(p); - //### COMMENT - if (match(p, "<!--")) - { - Comment *comment = document->createComment(""); - p2 = parseComment(p, comment); - if (p2 <= p) - return document; - p = p2; - if (parseAsData) - { //throw away - delete comment; - } - else - { - document->appendChild(comment); - } - } - //### VERSION - else if (match(p, "<?xml")) - { - p2 = parseVersion(p); - if (p2 <= p) - return document; - p = p2; - } - //### DOCTYPE - else if (match(p, "<!DOCTYPE")) - { - p2 = parseDoctype(p); - if (p2 <= p) - return document; - p = p2; - } - else - { - break; - } - } - - p = skipwhite(p); - p = parseNode(p, document->getDocumentElement(), 0); - - keepGoing = false; - - return document; -} - - -/** - * - */ -org::w3c::dom::Document * -XmlReader::parse(const DOMString &str) -{ - - Document *doc = parse(str, 0, str.size()); - doc->normalizeDocument(); - - return doc; -} - -/** - * - */ -org::w3c::dom::Document * -XmlReader::parseFile(char *fileName) -{ - - DOMString buf = loadFile(fileName); - - Document *doc = parse(buf, 0, buf.size()); - - return doc; -} - - - -//######################################################################### -//# S T R E A M R E A D I N G -//######################################################################### - -/** - * - */ -org::w3c::dom::DOMString -XmlReader::loadFile(char *fileName) -{ - - if (!fileName) - return NULL; - FILE *f = fopen(fileName, "rb"); - if (!f) - return NULL; - - DOMString buf; - while (!feof(f)) - { - int ch = fgetc(f); - if (ch<0) - break; - buf.push_back(ch); - } - fclose(f); - - return buf; -} - - -//######################################################################### -//# C O N S T R U C T O R / D E S T R U C T O R -//######################################################################### - - -/** - * - */ -XmlReader::XmlReader() -{ - len = 0; - lineNr = 1; - colNr = 0; - parseAsData = false; - keepGoing = false; -} - -/** - * - */ -XmlReader::XmlReader(bool parseAsDataArg) -{ - len = 0; - lineNr = 1; - colNr = 0; - parseAsData = parseAsDataArg; - keepGoing = false; -} - - - -/** - * - */ -XmlReader::~XmlReader() -{ -} - - -} //namespace dom -} //namespace w3c -} //namespace org - - -//######################################################################### -//# E N D O F F I L E -//######################################################################### - |
