diff options
| author | Bob Jamison <ishmalius@gmail.com> | 2006-05-21 21:51:51 +0000 |
|---|---|---|
| committer | ishmal <ishmal@users.sourceforge.net> | 2006-05-21 21:51:51 +0000 |
| commit | 12c02a7a645e0bc0a2acdddfe3fdb9911b8c6a61 (patch) | |
| tree | 2ab648845475b57afc3837bf1883b8785861d95e /src/dom/xmlreader.cpp | |
| parent | Check if we are already SSL before trying STARTTLS (diff) | |
| download | inkscape-12c02a7a645e0bc0a2acdddfe3fdb9911b8c6a61.tar.gz inkscape-12c02a7a645e0bc0a2acdddfe3fdb9911b8c6a61.zip | |
Unix-ify the sources
(bzr r928)
Diffstat (limited to 'src/dom/xmlreader.cpp')
| -rw-r--r-- | src/dom/xmlreader.cpp | 1974 |
1 files changed, 987 insertions, 987 deletions
diff --git a/src/dom/xmlreader.cpp b/src/dom/xmlreader.cpp index c36eec961..fbacf48f4 100644 --- a/src/dom/xmlreader.cpp +++ b/src/dom/xmlreader.cpp @@ -1,987 +1,987 @@ -/**
- * Phoebe DOM Implementation.
- *
- * This is a C++ approximation of the W3C DOM model, which follows
- * fairly closely the specifications in the various .idl files, copies of
- * which are provided for reference. Most important is this one:
- *
- * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
- *
- * Authors:
- * Bob Jamison
- *
- * Copyright (C) 2005 Bob Jamison
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-
-#include "xmlreader.h"
-#include "charclass.h"
-#include "domimpl.h"
-#include "svg/svgimpl.h"
-
-#include <stdio.h>
-#include <stdarg.h>
-
-namespace org
-{
-namespace w3c
-{
-namespace dom
-{
-
-
-//#########################################################################
-//# E N T I T Y T A B L E
-//#########################################################################
-struct EntityInfo
-{
- char *escape;
- int escapeLength;
- char *value;
-};
-
-
-static EntityInfo entityTable[] =
-{
- { "&" , 5 , "&" },
- { "<" , 4 , "<" },
- { ">" , 4 , ">" },
- { "'" , 6 , "'" },
- { """ , 6 , "\"" },
- { NULL , 0 , "\0" }
-};
-
-
-
-//#########################################################################
-//# M E S S A G E S
-//#########################################################################
-
-
-/**
- *
- */
-void XmlReader::error(char *fmt, ...)
-{
- va_list args;
- fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr);
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args) ;
- fprintf(stderr, "\n");
-}
-
-
-
-//#########################################################################
-//# U T I L I T Y
-//#########################################################################
-
-static void trim(DOMString &str)
-{
- int len = str.size();
- if (len<1)
- return;
-
- int start = 0;
- int end = 0;
- for (start=0 ; start<len ; start++)
- {
- int ch = str[start];
- if (ch<=' ' || ch>126)
- break;
- }
- for (end=len-1 ; end>=0 ; end--)
- {
- int ch = str[end];
- if (ch<=' ' || ch>126)
- break;
- }
- if (start<end)
- {
- str = str.substr(start, end+1);
- }
-}
-
-//#########################################################################
-//# P A R S I N G
-//#########################################################################
-
-/**
- * Get the character at the position and record the fact
- */
-int XmlReader::get(int p)
-{
- if (p >= len)
- return -1;
- int ch = parsebuf[p];
- //printf("%c", ch);
- if (ch == '\n' || ch == '\r')
- {
- colNr = 0;
- lineNr++;
- }
- else
- colNr++;
- return ch;
-}
-
-/**
- * Look at the character at the position, but don't note the fact
- */
-int XmlReader::peek(int p)
-{
- if (p >= len)
- return -1;
- int ch = parsebuf[p];
- return ch;
-}
-
-
-/**
- * Test if the given substring exists at the given position
- * in parsebuf. Use peek() in case of out-of-bounds
- */
-bool XmlReader::match(int pos, char *str)
-{
- while (*str)
- {
- if (peek(pos++) != *str++)
- return false;
- }
- return true;
-}
-
-
-
-/**
- * Test if the given substring exists at the given position
- * in a given buffer
- */
-/*
-static bool bufMatch(const DOMString &buf, int pos, char *str)
-{
- while (*str)
- {
- if (buf[pos++] != *str++)
- return false;
- }
- return true;
-}
-*/
-
-
-/**
- *
- */
-int XmlReader::skipwhite(int p)
-{
- while (p < len)
- {
- int b = get(p);
- if (!isWhitespace(b))
- break;
- p++;
- }
- return p;
-}
-
-/**
- * modify this to allow all chars for an element or attribute name
- */
-int XmlReader::getWord(int p, DOMString &result)
-{
- while (p<len)
- {
- int b = get(p);
- if (b<=' ' || b=='/' || b=='>' || b=='=')
- break;
- result.push_back((XMLCh)b);
- p++;
- }
- return p;
-}
-
-/**
- * get a name and prefix, if any
- */
-int XmlReader::getPrefixedWord(int p, DOMString &prefix,
- DOMString &shortWord, DOMString &fullWord)
-{
- while (p<len)
- {
- int b = get(p);
- if (b<=' ' || b=='/' || b=='>' || b=='=')
- break;
- else if (b == ':')
- {
- prefix = shortWord;
- shortWord = "";
- }
- else
- shortWord.push_back((XMLCh)b);
- p++;
- }
- if (prefix.size() > 0)
- fullWord = prefix + ":" + shortWord;
- else
- fullWord = shortWord;
- return p;
-}
-
-
-/**
- * Assume that we are starting on a quote. Ends on the char
- * after the final '"'
- */
-int XmlReader::getQuoted(int p0, DOMString &result)
-{
-
- int p = p0;
-
- if (peek(p)!='"' && peek(p)!='\'')
- return p0;
-
- int b = get(p++); //go to next char
-
- DOMString buf;
-
- while (p<len )
- {
- b = get(p++);
- if (b=='"' || b=='\'')
- break;
- else if (b=='&')
- {
- p = parseEntity(p, result);
- if (p < 0)
- return p0;
- }
- else
- {
- buf.push_back((XMLCh)b);
- }
- }
-
- //printf("quoted text:'%s'\n", buf.c_str());
-
- result.append(buf);
-
- return p;
-}
-
-
-
-/**
- * Parse a <!xml> tag. Node may be null. Assumes current char is '<'
- * ends on char after '>'
- */
-int XmlReader::parseVersion(int p0)
-{
- int p = p0;
-
- if (!match(p, "<?xml"))
- return p0;
-
- p += 5;
- colNr += 5;
-
- bool quickCloseDummy;
- Node *node = new NodeImpl();
- int p2 = parseAttributes(p, node, &quickCloseDummy);
- if (p2 < p)
- {
- delete node;
- return p0;
- }
- p = p2;
-
- //get the attributes that we need
- NamedNodeMap attributes = node->getAttributes();
- Node *attr = attributes.getNamedItem("version");
- if (attr)
- document->setXmlVersion(attr->getNodeValue());
- attr = attributes.getNamedItem("encoding");
- if (attr)
- { /*document->setXmlEncoding(attr->getNodeValue());*/ }
- attr = attributes.getNamedItem("standalone");
- if (attr)
- document->setXmlStandalone((attr->getNodeValue() == "yes"));
- delete node;
-
- //#now we should be pointing at '?>'
- if (!match(p, "?>"))
- {
- return p0;
- }
-
- //skip over '?>'
- get(p++);
- get(p++);
-
- return p;
-}
-
-
-/**
- * Parse a <!DOCTYPE> tag. doctype may be null. Expects '<'
- * on start. Ends pointing at char after '>'
- */
-int XmlReader::parseDoctype(int p0)
-{
- int p = p0;
-
- if (!match(p, "<!DOCTYPE"))
- return p0;
-
- p += 9;
- colNr += 9;
-
- DocumentType *doctype = document->getDoctype();
- if (!doctype)
- return p0;
-
-
- //### get the root name of the document
- p = skipwhite(p);
- DOMString rootName;
- int p2 = getWord(p, rootName);
- if (p2 <= p)
- return p0;
- p = p2;
- //printf("doctype root '%s'\n", rootName.c_str());
-
-
- while (p < len)
- {
- p = skipwhite(p);
- if (peek(p) == '>')
- break;
- else if (peek(p) == '[') //just ignore 'internal' [] stuff
- {
- while (p < len)
- {
- int ch = get(p++);
- if (ch == ']')
- break;
- }
- p++;
- }
- else if (match(p, "PUBLIC"))
- {
- p += 6;
- colNr += 6;
- p = skipwhite(p);
- DOMString pubIdLiteral;
- int p2 = getQuoted(p, pubIdLiteral);
- if (p2 <= p)
- return p0;
- p = p2;
- p = skipwhite(p);
- DOMString systemLiteral;
- p2 = getQuoted(p, systemLiteral);
- if (p2 <= p)
- return p0;
- p = p2;
- //printf("PUBLIC \"%s\" \"%s\" \n",
- // pubIdLiteral.c_str(), systemLiteral.c_str());
- }
- else if (match(p, "SYSTEM"))
- {
- p += 6;
- colNr += 6;
- p = skipwhite(p);
- DOMString systemLiteral;
- int p2 = getQuoted(p, systemLiteral);
- if (p2 <= p)
- return p0;
- p = p2;
- //printf("SYSTEM \"%s\" \n", systemLiteral.c_str());
- }
- }
-
-
- //skip over '>'
- get(p++);
-
- return p;
-}
-
-
-
-/**
- * Expects '<' on startup, ends on char after '>'
- */
-int XmlReader::parseComment(int p0, Comment *comment)
-{
- int p = p0;
-
- if (!match(p, "<!--"))
- return p0;
-
- colNr += 4;
- p += 4;
-
- DOMString buf;
-
- while (p<len-3)
- {
- if (match(p, "-->"))
- {
- p += 3;
- colNr += 3;
- break;
- }
- int ch = get(p++);
- buf.push_back((XMLCh)ch);
- }
-
- comment->setNodeValue(buf);
-
- return p;
-}
-
-
-
-/**
- *
- */
-int XmlReader::parseCDATA(int p0, CDATASection *cdata)
-{
-
- int p = p0;
-
- if (!match(p, "<![CDATA["))
- return p0;
-
- colNr += 9;
- p += 9;
-
- DOMString buf;
-
- while (p<len)
- {
- if (match(p, "]]>"))
- {
- p +=3;
- colNr += 3;
- break;
- }
- int ch = get(p++);
- buf.push_back((XMLCh)ch);
- }
-
- /*printf("Got CDATA:%s\n",buf.c_str());*/
- cdata->setNodeValue(buf);
-
- return p;
-}
-
-
-
-/**
- *
- */
-int XmlReader::parseText(int p0, Text *text)
-{
-
- int p = p0;
-
- DOMString buf;
-
- while (p<len)
- {
- if (peek(p) == '&')
- {
- p = parseEntity(p, buf);
- if (p < 0) //error?
- return p0;
- }
- else if (peek(p) == '<')
- {
- break;
- }
- else
- {
- int ch = get(p++);
- buf.push_back((XMLCh)ch);
- }
- }
-
- /*printf("Got Text:%s\n",buf.c_str());*/
- text->setNodeValue(buf);
-
- return p;
-}
-
-
-
-
-
-/**
- * Parses attributes of a node. Should end pointing at either the
- * '?' of a version or doctype tag, or a '>' of a normal tag
- */
-int XmlReader::parseAttributes(int p0, Node *node, bool *quickClose)
-{
- *quickClose = false;
-
- int p = p0;
-
- NamedNodeMap attributes;
-
- while (p<len)
- {
- /*printf("ch:%c\n",ch);*/
- p = skipwhite(p);
- int ch = get(p);
-
- /*printf("ch:%c\n",ch);*/
- if (ch == '?' || ch == '>')//done
- break;
- else if (ch=='/' && p<len+1)
- {
- p++;
- p = skipwhite(p);
- ch = peek(p);
- if (ch == '>')
- {
- p++;
- *quickClose = true;
- /*printf("quick close\n");*/
- return p;
- }
- }
- DOMString shortName;
- DOMString prefix;
- DOMString qualifiedName;
- int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName);
- if (p2 <= p)
- break;
-
- /*printf("name:%s",buf);*/
- p = p2;
- p = skipwhite(p);
- ch = get(p);
- /*printf("ch:%c\n",ch);*/
- if (ch != '=')
- break;
- p++;
- p = skipwhite(p);
- /*ch = parsebuf[p];*/
- /*printf("ch:%c\n",ch);*/
- DOMString attrValue;
- p2 = getQuoted(p, attrValue);
- p = p2;
- /*printf("name:'%s' value:'%s'\n",buf,buf2);*/
-
- DOMString namespaceURI = "";
- if (prefix == "xmlns" || shortName == "xmlns")
- namespaceURI = XMLNSNAME;
-
- //## Now let us make the attribute and give it to the node
- Attr *attr = document->createAttributeNS(namespaceURI, qualifiedName);
- attr->setValue(attrValue);
- node->getAttributes().setNamedItemNS(attr);
-
- }//while p<len
-
- return p;
-}
-
-/**
- * Appends the value of an entity to the buffer
- */
-int XmlReader::parseEntity(int p0, DOMString &buf)
-{
- int p = p0;
- for (EntityInfo *info = entityTable ; info->escape ; info++)
- {
- if (match(p, info->escape))
- {
- p += info->escapeLength;
- colNr += info->escapeLength;
- buf += info->value;
- return p;
- }
- }
-
- error("unterminated entity");
- return -1;
-}
-
-
-//#########################################################################
-//# P A R S E A N O D E
-//#########################################################################
-
-/**
- * Parse as a document, preserving the original structure as much as
- * possible
- */
-int XmlReader::parseNode(int p0, Node *node, int depth)
-{
-
- int p = p0;
-
-
- //### OPEN TAG
- int ch = get(p++);
- if (ch != '<')
- return p0;
-
- p = skipwhite(p);
- DOMString openTagName;
- DOMString openTagNamePrefix;
- DOMString openTagQualifiedName;
- int p2 = getPrefixedWord(p,openTagNamePrefix,
- openTagName, openTagQualifiedName);
- if (p2 <= p)
- return p0;
- p = p2;
- p = skipwhite(p);
-
- //printf("qualifiedName:%s\n", openTagQualifiedName.c_str());
- DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix);
- document->renameNode(node, namespaceURI, openTagQualifiedName);
-
- //### ATTRIBUTES
- bool quickClose;
- p = parseAttributes(p, node, &quickClose);
- if (quickClose) //trivial tag: <name/>
- return p;
-
- p++; //skip over '>'
-
-
- DOMString nodeValue;
-
- /* ### Get intervening data ### */
- while (p<len && keepGoing)
- {
- //### COMMENT
- if (match(p, "<!--"))
- {
- Comment *comment = document->createComment("");
- p2 = parseComment(p, comment);
- if (p2 <= p)
- return p0;
- p = p2;
- if (parseAsData)
- { //throw away
- delete comment;
- }
- else
- {
- node->appendChild(comment);
- }
- }
- //### VERSION
- else if (match(p, "<?xml"))
- {
- p2 = parseVersion(p);
- if (p2 <= p)
- return p0;
- }
- //### DOCTYPE
- else if (match(p, "<!DOCTYPE"))
- {
- p2 = parseDoctype(p);
- if (p2 <= p)
- return p0;
- }
- //### CDATA
- else if (match(p, "<![CDATA["))
- {
- CDATASection *cdata = document->createCDATASection("");
- p2 = parseCDATA(p, cdata);
- if (p2 <= p)
- return p0;
- p = p2;
- if (parseAsData)
- {
- nodeValue += cdata->getNodeValue();
- delete cdata;
- }
- else
- {
- node->appendChild(cdata);
- }
- }
- //### OPEN OR CLOSE TAG
- else if (peek(p) == '<')
- {
- p2 = skipwhite(p+1);
- if (peek(p2) =='/')
- {
- p = p2;
- break;
- }
- else
- {
- /*Add element to tree*/
- Element *elem = document->createElement(""); //fill in name later
- node->appendChild(elem);
- p2 = parseNode(p, elem, depth+1);
- if (p2 <= p)
- {
- /*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/
- return p0;
- }
- p = p2;
- }
- }
- //### TEXT
- else
- {
- Text *text = document->createTextNode("");
- p2 = parseText(p, text);
- if (p2 <= p)
- return p0;
- p = p2;
- if (parseAsData)
- {
- nodeValue += text->getNodeValue();
- delete text;
- }
- else
- {
- node->appendChild(text);
- }
- }
-
- }//while (p<len)
-
- //printf("%d : nodeValue:'%s'\n", p, nodeValue.c_str());
- trim(nodeValue);
- node->setNodeValue(nodeValue);
-
- //### get close tag. we should be pointing at '/'
- p = skipwhite(p);
- ch = get(p);
- if (ch != '/')
- {
- error("no / on end tag");
- return p0;
- }
- p++;
-
- //### get word after '/'
- p = skipwhite(p);
- DOMString closeTagName;
- DOMString closeTagNamePrefix;
- DOMString closeTagQualifiedName;
- p = getPrefixedWord(p, closeTagNamePrefix, closeTagName,
- closeTagQualifiedName);
- if (openTagQualifiedName != closeTagQualifiedName)
- {
- error("Mismatched closing tag. Expected </%S>. Got '%S'.",
- openTagQualifiedName.c_str(), closeTagQualifiedName.c_str());
- return p0;
- }
- p = skipwhite(p);
- if (parsebuf[p] != '>')
- {
- error("no > on end tag");
- return p0;
- }
- p++;
- /*printf("close element:%ls\n",buf);*/
- return p;
-}
-
-
-/**
- *
- */
-org::w3c::dom::Document *
-XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen)
-{
- len = parseLen;
- parsebuf = buf;
-
- DOMImplementationSourceImpl source;
- DOMImplementation *domImpl = source.getDOMImplementation("");
-
- keepGoing = true;
-
- document = domImpl->createDocument("", "", NULL);
- //document = new svg::SVGDocumentImpl(domImpl, "", "", NULL);
-
- int p = bufferOffset;
- int p2 = 0;
-
- while (p<len && keepGoing)
- {
- p = skipwhite(p);
- //### COMMENT
- if (match(p, "<!--"))
- {
- Comment *comment = document->createComment("");
- p2 = parseComment(p, comment);
- if (p2 <= p)
- return document;
- p = p2;
- if (parseAsData)
- { //throw away
- delete comment;
- }
- else
- {
- document->appendChild(comment);
- }
- }
- //### VERSION
- else if (match(p, "<?xml"))
- {
- p2 = parseVersion(p);
- if (p2 <= p)
- return document;
- p = p2;
- }
- //### DOCTYPE
- else if (match(p, "<!DOCTYPE"))
- {
- p2 = parseDoctype(p);
- if (p2 <= p)
- return document;
- p = p2;
- }
- else
- {
- break;
- }
- }
-
- p = skipwhite(p);
- p = parseNode(p, document->getDocumentElement(), 0);
-
- keepGoing = false;
-
- return document;
-}
-
-
-/**
- *
- */
-org::w3c::dom::Document *
-XmlReader::parse(const DOMString &str)
-{
-
- Document *doc = parse(str, 0, str.size());
- doc->normalizeDocument();
-
- return doc;
-}
-
-/**
- *
- */
-org::w3c::dom::Document *
-XmlReader::parseFile(char *fileName)
-{
-
- DOMString buf = loadFile(fileName);
-
- Document *doc = parse(buf, 0, buf.size());
-
- return doc;
-}
-
-
-
-//#########################################################################
-//# S T R E A M R E A D I N G
-//#########################################################################
-
-/**
- *
- */
-org::w3c::dom::DOMString
-XmlReader::loadFile(char *fileName)
-{
-
- if (!fileName)
- return NULL;
- FILE *f = fopen(fileName, "rb");
- if (!f)
- return NULL;
-
- DOMString buf;
- while (!feof(f))
- {
- int ch = fgetc(f);
- if (ch<0)
- break;
- buf.push_back((XMLCh)ch);
- }
- fclose(f);
-
- return buf;
-}
-
-
-//#########################################################################
-//# C O N S T R U C T O R / D E S T R U C T O R
-//#########################################################################
-
-
-/**
- *
- */
-XmlReader::XmlReader()
-{
- len = 0;
- lineNr = 1;
- colNr = 0;
- parseAsData = false;
- keepGoing = false;
-}
-
-/**
- *
- */
-XmlReader::XmlReader(bool parseAsDataArg)
-{
- len = 0;
- lineNr = 1;
- colNr = 0;
- parseAsData = parseAsDataArg;
- keepGoing = false;
-}
-
-
-
-/**
- *
- */
-XmlReader::~XmlReader()
-{
-}
-
-
-} //namespace dom
-} //namespace w3c
-} //namespace org
-
-
-//#########################################################################
-//# E N D O F F I L E
-//#########################################################################
-
+/** + * Phoebe DOM Implementation. + * + * This is a C++ approximation of the W3C DOM model, which follows + * fairly closely the specifications in the various .idl files, copies of + * which are provided for reference. Most important is this one: + * + * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html + * + * Authors: + * Bob Jamison + * + * Copyright (C) 2005 Bob Jamison + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + + +#include "xmlreader.h" +#include "charclass.h" +#include "domimpl.h" +#include "svg/svgimpl.h" + +#include <stdio.h> +#include <stdarg.h> + +namespace org +{ +namespace w3c +{ +namespace dom +{ + + +//######################################################################### +//# E N T I T Y T A B L E +//######################################################################### +struct EntityInfo +{ + char *escape; + int escapeLength; + char *value; +}; + + +static EntityInfo entityTable[] = +{ + { "&" , 5 , "&" }, + { "<" , 4 , "<" }, + { ">" , 4 , ">" }, + { "'" , 6 , "'" }, + { """ , 6 , "\"" }, + { NULL , 0 , "\0" } +}; + + + +//######################################################################### +//# M E S S A G E S +//######################################################################### + + +/** + * + */ +void XmlReader::error(char *fmt, ...) +{ + va_list args; + fprintf(stderr, "XmlReader:error at line %d, column %d:", lineNr, colNr); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args) ; + fprintf(stderr, "\n"); +} + + + +//######################################################################### +//# U T I L I T Y +//######################################################################### + +static void trim(DOMString &str) +{ + int len = str.size(); + if (len<1) + return; + + int start = 0; + int end = 0; + for (start=0 ; start<len ; start++) + { + int ch = str[start]; + if (ch<=' ' || ch>126) + break; + } + for (end=len-1 ; end>=0 ; end--) + { + int ch = str[end]; + if (ch<=' ' || ch>126) + break; + } + if (start<end) + { + str = str.substr(start, end+1); + } +} + +//######################################################################### +//# P A R S I N G +//######################################################################### + +/** + * Get the character at the position and record the fact + */ +int XmlReader::get(int p) +{ + if (p >= len) + return -1; + int ch = parsebuf[p]; + //printf("%c", ch); + if (ch == '\n' || ch == '\r') + { + colNr = 0; + lineNr++; + } + else + colNr++; + return ch; +} + +/** + * Look at the character at the position, but don't note the fact + */ +int XmlReader::peek(int p) +{ + if (p >= len) + return -1; + int ch = parsebuf[p]; + return ch; +} + + +/** + * Test if the given substring exists at the given position + * in parsebuf. Use peek() in case of out-of-bounds + */ +bool XmlReader::match(int pos, char *str) +{ + while (*str) + { + if (peek(pos++) != *str++) + return false; + } + return true; +} + + + +/** + * Test if the given substring exists at the given position + * in a given buffer + */ +/* +static bool bufMatch(const DOMString &buf, int pos, char *str) +{ + while (*str) + { + if (buf[pos++] != *str++) + return false; + } + return true; +} +*/ + + +/** + * + */ +int XmlReader::skipwhite(int p) +{ + while (p < len) + { + int b = get(p); + if (!isWhitespace(b)) + break; + p++; + } + return p; +} + +/** + * modify this to allow all chars for an element or attribute name + */ +int XmlReader::getWord(int p, DOMString &result) +{ + while (p<len) + { + int b = get(p); + if (b<=' ' || b=='/' || b=='>' || b=='=') + break; + result.push_back((XMLCh)b); + p++; + } + return p; +} + +/** + * get a name and prefix, if any + */ +int XmlReader::getPrefixedWord(int p, DOMString &prefix, + DOMString &shortWord, DOMString &fullWord) +{ + while (p<len) + { + int b = get(p); + if (b<=' ' || b=='/' || b=='>' || b=='=') + break; + else if (b == ':') + { + prefix = shortWord; + shortWord = ""; + } + else + shortWord.push_back((XMLCh)b); + p++; + } + if (prefix.size() > 0) + fullWord = prefix + ":" + shortWord; + else + fullWord = shortWord; + return p; +} + + +/** + * Assume that we are starting on a quote. Ends on the char + * after the final '"' + */ +int XmlReader::getQuoted(int p0, DOMString &result) +{ + + int p = p0; + + if (peek(p)!='"' && peek(p)!='\'') + return p0; + + int b = get(p++); //go to next char + + DOMString buf; + + while (p<len ) + { + b = get(p++); + if (b=='"' || b=='\'') + break; + else if (b=='&') + { + p = parseEntity(p, result); + if (p < 0) + return p0; + } + else + { + buf.push_back((XMLCh)b); + } + } + + //printf("quoted text:'%s'\n", buf.c_str()); + + result.append(buf); + + return p; +} + + + +/** + * Parse a <!xml> tag. Node may be null. Assumes current char is '<' + * ends on char after '>' + */ +int XmlReader::parseVersion(int p0) +{ + int p = p0; + + if (!match(p, "<?xml")) + return p0; + + p += 5; + colNr += 5; + + bool quickCloseDummy; + Node *node = new NodeImpl(); + int p2 = parseAttributes(p, node, &quickCloseDummy); + if (p2 < p) + { + delete node; + return p0; + } + p = p2; + + //get the attributes that we need + NamedNodeMap attributes = node->getAttributes(); + Node *attr = attributes.getNamedItem("version"); + if (attr) + document->setXmlVersion(attr->getNodeValue()); + attr = attributes.getNamedItem("encoding"); + if (attr) + { /*document->setXmlEncoding(attr->getNodeValue());*/ } + attr = attributes.getNamedItem("standalone"); + if (attr) + document->setXmlStandalone((attr->getNodeValue() == "yes")); + delete node; + + //#now we should be pointing at '?>' + if (!match(p, "?>")) + { + return p0; + } + + //skip over '?>' + get(p++); + get(p++); + + return p; +} + + +/** + * Parse a <!DOCTYPE> tag. doctype may be null. Expects '<' + * on start. Ends pointing at char after '>' + */ +int XmlReader::parseDoctype(int p0) +{ + int p = p0; + + if (!match(p, "<!DOCTYPE")) + return p0; + + p += 9; + colNr += 9; + + DocumentType *doctype = document->getDoctype(); + if (!doctype) + return p0; + + + //### get the root name of the document + p = skipwhite(p); + DOMString rootName; + int p2 = getWord(p, rootName); + if (p2 <= p) + return p0; + p = p2; + //printf("doctype root '%s'\n", rootName.c_str()); + + + while (p < len) + { + p = skipwhite(p); + if (peek(p) == '>') + break; + else if (peek(p) == '[') //just ignore 'internal' [] stuff + { + while (p < len) + { + int ch = get(p++); + if (ch == ']') + break; + } + p++; + } + else if (match(p, "PUBLIC")) + { + p += 6; + colNr += 6; + p = skipwhite(p); + DOMString pubIdLiteral; + int p2 = getQuoted(p, pubIdLiteral); + if (p2 <= p) + return p0; + p = p2; + p = skipwhite(p); + DOMString systemLiteral; + p2 = getQuoted(p, systemLiteral); + if (p2 <= p) + return p0; + p = p2; + //printf("PUBLIC \"%s\" \"%s\" \n", + // pubIdLiteral.c_str(), systemLiteral.c_str()); + } + else if (match(p, "SYSTEM")) + { + p += 6; + colNr += 6; + p = skipwhite(p); + DOMString systemLiteral; + int p2 = getQuoted(p, systemLiteral); + if (p2 <= p) + return p0; + p = p2; + //printf("SYSTEM \"%s\" \n", systemLiteral.c_str()); + } + } + + + //skip over '>' + get(p++); + + return p; +} + + + +/** + * Expects '<' on startup, ends on char after '>' + */ +int XmlReader::parseComment(int p0, Comment *comment) +{ + int p = p0; + + if (!match(p, "<!--")) + return p0; + + colNr += 4; + p += 4; + + DOMString buf; + + while (p<len-3) + { + if (match(p, "-->")) + { + p += 3; + colNr += 3; + break; + } + int ch = get(p++); + buf.push_back((XMLCh)ch); + } + + comment->setNodeValue(buf); + + return p; +} + + + +/** + * + */ +int XmlReader::parseCDATA(int p0, CDATASection *cdata) +{ + + int p = p0; + + if (!match(p, "<![CDATA[")) + return p0; + + colNr += 9; + p += 9; + + DOMString buf; + + while (p<len) + { + if (match(p, "]]>")) + { + p +=3; + colNr += 3; + break; + } + int ch = get(p++); + buf.push_back((XMLCh)ch); + } + + /*printf("Got CDATA:%s\n",buf.c_str());*/ + cdata->setNodeValue(buf); + + return p; +} + + + +/** + * + */ +int XmlReader::parseText(int p0, Text *text) +{ + + int p = p0; + + DOMString buf; + + while (p<len) + { + if (peek(p) == '&') + { + p = parseEntity(p, buf); + if (p < 0) //error? + return p0; + } + else if (peek(p) == '<') + { + break; + } + else + { + int ch = get(p++); + buf.push_back((XMLCh)ch); + } + } + + /*printf("Got Text:%s\n",buf.c_str());*/ + text->setNodeValue(buf); + + return p; +} + + + + + +/** + * Parses attributes of a node. Should end pointing at either the + * '?' of a version or doctype tag, or a '>' of a normal tag + */ +int XmlReader::parseAttributes(int p0, Node *node, bool *quickClose) +{ + *quickClose = false; + + int p = p0; + + NamedNodeMap attributes; + + while (p<len) + { + /*printf("ch:%c\n",ch);*/ + p = skipwhite(p); + int ch = get(p); + + /*printf("ch:%c\n",ch);*/ + if (ch == '?' || ch == '>')//done + break; + else if (ch=='/' && p<len+1) + { + p++; + p = skipwhite(p); + ch = peek(p); + if (ch == '>') + { + p++; + *quickClose = true; + /*printf("quick close\n");*/ + return p; + } + } + DOMString shortName; + DOMString prefix; + DOMString qualifiedName; + int p2 = getPrefixedWord(p, prefix, shortName, qualifiedName); + if (p2 <= p) + break; + + /*printf("name:%s",buf);*/ + p = p2; + p = skipwhite(p); + ch = get(p); + /*printf("ch:%c\n",ch);*/ + if (ch != '=') + break; + p++; + p = skipwhite(p); + /*ch = parsebuf[p];*/ + /*printf("ch:%c\n",ch);*/ + DOMString attrValue; + p2 = getQuoted(p, attrValue); + p = p2; + /*printf("name:'%s' value:'%s'\n",buf,buf2);*/ + + DOMString namespaceURI = ""; + if (prefix == "xmlns" || shortName == "xmlns") + namespaceURI = XMLNSNAME; + + //## Now let us make the attribute and give it to the node + Attr *attr = document->createAttributeNS(namespaceURI, qualifiedName); + attr->setValue(attrValue); + node->getAttributes().setNamedItemNS(attr); + + }//while p<len + + return p; +} + +/** + * Appends the value of an entity to the buffer + */ +int XmlReader::parseEntity(int p0, DOMString &buf) +{ + int p = p0; + for (EntityInfo *info = entityTable ; info->escape ; info++) + { + if (match(p, info->escape)) + { + p += info->escapeLength; + colNr += info->escapeLength; + buf += info->value; + return p; + } + } + + error("unterminated entity"); + return -1; +} + + +//######################################################################### +//# P A R S E A N O D E +//######################################################################### + +/** + * Parse as a document, preserving the original structure as much as + * possible + */ +int XmlReader::parseNode(int p0, Node *node, int depth) +{ + + int p = p0; + + + //### OPEN TAG + int ch = get(p++); + if (ch != '<') + return p0; + + p = skipwhite(p); + DOMString openTagName; + DOMString openTagNamePrefix; + DOMString openTagQualifiedName; + int p2 = getPrefixedWord(p,openTagNamePrefix, + openTagName, openTagQualifiedName); + if (p2 <= p) + return p0; + p = p2; + p = skipwhite(p); + + //printf("qualifiedName:%s\n", openTagQualifiedName.c_str()); + DOMString namespaceURI = node->lookupNamespaceURI(openTagNamePrefix); + document->renameNode(node, namespaceURI, openTagQualifiedName); + + //### ATTRIBUTES + bool quickClose; + p = parseAttributes(p, node, &quickClose); + if (quickClose) //trivial tag: <name/> + return p; + + p++; //skip over '>' + + + DOMString nodeValue; + + /* ### Get intervening data ### */ + while (p<len && keepGoing) + { + //### COMMENT + if (match(p, "<!--")) + { + Comment *comment = document->createComment(""); + p2 = parseComment(p, comment); + if (p2 <= p) + return p0; + p = p2; + if (parseAsData) + { //throw away + delete comment; + } + else + { + node->appendChild(comment); + } + } + //### VERSION + else if (match(p, "<?xml")) + { + p2 = parseVersion(p); + if (p2 <= p) + return p0; + } + //### DOCTYPE + else if (match(p, "<!DOCTYPE")) + { + p2 = parseDoctype(p); + if (p2 <= p) + return p0; + } + //### CDATA + else if (match(p, "<![CDATA[")) + { + CDATASection *cdata = document->createCDATASection(""); + p2 = parseCDATA(p, cdata); + if (p2 <= p) + return p0; + p = p2; + if (parseAsData) + { + nodeValue += cdata->getNodeValue(); + delete cdata; + } + else + { + node->appendChild(cdata); + } + } + //### OPEN OR CLOSE TAG + else if (peek(p) == '<') + { + p2 = skipwhite(p+1); + if (peek(p2) =='/') + { + p = p2; + break; + } + else + { + /*Add element to tree*/ + Element *elem = document->createElement(""); //fill in name later + node->appendChild(elem); + p2 = parseNode(p, elem, depth+1); + if (p2 <= p) + { + /*printf("problem on element:%ls. p2:%d p:%d\n",n->name, p2, p);*/ + return p0; + } + p = p2; + } + } + //### TEXT + else + { + Text *text = document->createTextNode(""); + p2 = parseText(p, text); + if (p2 <= p) + return p0; + p = p2; + if (parseAsData) + { + nodeValue += text->getNodeValue(); + delete text; + } + else + { + node->appendChild(text); + } + } + + }//while (p<len) + + //printf("%d : nodeValue:'%s'\n", p, nodeValue.c_str()); + trim(nodeValue); + node->setNodeValue(nodeValue); + + //### get close tag. we should be pointing at '/' + p = skipwhite(p); + ch = get(p); + if (ch != '/') + { + error("no / on end tag"); + return p0; + } + p++; + + //### get word after '/' + p = skipwhite(p); + DOMString closeTagName; + DOMString closeTagNamePrefix; + DOMString closeTagQualifiedName; + p = getPrefixedWord(p, closeTagNamePrefix, closeTagName, + closeTagQualifiedName); + if (openTagQualifiedName != closeTagQualifiedName) + { + error("Mismatched closing tag. Expected </%S>. Got '%S'.", + openTagQualifiedName.c_str(), closeTagQualifiedName.c_str()); + return p0; + } + p = skipwhite(p); + if (parsebuf[p] != '>') + { + error("no > on end tag"); + return p0; + } + p++; + /*printf("close element:%ls\n",buf);*/ + return p; +} + + +/** + * + */ +org::w3c::dom::Document * +XmlReader::parse(const DOMString &buf, int bufferOffset, int parseLen) +{ + len = parseLen; + parsebuf = buf; + + DOMImplementationSourceImpl source; + DOMImplementation *domImpl = source.getDOMImplementation(""); + + keepGoing = true; + + document = domImpl->createDocument("", "", NULL); + //document = new svg::SVGDocumentImpl(domImpl, "", "", NULL); + + int p = bufferOffset; + int p2 = 0; + + while (p<len && keepGoing) + { + p = skipwhite(p); + //### COMMENT + if (match(p, "<!--")) + { + Comment *comment = document->createComment(""); + p2 = parseComment(p, comment); + if (p2 <= p) + return document; + p = p2; + if (parseAsData) + { //throw away + delete comment; + } + else + { + document->appendChild(comment); + } + } + //### VERSION + else if (match(p, "<?xml")) + { + p2 = parseVersion(p); + if (p2 <= p) + return document; + p = p2; + } + //### DOCTYPE + else if (match(p, "<!DOCTYPE")) + { + p2 = parseDoctype(p); + if (p2 <= p) + return document; + p = p2; + } + else + { + break; + } + } + + p = skipwhite(p); + p = parseNode(p, document->getDocumentElement(), 0); + + keepGoing = false; + + return document; +} + + +/** + * + */ +org::w3c::dom::Document * +XmlReader::parse(const DOMString &str) +{ + + Document *doc = parse(str, 0, str.size()); + doc->normalizeDocument(); + + return doc; +} + +/** + * + */ +org::w3c::dom::Document * +XmlReader::parseFile(char *fileName) +{ + + DOMString buf = loadFile(fileName); + + Document *doc = parse(buf, 0, buf.size()); + + return doc; +} + + + +//######################################################################### +//# S T R E A M R E A D I N G +//######################################################################### + +/** + * + */ +org::w3c::dom::DOMString +XmlReader::loadFile(char *fileName) +{ + + if (!fileName) + return NULL; + FILE *f = fopen(fileName, "rb"); + if (!f) + return NULL; + + DOMString buf; + while (!feof(f)) + { + int ch = fgetc(f); + if (ch<0) + break; + buf.push_back((XMLCh)ch); + } + fclose(f); + + return buf; +} + + +//######################################################################### +//# C O N S T R U C T O R / D E S T R U C T O R +//######################################################################### + + +/** + * + */ +XmlReader::XmlReader() +{ + len = 0; + lineNr = 1; + colNr = 0; + parseAsData = false; + keepGoing = false; +} + +/** + * + */ +XmlReader::XmlReader(bool parseAsDataArg) +{ + len = 0; + lineNr = 1; + colNr = 0; + parseAsData = parseAsDataArg; + keepGoing = false; +} + + + +/** + * + */ +XmlReader::~XmlReader() +{ +} + + +} //namespace dom +} //namespace w3c +} //namespace org + + +//######################################################################### +//# E N D O F F I L E +//######################################################################### + |
