1 files changed, 782 insertions, 0 deletions
diff --git a/src/pedro/pedrodom.cpp b/src/pedro/pedrodom.cpp
new file mode 100644
index 000000000..5ac4a61d0
--- /dev/null
+++ b/src/pedro/pedrodom.cpp
@@ -0,0 +1,782 @@
+/*
+ * Implementation of the Pedro mini-DOM parser and tree
+ *
+ * Authors:
+ *   Bob Jamison
+ *
+ * Copyright (C) 2005 Bob Jamison
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+
+
+#include <ctype.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <vector>
+
+
+#include "pedrodom.h"
+
+namespace Pedro
+{
+
+
+
+//########################################################################
+//# E L E M E N T
+//########################################################################
+
+/**
+ * Create a deep copy of this element.
+ *
+ * The copy receives the same name, value, attributes and namespaces.
+ * Every child is cloned recursively and attached to the copy through
+ * addChild(), so the cloned children point at the copy as their parent.
+ * The copy's own parent pointer is set to this element's parent, but the
+ * copy is NOT inserted into that parent's child list.
+ *
+ * @return a newly allocated Element (presumably owned by the caller;
+ *         ownership rules are not visible in this file)
+ */
+Element *Element::clone()
+{
+    Element *copy = new Element(name, value);
+    copy->parent     = parent;
+    copy->attributes = attributes;
+    copy->namespaces = namespaces;
+
+    for (unsigned int i = 0; i < children.size(); i++)
+        {
+        Element *childCopy = children[i]->clone();
+        copy->addChild(childCopy);
+        }
+    return copy;
+}
+
+
+/**
+ * Collect, in pre-order, every element of this subtree whose name
+ * equals the requested one.  This element itself is tested first,
+ * then each child subtree in child order.
+ *
+ * @param res   vector that matching elements are appended to
+ * @param name  element name to look for (exact comparison)
+ */
+void Element::findElementsRecursive(std::vector<Element *>&res, const DOMString &name)
+{
+    if (getName() == name)
+        res.push_back(this);
+
+    std::vector<Element *>::iterator child;
+    for (child = children.begin(); child != children.end(); ++child)
+        (*child)->findElementsRecursive(res, name);
+}
+
+/**
+ * Find all elements named 'name' in the subtree rooted at this element
+ * (this element included).
+ *
+ * @param name  element name to look for
+ * @return the matching elements in pre-order; empty if there are none
+ */
+std::vector<Element *> Element::findElements(const DOMString &name)
+{
+    std::vector<Element *> matches;
+    findElementsRecursive(matches, name);
+    return matches;
+}
+
+/**
+ * Look up an attribute of this element by name.
+ *
+ * @param name  attribute name to search for
+ * @return the value of the first attribute with that name, or an empty
+ *         string when this element has no such attribute
+ */
+DOMString Element::getAttribute(const DOMString &name)
+{
+    unsigned int idx = 0;
+    while (idx < attributes.size())
+        {
+        if (attributes[idx].getName() == name)
+            return attributes[idx].getValue();
+        idx++;
+        }
+    return "";
+}
+
+/**
+ * Convenience lookup: find the first element named 'tagName' in this
+ * subtree (this element included) and return one of its attributes.
+ *
+ * @param tagName   name of the element to find
+ * @param attrName  name of the attribute to read from that element
+ * @return the attribute value, or an empty string when no element
+ *         matches or the element lacks the attribute
+ */
+DOMString Element::getTagAttribute(const DOMString &tagName, const DOMString &attrName)
+{
+    std::vector<Element *> found = findElements(tagName);
+    if (found.empty())
+        return "";
+    return found[0]->getAttribute(attrName);
+}
+
+/**
+ * Convenience lookup: find the first element named 'tagName' in this
+ * subtree (this element included) and return its text value.
+ *
+ * @param tagName  name of the element to find
+ * @return the element's value, or an empty string when nothing matches
+ */
+DOMString Element::getTagValue(const DOMString &tagName)
+{
+    std::vector<Element *> found = findElements(tagName);
+    if (found.empty())
+        return "";
+    return found[0]->getValue();
+}
+
+/**
+ * Append a child element and make this element its parent.
+ * A null pointer is silently ignored.
+ *
+ * @param child  element to append; may be NULL
+ */
+void Element::addChild(Element *child)
+{
+    if (child)
+        {
+        child->parent = this;
+        children.push_back(child);
+        }
+}
+
+
+/**
+ * Append a name/value attribute to this element.  No check is made for
+ * an existing attribute with the same name.
+ *
+ * @param name   attribute name
+ * @param value  attribute value
+ */
+void Element::addAttribute(const DOMString &name, const DOMString &value)
+{
+    attributes.push_back(Attribute(name, value));
+}
+
+/**
+ * Append a namespace declaration to this element.  No check is made
+ * for an existing declaration with the same prefix.
+ *
+ * @param prefix        namespace prefix
+ * @param namespaceURI  URI bound to the prefix
+ */
+void Element::addNamespace(const DOMString &prefix, const DOMString &namespaceURI)
+{
+    namespaces.push_back(Namespace(prefix, namespaceURI));
+}
+
+/**
+ * Write this element and its whole subtree to a stream as indented
+ * text, one tag per line.
+ *
+ * Layout per element: the opening tag (with attributes, then namespace
+ * declarations written as xmlns:PREFIX="URI"), the value on its own line
+ * if it is non-empty, every child indented two more columns, and finally
+ * the closing tag.  Names and values are written verbatim; no entity
+ * escaping is applied here.
+ *
+ * @param f       destination stream; nothing is written when NULL
+ * @param indent  number of spaces to put in front of this element's tags
+ */
+void Element::writeIndentedRecursive(FILE *f, int indent)
+{
+    if (!f)
+        return;
+
+    int pad;
+    unsigned int n;
+
+    //Opening tag: indentation, name, attributes, namespace declarations
+    for (pad = 0; pad < indent; pad++)
+        fputc(' ', f);
+    fprintf(f,"<%s",name.c_str());
+    for (n = 0; n < attributes.size(); n++)
+        {
+        fprintf(f," %s=\"%s\"",
+              attributes[n].getName().c_str(),
+              attributes[n].getValue().c_str());
+        }
+    for (n = 0; n < namespaces.size(); n++)
+        {
+        fprintf(f," xmlns:%s=\"%s\"",
+              namespaces[n].getPrefix().c_str(),
+              namespaces[n].getNamespaceURI().c_str());
+        }
+    fprintf(f,">\n");
+
+    //Text value, only when there is one
+    if (value.size() > 0)
+        {
+        for (pad = 0; pad < indent; pad++)
+            fputc(' ', f);
+        fprintf(f," %s\n", value.c_str());
+        }
+
+    //Children, two columns deeper
+    for (n = 0; n < children.size(); n++)
+        children[n]->writeIndentedRecursive(f, indent+2);
+
+    //Closing tag
+    for (pad = 0; pad < indent; pad++)
+        fputc(' ', f);
+    fprintf(f,"</%s>\n", name.c_str());
+}
+
+/**
+ * Write this element and its subtree to 'f' as indented text, starting
+ * at column zero.  Forwards to writeIndentedRecursive().
+ *
+ * @param f  destination stream
+ */
+void Element::writeIndented(FILE *f)
+{
+    writeIndentedRecursive(f, 0);
+}
+
+/**
+ * Dump this element and its subtree to standard output as indented text.
+ */
+void Element::print()
+{
+    writeIndented(stdout);
+}
+
+
+//########################################################################
+//# P A R S E R
+//########################################################################
+
+
+
+/**
+ * One predefined XML entity: its escaped spelling and the single
+ * character it stands for.
+ */
+typedef struct
+    {
+    const char *escaped;  // escaped form, e.g. "&amp;"; points at a literal
+    char value;           // the character the entity decodes to
+    } EntityEntry;
+
+/**
+ * Table of the five predefined XML entities.  The entry whose value is
+ * '\0' terminates the table; loops over it stop on that value.
+ */
+static EntityEntry entities[] =
+{
+    { "&amp;" , '&'  },
+    { "&lt;"  , '<'  },
+    { "&gt;"  , '>'  },
+    { "&apos;", '\'' },
+    { "&quot;", '"'  },
+    { NULL    , '\0' }
+};
+
+
+
+/**
+ * Translate an offset into the parse buffer into a 1-based line number
+ * and a 1-based column number.
+ *
+ * LF, a lone CR, and a CR LF pair each count as exactly one line break,
+ * and the column restarts at 1 after every break.  Offsets beyond the end
+ * of the buffer are clamped to the buffer length so that the scan never
+ * reads past it.
+ *
+ * @param pos     offset into parsebuf
+ * @param lineNr  receives the line number
+ * @param colNr   receives the column number
+ */
+void Parser::getLineAndColumn(long pos, long *lineNr, long *colNr)
+{
+    long line = 1;
+    long col  = 1;
+
+    //Never read past the end of the buffer
+    if (pos > parselen)
+        pos = parselen;
+
+    for (long i=0 ; i<pos ; i++)
+        {
+        XMLCh ch = parsebuf[i];
+        bool lineBreak = (ch == '\n');
+        //A lone CR is a line break.  The CR of a CR LF pair is not,
+        //because the LF that follows it is counted instead.
+        if (ch == '\r' && !(i+1 < parselen && parsebuf[i+1] == '\n'))
+            lineBreak = true;
+
+        if (lineBreak)
+            {
+            line++;
+            col = 1;
+            }
+        else
+            col++;
+        }
+    *lineNr = line;
+    *colNr  = col;
+}
+
+
+/**
+ * Report a parse error on stderr.
+ *
+ * The message is prefixed with the line and column that correspond to
+ * currentPosition (the last position successfully read through peek())
+ * and is followed by a newline.
+ *
+ * @param fmt  printf-style format string
+ * @param ...  arguments consumed by the format string
+ */
+void Parser::error(char *fmt, ...)
+{
+    long errLine;
+    long errCol;
+    getLineAndColumn(currentPosition, &errLine, &errCol);
+    fprintf(stderr, "xml error at line %ld, column %ld:", errLine, errCol);
+
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    fprintf(stderr, "\n");
+}
+
+
+
+/**
+ * Read the character at an offset of the parse buffer without
+ * consuming it.
+ *
+ * As a side effect a successful read records 'pos' in currentPosition,
+ * which error() later uses to report where a problem occurred.
+ *
+ * @param pos  offset into parsebuf
+ * @return the character at 'pos', or -1 when 'pos' lies outside the
+ *         buffer.  A negative offset would index before the start of
+ *         the buffer, so it is treated the same as end of input.
+ */
+int Parser::peek(long pos)
+{
+    if (pos < 0 || pos >= parselen)
+        return -1;
+    currentPosition = pos;
+    return parsebuf[pos];
+}
+
+
+
+/**
+ * Escape the five characters that have predefined XML entities.
+ *
+ * '&', '<', '>', '\'' and '"' are replaced by "&amp;", "&lt;", "&gt;",
+ * "&apos;" and "&quot;" respectively; every other character is copied
+ * unchanged.
+ *
+ * @param str  text to escape
+ * @return the escaped copy
+ */
+DOMString Parser::encode(const DOMString &str)
+{
+    DOMString escaped;
+    for (unsigned int pos = 0; pos < str.size(); pos++)
+        {
+        XMLCh ch = (XMLCh)str[pos];
+        switch (ch)
+            {
+            case '&':
+                escaped.append("&amp;");
+                break;
+            case '<':
+                escaped.append("&lt;");
+                break;
+            case '>':
+                escaped.append("&gt;");
+                break;
+            case '\'':
+                escaped.append("&apos;");
+                break;
+            case '"':
+                escaped.append("&quot;");
+                break;
+            default:
+                escaped.push_back(ch);
+                break;
+            }
+        }
+    return escaped;
+}
+
+
+/**
+ * Test whether the buffer contains the literal 'text' at offset p0.
+ *
+ * @param p0    offset at which the comparison starts
+ * @param text  NUL-terminated literal to compare against
+ * @return the offset just past the literal when it matches; p0 when it
+ *         does not.  An empty literal also yields p0, so callers detect
+ *         a match by testing for "result > p0".
+ */
+int Parser::match(long p0, const char *text)
+{
+    int p = p0;
+    for ( ; *text ; text++, p++)
+        {
+        if (peek(p) != *text)
+            return p0;
+        }
+    return p;
+}
+
+
+
+/**
+ * Skip whitespace and XML comments.
+ *
+ * Any run of whitespace characters and comments, in any order and with
+ * or without whitespace between consecutive comments, is skipped.  An
+ * unterminated comment swallows the rest of the buffer.
+ *
+ * @param p  offset at which to start
+ * @return the offset of the first character that is neither whitespace
+ *         nor part of a comment, or the buffer length if there is none
+ */
+int Parser::skipwhite(long p)
+{
+    while (p<parselen)
+        {
+        int p2 = match(p, "<!--");
+        if (p2 > p)
+            {
+            //Inside a comment: advance until its terminator
+            p = p2;
+            while (p<parselen)
+                {
+                p2 = match(p, "-->");
+                if (p2 > p)
+                    {
+                    p = p2;
+                    break;
+                    }
+                p++;
+                }
+            //Start over: another comment may follow immediately
+            continue;
+            }
+
+        int b = peek(p);
+        //isspace() is only defined for EOF and unsigned char values, so
+        //anything outside the ASCII range is treated as non-whitespace
+        if (b < 0 || b > 0x7f || !isspace(b))
+            break;
+        p++;
+        }
+    return p;
+}
+
+/**
+ * Read an element or attribute name.
+ *
+ * Characters are appended to 'buf' until a character <= ' ', a '/',
+ * a '>' or an '=' is met, or the buffer ends.
+ *
+ * TODO (from the original author): modify this to allow all the
+ * characters that are valid in an element or attribute name.
+ *
+ * @param p0   offset at which the name starts
+ * @param buf  receives the name; characters are appended to it
+ * @return the offset of the character that ended the name; equal to p0
+ *         when no character was read
+ */
+int Parser::getWord(int p0, DOMString &buf)
+{
+    int pos = p0;
+    for ( ; pos < parselen ; pos++)
+        {
+        XMLCh c = peek(pos);
+        bool delimiter = (c <= ' ') || (c == '/') || (c == '>') || (c == '=');
+        if (delimiter)
+            break;
+        buf.push_back(c);
+        }
+    return pos;
+}
+
+/**
+ * Read a quoted string such as an attribute value.
+ *
+ * The string must start with ' or " and ends at the next occurrence of
+ * that SAME quote character, so the other kind of quote may appear
+ * inside the value.  When do_i_parse is non-zero, the five predefined
+ * entities are decoded while copying.
+ *
+ * @param p0          offset of the opening quote
+ * @param buf         receives the unquoted text; characters are appended
+ * @param do_i_parse  non-zero to decode entities, zero to copy verbatim
+ * @return the offset of the closing quote (the caller steps over it), or
+ *         the buffer length if the string is unterminated.  p0 is
+ *         returned when there is no opening quote at p0 or when an
+ *         unknown entity is met (an error is reported in that case), so
+ *         a failure never moves the caller backwards in the buffer.
+ */
+int Parser::getQuoted(int p0, DOMString &buf, int do_i_parse)
+{
+    int p = p0;
+
+    //Remember which quote opened the string; only that one closes it
+    int quote = peek(p);
+    if (quote != '"' && quote != '\'')
+        return p0;
+    p++;
+
+    while ( p<parselen )
+        {
+        int ch = peek(p);
+        if (ch == quote)
+            break;
+        if (ch=='&' && do_i_parse)
+            {
+            bool found = false;
+            for (EntityEntry *ee = entities ; ee->value ; ee++)
+                {
+                int p2 = match(p, ee->escaped);
+                if (p2>p)
+                    {
+                    buf.push_back(ee->value);
+                    p = p2;
+                    found = true;
+                    break;
+                    }
+                }
+            if (!found)
+                {
+                error("unterminated entity");
+                return p0;
+                }
+            }
+        else
+            {
+            buf.push_back((XMLCh)ch);
+            p++;
+            }
+        }
+    return p;
+}
+
+/**
+ * Skip an optional "<? ... ?>" declaration at the start of a document.
+ *
+ * Leading whitespace and comments are skipped first.  The text between
+ * "<?" and the first '?' is collected but not used; that '?' must be
+ * followed directly by '>'.
+ *
+ * @param p0  offset at which to start
+ * @return the offset just past "?>" when a declaration was found,
+ *         otherwise p0
+ */
+int Parser::parseVersion(int p0)
+{
+    int pos = skipwhite(p0);
+
+    //Must open with "<?"
+    if (peek(pos) != '<')
+        return p0;
+    pos++;
+    if (pos>=parselen || peek(pos)!='?')
+        return p0;
+    pos++;
+
+    //Collect everything up to the first '?'
+    DOMString declaration;
+    for ( ; pos<parselen ; pos++)
+        {
+        XMLCh c = peek(pos);
+        if (c=='?')
+            {
+            pos++;
+            break;
+            }
+        declaration.push_back(c);
+        }
+
+    //...which has to be followed by '>'
+    if (peek(pos) != '>')
+        return p0;
+
+    return pos + 1;
+}
+
+/**
+ * Skip an optional "<! ... >" declaration such as a DOCTYPE.
+ *
+ * Leading whitespace and comments are skipped first.  A construct that
+ * starts with "<!-" is left alone, since that is the start of a comment.
+ * The declaration ends at the first '>' that is not inside square
+ * brackets, so an internal subset like
+ *     <!DOCTYPE x [ <!ELEMENT x (#PCDATA)> ]>
+ * is skipped as a whole.  The content of the declaration is discarded.
+ *
+ * @param p0  offset at which to start
+ * @return the offset just past the closing '>' (or the buffer length if
+ *         the declaration is unterminated) when a declaration was found,
+ *         otherwise p0
+ */
+int Parser::parseDoctype(int p0)
+{
+    int p = skipwhite(p0);
+
+    if (p>=parselen || peek(p)!='<')
+        return p0;
+    p++;
+
+    if (peek(p)!='!' || peek(p+1)=='-')
+        return p0;
+    p++;
+
+    //Nesting depth of '[' ... ']' (the internal DTD subset)
+    int bracketDepth = 0;
+    while (p<parselen)
+        {
+        int ch = peek(p);
+        p++;
+        if (ch=='[')
+            bracketDepth++;
+        else if (ch==']')
+            {
+            if (bracketDepth > 0)
+                bracketDepth--;
+            }
+        else if (ch=='>' && bracketDepth==0)
+            break;
+        }
+
+    return p;
+}
+
+/**
+ * Parse one element, including its attributes, text and child elements,
+ * and attach it to 'par'.
+ *
+ * The new element is added to 'par' as soon as its tag name has been
+ * read, so a partially parsed element stays in the tree when parsing
+ * fails later on.  Text, decoded entities and CDATA content found
+ * between the tags are concatenated into the element's value; comments
+ * are dropped.  Child elements are parsed recursively.
+ *
+ * Every failure returns p0, i.e. "no progress".  That is also how a
+ * failing child is recognised, which makes errors propagate up to the
+ * outermost call.
+ *
+ * @param p0     offset at which the element is expected to start
+ * @param par    parent that receives the new element; must not be NULL
+ * @param depth  nesting depth; only passed on to recursive calls
+ * @return the offset after the element and any whitespace/comments that
+ *         follow its end tag (directly after "/>" for a self-closing
+ *         tag), or p0 on failure
+ */
+int Parser::parseElement(int p0, Element *par,int depth)
+{
+
+    int p = p0;
+
+    int p2 = p;
+
+    p = skipwhite(p);
+
+    //## Get open tag
+    XMLCh ch = peek(p);
+    if (ch!='<')
+        return p0;
+
+    p++;
+
+    DOMString openTagName;
+    p = skipwhite(p);
+    p = getWord(p, openTagName);
+    p = skipwhite(p);
+
+    //Add element to tree
+    Element *n = new Element(openTagName);
+    n->parent = par;
+    par->addChild(n);
+
+    // Get attributes
+    if (peek(p) != '>')
+        {
+        while (p<parselen)
+            {
+            p = skipwhite(p);
+            ch = peek(p);
+            if (ch=='>')
+                break;
+            if (ch=='/')
+                {
+                //Possible self-closing tag: '/' [whitespace] '>'
+                p++;
+                p = skipwhite(p);
+                ch = peek(p);
+                if (ch=='>')
+                    {
+                    p++;
+                    return p;
+                    }
+                }
+            DOMString attrName;
+            p2 = getWord(p, attrName);
+            if (p2==p)
+                break;
+            p = skipwhite(p2);
+            ch = peek(p);
+            if (ch!='=')
+                break;
+            p++;
+            p = skipwhite(p);
+            DOMString attrVal;
+            p2 = getQuoted(p, attrVal, true);
+            //A result in front of the value means getQuoted() failed;
+            //continuing would restart the parse from an earlier offset
+            if (p2 < p)
+                return p0;
+            p = p2+1;  //step over the closing quote
+            //NOTE(review): the whole attribute name, "xmlns:" included, is
+            //stored as the namespace prefix, while
+            //Element::writeIndentedRecursive() prepends "xmlns:" again
+            //when printing -- confirm which side is intended.
+            if (strncmp(attrName.c_str(), "xmlns:", 6)==0)
+                n->addNamespace(attrName, attrVal);
+            else
+                n->addAttribute(attrName, attrVal);
+            }
+        }
+
+    //Step over the character that ended the open tag (normally '>')
+    p++;
+
+    // ### Get intervening data ### */
+    DOMString data;
+    while (p<parselen)
+        {
+        //# COMMENT : skip it, then look at what follows from scratch so
+        //  that a directly following comment is skipped as well
+        p2 = match(p, "<!--");
+        if (p2>p)
+            {
+            p = p2;
+            while (p<parselen)
+                {
+                p2 = match(p, "-->");
+                if (p2 > p)
+                    {
+                    p = p2;
+                    break;
+                    }
+                p++;
+                }
+            continue;
+            }
+
+        ch = peek(p);
+        //# END TAG
+        if (ch=='<' && peek(p+1)=='/')
+            {
+            break;
+            }
+        //# CDATA : copy everything up to "]]>" verbatim.  Markup and
+        //  entities are not interpreted inside the section.
+        p2 = match(p, "<![CDATA[");
+        if (p2 > p)
+            {
+            p = p2;
+            while (p<parselen)
+                {
+                p2 = match(p, "]]>");
+                if (p2 > p)
+                    {
+                    p = p2;
+                    break;
+                    }
+                ch = peek(p);
+                data.push_back(ch);
+                p++;
+                }
+            continue;
+            }
+
+        //# CHILD ELEMENT
+        if (ch == '<')
+            {
+            p2 = parseElement(p, n, depth+1);
+            //A successful child always consumes at least its '<'
+            if (p2 <= p)
+                return p0;
+            p = p2;
+            continue;
+            }
+        //# ENTITY
+        if (ch=='&')
+            {
+            bool found = false;
+            for (EntityEntry *ee = entities ; ee->value ; ee++)
+                {
+                p2 = match(p, ee->escaped);
+                if (p2>p)
+                    {
+                    data.push_back(ee->value);
+                    p = p2;
+                    found = true;
+                    break;
+                    }
+                }
+            if (!found)
+                {
+                error("unterminated entity");
+                return p0;
+                }
+            continue;
+            }
+
+        //# NONE OF THE ABOVE
+        data.push_back(ch);
+        p++;
+        }/*while*/
+
+
+    n->value = data;
+
+    //## Get close tag
+    p = skipwhite(p);
+    ch = peek(p);
+    if (ch != '<')
+        {
+        error("no < for end tag\n");
+        return p0;
+        }
+    p++;
+    ch = peek(p);
+    if (ch != '/')
+        {
+        error("no / on end tag");
+        return p0;
+        }
+    p++;
+    p = skipwhite(p);
+    DOMString closeTagName;
+    p = getWord(p, closeTagName);
+    if (openTagName != closeTagName)
+        {
+        //The names are narrow strings, hence %s
+        error("Mismatched closing tag.  Expected </%s>. Got '%s'.",
+                openTagName.c_str(), closeTagName.c_str());
+        return p0;
+        }
+    p = skipwhite(p);
+    if (peek(p) != '>')
+        {
+        error("no > on end tag for '%s'", closeTagName.c_str());
+        return p0;
+        }
+    p++;
+    p = skipwhite(p);
+    return p;
+}
+
+
+
+
+/**
+ * Parse a document held in an XMLCh buffer.
+ *
+ * An optional "<? ... ?>" declaration and an optional "<! ... >"
+ * declaration are skipped, then one element is parsed and attached to a
+ * freshly created element named "root".  The results of the individual
+ * parse steps are not checked: the root element is returned even when
+ * nothing could be parsed, in which case it simply has no children.
+ *
+ * The buffer pointer and length are stored in the parser and used by
+ * the other parsing methods.
+ *
+ * @param buf  characters to parse
+ * @param pos  offset at which parsing starts
+ * @param len  number of characters in 'buf'
+ * @return the newly allocated "root" element
+ */
+Element *Parser::parse(XMLCh *buf,int pos,int len)
+{
+    parsebuf = buf;
+    parselen = len;
+
+    Element *rootNode = new Element("root");
+    int afterProlog = parseDoctype(parseVersion(pos));
+    parseElement(afterProlog, rootNode, 0);
+    return rootNode;
+}
+
+
+/**
+ * Parse a document held in a plain char buffer.
+ *
+ * The bytes are widened into a temporary XMLCh buffer, which is then
+ * handed to parse(XMLCh *, int, int).  Each byte is widened as an
+ * unsigned value, so bytes >= 0x80 do not turn into negative or huge
+ * character codes.  The temporary buffer is released automatically,
+ * also when parsing throws.
+ *
+ * @param buf  bytes to parse; NULL is treated as an empty document
+ * @param pos  offset at which parsing starts; negative values are
+ *             treated as 0
+ * @param len  number of bytes in 'buf'; negative values are treated as 0
+ * @return the newly allocated "root" element
+ */
+Element *Parser::parse(const char *buf, int pos, int len)
+{
+    if (!buf || len < 0)
+        len = 0;
+    if (pos < 0)
+        pos = 0;
+
+    //One extra, zero-initialised slot serves as terminator
+    std::vector<XMLCh> charbuf(len + 1);
+    for (int i = 0; i < len; i++)
+        charbuf[i] = static_cast<XMLCh>(static_cast<unsigned char>(buf[i]));
+    charbuf[len] = '\0';
+
+    return parse(&charbuf[0], pos, len);
+}
+
+/**
+ * Parse a document held in a DOMString.
+ *
+ * The characters are copied into a temporary XMLCh buffer, which is
+ * then handed to parse(XMLCh *, int, int).  The temporary buffer is
+ * released automatically, also when parsing throws, and an allocation
+ * failure is reported by exception instead of a NULL dereference.
+ *
+ * @param buf  text to parse
+ * @return the newly allocated "root" element
+ */
+Element *Parser::parse(const DOMString &buf)
+{
+    long len = buf.size();
+
+    //One extra, zero-initialised slot serves as terminator
+    std::vector<XMLCh> charbuf(len + 1);
+    for (long i = 0; i < len; i++)
+        charbuf[i] = (XMLCh)buf[i];
+    charbuf[len] = '\0';
+
+    return parse(&charbuf[0], 0, len);
+}
+
+/**
+ * Load a file and parse its contents.
+ *
+ * The file is read byte by byte into a temporary XMLCh buffer sized from
+ * fstat().  Reading stops at end of file or once that many bytes have
+ * been read, whichever comes first, and only the bytes actually read are
+ * parsed.  The temporary buffer is released automatically.
+ *
+ * @param fileName  path of the file to parse
+ * @return the newly allocated "root" element, or NULL when 'fileName' is
+ *         NULL, the file cannot be opened, or its size cannot be
+ *         determined
+ */
+Element *Parser::parseFile(const char *fileName)
+{
+
+    //##### LOAD INTO A CHAR BUF, THEN CONVERT TO XMLCh
+    if (!fileName)
+        return NULL;
+
+    FILE *f = fopen(fileName, "rb");
+    if (!f)
+        return NULL;
+
+    struct stat   statBuf;
+    if (fstat(fileno(f),&statBuf)<0)
+        {
+        fclose(f);
+        return NULL;
+        }
+    long filelen = statBuf.st_size;
+    if (filelen < 0)
+        {
+        fclose(f);
+        return NULL;
+        }
+
+    //One extra, zero-initialised slot serves as terminator
+    std::vector<XMLCh> charbuf(filelen + 1);
+    long count = 0;
+    while (count < filelen)
+        {
+        //Test the value that was read, not feof(): feof() only turns
+        //true after a read has already failed
+        int c = fgetc(f);
+        if (c == EOF)
+            break;
+        charbuf[count++] = static_cast<XMLCh>(c);
+        }
+    fclose(f);
+    charbuf[count] = '\0';
+
+    return parse(&charbuf[0], 0, count);
+}
+
+
+
+
+
+
+
+}//namespace Pedro
+
+#if 0
+//########################################################################
+//#  T E S T
+//########################################################################
+
+/**
+ * Parse one XML file and dump the resulting tree to standard output.
+ *
+ * @param fileName  path of the file to parse
+ * @return true when the file was parsed and printed, false when
+ *         parseFile() returned no tree
+ */
+bool doTest(char *fileName)
+{
+    Pedro::Parser parser;
+    Pedro::Element *root = parser.parseFile(fileName);
+    if (root == NULL)
+        {
+        printf("Parsing failed\n");
+        return false;
+        }
+
+    root->print();
+    delete root;
+    return true;
+}
+
+
+
+/**
+ * Test driver: expects exactly one argument, the XML file to parse.
+ *
+ * @return 0 when the file was parsed and printed, 1 on a usage error or
+ *         a parse failure
+ */
+int main(int argc, char **argv)
+{
+    if (argc == 2)
+        return doTest(argv[1]) ? 0 : 1;
+
+    printf("usage: %s <xmlfile>\n", argv[0]);
+    return 1;
+}
+
+#endif
+
+//########################################################################
+//#  E N D    O F    F I L E
+//########################################################################
+
+