/**
 * Phoebe DOM Implementation.
 *
 * This is a C++ approximation of the W3C DOM model, which follows
 * fairly closely the specifications in the various .idl files, copies of
 * which are provided for reference.  Most important is this one:
 *
 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
 *
 * Authors:
 *   Bob Jamison
 *
 * Copyright (C) 2005-2007 Bob Jamison
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "uri.h"
#include "charclass.h"

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <vector>


namespace org
{
namespace w3c
{
namespace dom
{


/**
 * One row of the scheme lookup table.
 */
typedef struct
{
    int         ival;  // URI::SCHEME_* constant stored in URI::scheme
    char const *sval;  // scheme prefix as it appears in the text, ':' included
    int         port;  // default port recorded for the scheme
} LookupEntry;

/**
 * Known schemes.  Each prefix is compared literally against the start of
 * the input, so it must carry its trailing ':'.  The table is terminated
 * by an entry whose sval is NULL.
 */
static LookupEntry schemes[] =
{
    { URI::SCHEME_DATA,   "data:",     0 },
    { URI::SCHEME_HTTP,   "http:",    80 },
    { URI::SCHEME_HTTPS,  "https:",  443 },
    { URI::SCHEME_FTP,    "ftp:",     21 },
    { URI::SCHEME_FILE,   "file:",     0 },
    { URI::SCHEME_LDAP,   "ldap:",   389 },
    { URI::SCHEME_MAILTO, "mailto:",  25 },
    { URI::SCHEME_NEWS,   "news:",   119 },
    { URI::SCHEME_TELNET, "telnet:",  23 },
    { 0,                  NULL,        0 }
};



//#########################################################################
//# C O N S T R U C T O R
//#########################################################################

/**
 * Creates an empty URI: every component is reset by init().
 */
URI::URI()
{
    init();
}

/**
 * Creates a URI by resetting all components and then parsing str.
 */
URI::URI(const DOMString &str)
{
    init();
    parse(str);
}

/**
 * Creates a URI from a C string.  A NULL pointer yields an empty URI
 * instead of constructing a DOMString from a null pointer.
 */
URI::URI(const char *str)
{
    init();
    if (str)
        {
        DOMString domStr = str;
        parse(domStr);
        }
}

/**
 * Copy constructor.
 */
URI::URI(const URI &other)
{
    init();
    assign(other);
}

/**
 * Assignment.  Self-assignment is a no-op; without the guard, init()
 * would clear this object before assign() copied from it.
 */
URI &URI::operator=(const URI &other)
{
    if (this != &other)
        {
        init();
        assign(other);
        }
    return *this;
}

/**
 * Destructor.  Nothing to release explicitly.
 */
URI::~URI()
{
}

/**
 * Resets the parse state and every URI component to its empty value.
 */
void URI::init()
{
    parsebuf      = NULL;
    parselen      = 0;
    scheme        = SCHEME_NONE;
    schemeStr.clear();
    portSpecified = false;  // read by getAuthority(), so it must be defined
    port          = 0;
    authority.clear();
    path.clear();
    absolute      = false;
    opaque        = false;
    query.clear();
    fragment.clear();
}

/**
 * Copies the URI components of other into this object.  The transient
 * parse state (parsebuf, parselen) is not copied.
 */
void URI::assign(const URI &other)
{
    scheme        = other.scheme;
    schemeStr     = other.schemeStr;
    authority     = other.authority;
    portSpecified = other.portSpecified;
    port          = other.port;
    path          = other.path;
    absolute      = other.absolute;
    opaque        = other.opaque;
    query         = other.query;
    fragment      = other.fragment;
}


//#########################################################################
//#A T T R I B U T E S
//#########################################################################

static const char *hexChars = "0123456789abcdef";

/**
 * Converts a component buffer to a string.  Printable characters are
 * copied as they are; anything else is written as '%' followed by the two
 * lowercase hex digits of its low byte.
 *
 * The element type of the component buffers is declared in uri.h, so the
 * helper is generic over the sequence type rather than naming it here.
 */
template <typename Seq>
static DOMString toStr(const Seq &arr)
{
    DOMString buf;
    typename Seq::const_iterator iter;
    for (iter = arr.begin() ; iter != arr.end() ; ++iter)
        {
        int ch = *iter;
        // isprint() is only defined for values representable as
        // unsigned char, so range-check before calling it.
        if (ch >= 0 && ch <= 0xff && isprint(ch))
            buf.push_back((XMLCh)ch);
        else
            {
            buf.push_back('%');
            int hi = ((ch >> 4) & 0xf);
            buf.push_back(hexChars[hi]);
            int lo = ((ch     ) & 0xf);
            buf.push_back(hexChars[lo]);
            }
        }
    return buf;
}


/**
 * Rebuilds the textual form of the URI: the scheme string, then
 * "//" + authority when the authority is non-empty, the path,
 * "?" + query when the query is non-empty and "#" + fragment when the
 * fragment is non-empty.
 */
DOMString URI::toString() const
{
    DOMString str = schemeStr;
    if (authority.size() > 0)
        {
        str.append("//");
        str.append(toStr(authority));
        }
    str.append(toStr(path));
    if (query.size() > 0)
        {
        str.append("?");
        str.append(toStr(query));
        }
    if (fragment.size() > 0)
        {
        str.append("#");
        str.append(toStr(fragment));
        }
    return str;
}


/**
 * Returns the scheme constant (SCHEME_NONE after init()).
 */
int URI::getScheme() const
{
    return scheme;
}

/**
 * Returns the scheme prefix string as stored by the parser.
 */
DOMString URI::getSchemeStr() const
{
    return schemeStr;
}


/**
 * Returns the authority component, followed by ":<port>" when a port was
 * explicitly specified and is not negative.
 */
DOMString URI::getAuthority() const
{
    DOMString ret = toStr(authority);
    if (portSpecified && port >= 0)
        {
        // Large enough for ':' plus any int; plain %d so the digits are
        // neither space-padded nor truncated.
        char buf[16];
        snprintf(buf, sizeof(buf), ":%d", port);
        ret.append(buf);
        }
    return ret;
}

/**
 * Returns the authority component without any port suffix.
 */
DOMString URI::getHost() const
{
    DOMString str = toStr(authority);
    return str;
}

/**
 * Returns the port number currently stored for this URI.
 */
int URI::getPort() const
{
    return port;
}
DOMString URI::getPath() const { DOMString str = toStr(path); return str; } DOMString URI::getNativePath() const { DOMString pathStr = toStr(path); DOMString npath; #ifdef __WIN32__ unsigned int firstChar = 0; if (pathStr.size() >= 3) { if (pathStr[0] == '/' && isLetter(pathStr[1]) && pathStr[2] == ':') firstChar++; } for (unsigned int i=firstChar ; i &str, int ch, int startpos) { for (unsigned int i = startpos ; i < str.size() ; i++) { if (ch == str[i]) return i; } return -1; } static int findLast(const std::vector &str, int ch) { for (unsigned int i = str.size()-1 ; i>=0 ; i--) { if (ch == str[i]) return i; } return -1; } static bool sequ(const std::vector &str, char *key) { char *c = key; for (unsigned int i=0 ; i substr(const std::vector &str, int startpos, int len) { std::vector buf; unsigned int pos = startpos; for (int i=0 ; i= str.size()) break; buf.push_back(str[pos++]); } return buf; } URI URI::resolve(const URI &other) const { //### According to w3c, this is handled in 3 cases //## 1 if (opaque || other.isAbsolute()) return other; //## 2 if (other.fragment.size() > 0 && other.path.size() == 0 && other.scheme == SCHEME_NONE && other.authority.size() == 0 && other.query.size() == 0 ) { URI fragUri = *this; fragUri.fragment = other.fragment; return fragUri; } //## 3 http://www.ietf.org/rfc/rfc2396.txt, section 5.2 URI newUri; //# 3.1 newUri.scheme = scheme; newUri.schemeStr = schemeStr; newUri.query = other.query; newUri.fragment = other.fragment; if (other.authority.size() > 0) { //# 3.2 if (absolute || other.absolute) newUri.absolute = true; newUri.authority = other.authority; newUri.port = other.port;//part of authority newUri.path = other.path; } else { //# 3.3 if (other.absolute) { newUri.absolute = true; newUri.path = other.path; } else { int pos = findLast(path, '/'); if (pos >= 0) { newUri.path.clear(); //# append my path up to and including the '/' for (int i = 0; i<=pos ; i++) newUri.path.push_back(path[i]); //# append other path for (unsigned int 
i = 0; i > segments; //## Collect segments if (path.size()<2) return; bool abs = false; int pos=0; int len = (int) path.size(); if (path[0]=='/') { abs = true; pos++; } while (pos < len) { int pos2 = find(path, '/', pos); if (pos2 < 0) { std::vector seg = substr(path, pos, path.size()-pos); //printf("last segment:%s\n", toStr(seg).c_str()); segments.push_back(seg); break; } if (pos2>pos) { std::vector seg = substr(path, pos, pos2-pos); //printf("segment:%s\n", toStr(seg).c_str()); segments.push_back(seg); } pos = pos2; pos++; } //## Clean up (normalize) segments bool edited = false; std::vector< std::vector >::iterator iter; for (iter=segments.begin() ; iter!=segments.end() ; ) { std::vector s = *iter; if (sequ(s,".")) { iter = segments.erase(iter); edited = true; } else if (sequ(s, "..") && iter != segments.begin() && !sequ(*(iter-1), "..")) { iter--; //back up, then erase two entries iter = segments.erase(iter); iter = segments.erase(iter); edited = true; } else iter++; } //## Rebuild path, if necessary if (edited) { path.clear(); if (abs) { path.push_back('/'); } std::vector< std::vector >::iterator iter; for (iter=segments.begin() ; iter!=segments.end() ; iter++) { if (iter != segments.begin()) path.push_back('/'); std::vector seg = *iter; for (unsigned int i = 0; i=parselen) return -1; return parsebuf[p]; } int URI::match(int p0, char const *key) { int p = p0; while (p < parselen) { if (*key == '\0') return p; else if (*key != parsebuf[p]) break; p++; key++; } return p0; } //######################################################################### //# Parsing is performed according to: //# http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#components //######################################################################### int URI::parseHex(int p0, int &result) { int p = p0; int val = 0; //# Upper 4 int ch = peek(p); if (ch >= '0' && ch <= '9') val += (ch - '0'); else if (ch >= 'a' && ch <= 'f') val += (10 + ch - 'a'); else if (ch >= 'A' && ch <= 'F') val += 
(10 + ch - 'A'); else { error("parseHex : unexpected character : %c", ch); return -1; } p++; val <<= 4; //# Lower 4 ch = peek(p); if (ch >= '0' && ch <= '9') val += (ch - '0'); else if (ch >= 'a' && ch <= 'f') val += (10 + ch - 'a'); else if (ch >= 'A' && ch <= 'F') val += (10 + ch - 'A'); else { error("parseHex : unexpected character : %c", ch); return -1; } p++; result = val; return p; } int URI::parseEntity(int p0, int &result) { int p = p0; int ch = peek(p); if (ch != '&') return p0; p++; if (!match(p, "#x")) { error("parseEntity: expected '#x'"); return -1; } p += 2; int val; p = parseHex(p, val); if (p<0) return -1; ch = peek(p); if (ch != ';') { error("parseEntity: expected ';'"); return -1; } p++; result = val; return p; } int URI::parseAsciiEntity(int p0, int &result) { int p = p0; int ch = peek(p); if (ch != '%') return p0; p++; int val; p = parseHex(p, val); if (p<0) return -1; result = val; return p; } int URI::parseScheme(int p0) { int p = p0; for (LookupEntry *entry = schemes; entry->sval ; entry++) { int p2 = match(p, entry->sval); if (p2 > p) { schemeStr = entry->sval; scheme = entry->ival; port = entry->port; p = p2; return p; } } return p; } int URI::parseHierarchicalPart(int p0) { int p = p0; int ch; //# Authority field (host and port, for example) int p2 = match(p, "//"); if (p2 > p) { p = p2; portSpecified = false; DOMString portStr; while (p < parselen) { ch = peek(p); if (ch == '/') break; else if (ch == '&') //IRI entity { int val; p2 = parseEntity(p, val); if (p2 0) { char *pstr = (char *)portStr.c_str(); char *endStr; long val = strtol(pstr, &endStr, 10); if (endStr > pstr) //successful parse? port = val; } } //# Are we absolute? ch = peek(p); if (isLetter(ch) && peek(p+1)==':') { absolute = true; path.push_back((XMLCh)'/'); } else if (ch == '/') { absolute = true; if (p>p0) //in other words, if '/' is not the first char opaque = true; path.push_back((XMLCh)ch); p++; } while (p < parselen) { ch = peek(p); if (ch == '?' 
|| ch == '#') break; else if (ch == '&') //IRI entity { int val; p2 = parseEntity(p, val); if (p2