From 6f19b8463ee061eb119280a67afbfd17c78cfeef Mon Sep 17 00:00:00 2001 From: Martin Owens Date: Wed, 11 Sep 2013 10:20:53 -0400 Subject: Introduce a backup loading method that enables NOENT but hacks out system calls for adobe files Fixed bugs: - https://launchpad.net/bugs/166371 (bzr r12505.1.1) --- src/xml/repr-io.cpp | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) (limited to 'src/xml') diff --git a/src/xml/repr-io.cpp b/src/xml/repr-io.cpp index af47779fe..364ff826a 100644 --- a/src/xml/repr-io.cpp +++ b/src/xml/repr-io.cpp @@ -100,6 +100,9 @@ public: int setFile( char const * filename ); + xmlDocPtr readXml(); + bool SystemCheck; // Checks for SYSTEM Entities + static int readCb( void * context, char * buffer, int len ); static int closeCb( void * context ); @@ -121,6 +124,7 @@ int XmlSource::setFile(char const *filename) { int retVal = -1; + this->SystemCheck = false; this->filename = filename; fp = Inkscape::IO::fopen_utf8name(filename, "r"); @@ -178,6 +182,18 @@ int XmlSource::setFile(char const *filename) return retVal; } +xmlDocPtr XmlSource::readXml() +{ + int parse_options = XML_PARSE_HUGE; // do not use XML_PARSE_NOENT ! 
see bug lp:1025185 + + Inkscape::Preferences *prefs = Inkscape::Preferences::get(); + bool allowNetAccess = prefs->getBool("/options/externalresources/xml/allow_net_access", false); + if (!allowNetAccess) parse_options |= XML_PARSE_NONET; + if (SystemCheck) parse_options |= XML_PARSE_NOENT; + + return xmlReadIO( readCb, closeCb, this, + filename, getEncoding(), parse_options); +} int XmlSource::readCb( void * context, char * buffer, int len ) { @@ -185,6 +201,15 @@ int XmlSource::readCb( void * context, char * buffer, int len ) if ( context ) { XmlSource* self = static_cast<XmlSource*>(context); retVal = self->read( buffer, len ); + + if(self->SystemCheck) { + // Check for ENTITY SYSTEM entry and kill with fire + char *system = strstr(buffer, "SYSTEM"); + while (system != NULL) { + strncpy (system," ",6); + system = strstr(buffer, "SYSTEM"); + } + } } return retVal; } @@ -299,22 +324,21 @@ Document *sp_repr_read_file (const gchar * filename, const gchar *default_ns) XmlSource src; if ( (src.setFile(filename) == 0) ) { - int parse_options = XML_PARSE_HUGE; // do not use XML_PARSE_NOENT ! 
see bug lp:1025185 - Inkscape::Preferences *prefs = Inkscape::Preferences::get(); - bool allowNetAccess = prefs->getBool("/options/externalresources/xml/allow_net_access", false); - if (!allowNetAccess) { - parse_options |= XML_PARSE_NONET; + doc = src.readXml(); + rdoc = sp_repr_do_read( doc, default_ns ); + // For some reason, failed ns loading results in this + // We try a system check version of load with NOENT for adobe + if(rdoc && strcmp(rdoc->root()->name(), "ns:svg") == 0) { + xmlFreeDoc( doc ); + src.setFile(filename); + src.SystemCheck = true; + doc = src.readXml(); + rdoc = sp_repr_do_read( doc, default_ns ); } - doc = xmlReadIO( XmlSource::readCb, - XmlSource::closeCb, - &src, - localFilename, - src.getEncoding(), - parse_options); } } - rdoc = sp_repr_do_read( doc, default_ns ); + if ( doc ) { xmlFreeDoc( doc ); } -- cgit v1.2.3 From f5b0c43114862ecd240d4ac78fe265e159873b14 Mon Sep 17 00:00:00 2001 From: Martin Owens Date: Thu, 12 Sep 2013 13:37:52 -0400 Subject: =?UTF-8?q?Improve=20code=20with=20Kosi=C5=84ski's=20regex=20repla?= =?UTF-8?q?cement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (bzr r12505.1.2) --- src/xml/repr-io.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src/xml') diff --git a/src/xml/repr-io.cpp b/src/xml/repr-io.cpp index 364ff826a..d5088f3cf 100644 --- a/src/xml/repr-io.cpp +++ b/src/xml/repr-io.cpp @@ -203,12 +203,16 @@ int XmlSource::readCb( void * context, char * buffer, int len ) retVal = self->read( buffer, len ); if(self->SystemCheck) { - // Check for ENTITY SYSTEM entry and kill with fire - char *system = strstr(buffer, "SYSTEM"); - while (system != NULL) { - strncpy (system," ",6); - system = strstr(buffer, "SYSTEM"); - } + // Check for ENTITY SYSTEM cdata and kill with fire, does + // Break svg files who use entities for ns and system entities. 
+ GRegex *entity_regex = g_regex_new( + "<!ENTITY\\s+[^>\\s]+\\s+SYSTEM\\s+\"[^>\"]+\"\\s*>", + G_REGEX_CASELESS, G_REGEX_MATCH_NEWLINE_ANY, NULL); + gchar *fixed_buffer = g_regex_replace( + entity_regex, buffer, len, 0, "", + G_REGEX_MATCH_NEWLINE_ANY, NULL); + g_regex_unref(entity_regex); + buffer = fixed_buffer; } } return retVal; @@ -974,12 +978,12 @@ void sp_repr_write_stream_element( Node * repr, Writer & out, GQuark const absref_key = g_quark_from_static_string("sodipodi:absref"); gchar const *xxHref = 0; - gchar const *xxAbsref = 0; + //gchar const *xxAbsref = 0; for ( List<AttributeRecord const> ai(attributes); ai; ++ai ) { if ( ai->key == href_key ) { xxHref = ai->value; - } else if ( ai->key == absref_key ) { - xxAbsref = ai->value; + //} else if ( ai->key == absref_key ) { + //xxAbsref = ai->value; } } -- cgit v1.2.3 From 58453047cf80b311511ff36b6b1f11c42eabbee6 Mon Sep 17 00:00:00 2001 From: Martin Owens Date: Thu, 12 Sep 2013 17:06:20 -0400 Subject: Fix regex so it affects the buffer and ban PUBLIC entities too. (bzr r12505.1.3) --- src/xml/repr-io.cpp | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'src/xml') diff --git a/src/xml/repr-io.cpp b/src/xml/repr-io.cpp index d5088f3cf..579c7598c 100644 --- a/src/xml/repr-io.cpp +++ b/src/xml/repr-io.cpp @@ -184,11 +184,13 @@ int XmlSource::setFile(char const *filename) xmlDocPtr XmlSource::readXml() { - int parse_options = XML_PARSE_HUGE; // do not use XML_PARSE_NOENT ! 
see bug lp:1025185 + int parse_options = XML_PARSE_HUGE | XML_PARSE_RECOVER; Inkscape::Preferences *prefs = Inkscape::Preferences::get(); bool allowNetAccess = prefs->getBool("/options/externalresources/xml/allow_net_access", false); if (!allowNetAccess) parse_options |= XML_PARSE_NONET; + + // Allow NOENT only if we're filtering out SYSTEM and PUBLIC entities if (SystemCheck) parse_options |= XML_PARSE_NOENT; return xmlReadIO( readCb, closeCb, this, @@ -198,21 +200,34 @@ xmlDocPtr XmlSource::readXml() int XmlSource::readCb( void * context, char * buffer, int len ) { int retVal = -1; + if ( context ) { XmlSource* self = static_cast<XmlSource*>(context); retVal = self->read( buffer, len ); if(self->SystemCheck) { - // Check for ENTITY SYSTEM cdata and kill with fire, does - // Break svg files who use entities for ns and system entities. - GRegex *entity_regex = g_regex_new( - "<!ENTITY\\s+[^>\\s]+\\s+SYSTEM\\s+\"[^>\"]+\"\\s*>", + GMatchInfo *info; + gint start, end; + + GRegex *regex = g_regex_new( + "<!ENTITY\\s+[^>\\s]+\\s+(SYSTEM|PUBLIC\\s+\"[^>\"]+\")\\s+\"[^>\"]+\"\\s*>", G_REGEX_CASELESS, G_REGEX_MATCH_NEWLINE_ANY, NULL); - gchar *fixed_buffer = g_regex_replace( - entity_regex, buffer, len, 0, "", - G_REGEX_MATCH_NEWLINE_ANY, NULL); - g_regex_unref(entity_regex); - buffer = fixed_buffer; + + // Check for SYSTEM or PUBLIC entities and kill them with spaces + // Note: g_regex_replace does not modify buffer in place, this + // logic is used instead because we can just blank out the offending + // characters in the right place without hurting the length. + g_regex_match (regex, buffer, G_REGEX_MATCH_NEWLINE_ANY, &info); + + while (g_match_info_matches (info)) { + if (g_match_info_fetch_pos (info, 1, &start, &end)) { + for (int x=start; x