From 58453047cf80b311511ff36b6b1f11c42eabbee6 Mon Sep 17 00:00:00 2001 From: Martin Owens Date: Thu, 12 Sep 2013 17:06:20 -0400 Subject: Fix regex so it affects the buffer and ban PUBLIC entities too. (bzr r12505.1.3) --- src/xml/repr-io.cpp | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'src/xml/repr-io.cpp') diff --git a/src/xml/repr-io.cpp b/src/xml/repr-io.cpp index d5088f3cf..579c7598c 100644 --- a/src/xml/repr-io.cpp +++ b/src/xml/repr-io.cpp @@ -184,11 +184,13 @@ int XmlSource::setFile(char const *filename) xmlDocPtr XmlSource::readXml() { - int parse_options = XML_PARSE_HUGE; // do not use XML_PARSE_NOENT ! see bug lp:1025185 + int parse_options = XML_PARSE_HUGE | XML_PARSE_RECOVER; Inkscape::Preferences *prefs = Inkscape::Preferences::get(); bool allowNetAccess = prefs->getBool("/options/externalresources/xml/allow_net_access", false); if (!allowNetAccess) parse_options |= XML_PARSE_NONET; + + // Allow NOENT only if we're filtering out SYSTEM and PUBLIC entities if (SystemCheck) parse_options |= XML_PARSE_NOENT; return xmlReadIO( readCb, closeCb, this, @@ -198,21 +200,34 @@ xmlDocPtr XmlSource::readXml() int XmlSource::readCb( void * context, char * buffer, int len ) { int retVal = -1; + if ( context ) { XmlSource* self = static_cast(context); retVal = self->read( buffer, len ); if(self->SystemCheck) { - // Check for ENTITY SYSTEM cdata and kill with fire, does - // Break svg files who use entities for ns and system entities. 
- GRegex *entity_regex = g_regex_new( - "\\s]+\\s+SYSTEM\\s+\"[^>\"]+\"\\s*>", + GMatchInfo *info; + gint start, end; + + GRegex *regex = g_regex_new( + "\\s]+\\s+(SYSTEM|PUBLIC\\s+\"[^>\"]+\")\\s+\"[^>\"]+\"\\s*>", G_REGEX_CASELESS, G_REGEX_MATCH_NEWLINE_ANY, NULL); - gchar *fixed_buffer = g_regex_replace( - entity_regex, buffer, len, 0, "", - G_REGEX_MATCH_NEWLINE_ANY, NULL); - g_regex_unref(entity_regex); - buffer = fixed_buffer; + + // Check for SYSTEM or PUBLIC entities and kill them with spaces + // Note: g_regex_replace does not modify buffer in place, this + // logic is used instead because we can just blank out the offending + // characters in the right place without hurting the length. + g_regex_match (regex, buffer, G_REGEX_MATCH_NEWLINE_ANY, &info); + + while (g_match_info_matches (info)) { + if (g_match_info_fetch_pos (info, 1, &start, &end)) { + for (int x=start; x