From 0209eb25e9f59de797926b90954d33c75d363c69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Hojtsy?= <gabor@hojtsy.hu> Date: Fri, 28 May 2010 11:29:39 +0000 Subject: [PATCH] #222926 by deviantintegral, sun, jcnventura, tic2000, jenlampton, smk-ka: htmlcorrector filter escapes HTML comments --- modules/filter/filter.module | 107 +++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/modules/filter/filter.module b/modules/filter/filter.module index 7385a25611b5..9646b1d1ced3 100644 --- a/modules/filter/filter.module +++ b/modules/filter/filter.module @@ -783,10 +783,10 @@ function _filter_htmlcorrector($text) { } // Properly entify angles. - $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text); + $text = preg_replace('@<(?=[^a-zA-Z!/]|$)@', '&lt;', $text); // Split tags from text. - $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + $split = preg_split('/<(!--.*?--|[^>]+?)>/s', $text, -1, PREG_SPLIT_DELIM_CAPTURE); // Note: PHP ensures the array consists of alternating delimiters and literals // and begins and ends with a literal (inserting $null as required). @@ -796,37 +796,43 @@ function _filter_htmlcorrector($text) { foreach ($split as $value) { // Process HTML tags. if ($tag) { - list($tagname) = explode(' ', strtolower($value), 2); - // Closing tag - if ($tagname{0} == '/') { - $tagname = substr($tagname, 1); - // Discard XHTML closing tags for single use tags. - if (!isset($single_use[$tagname])) { - // See if we possibly have a matching opening tag on the stack. - if (in_array($tagname, $stack)) { - // Close other tags lingering first. - do { - $output .= '</'. $stack[0] .'>'; - } while (array_shift($stack) != $tagname); - } - // Otherwise, discard it. - } + // Passthrough comments. + if (substr($value, 0, 3) == '!--') { + $output .= '<'. $value .'>'; } - // Opening tag else { - // See if we have an identical 'no nesting' tag already open and close it if found. 
- if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) { - $output .= '</'. array_shift($stack) .'>'; - } - // Push non-single-use tags onto the stack - if (!isset($single_use[$tagname])) { - array_unshift($stack, $tagname); + list($tagname) = preg_split('/\s/', strtolower($value), 2); + // Closing tag + if ($tagname{0} == '/') { + $tagname = substr($tagname, 1); + // Discard XHTML closing tags for single use tags. + if (!isset($single_use[$tagname])) { + // See if we possibly have a matching opening tag on the stack. + if (in_array($tagname, $stack)) { + // Close other tags lingering first. + do { + $output .= '</'. $stack[0] .'>'; + } while (array_shift($stack) != $tagname); + } + // Otherwise, discard it. + } } - // Add trailing slash to single-use tags as per X(HT)ML. + // Opening tag else { - $value = rtrim($value, ' /') .' /'; + // See if we have an identical 'no nesting' tag already open and close it if found. + if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) { + $output .= '</'. array_shift($stack) .'>'; + } + // Push non-single-use tags onto the stack + if (!isset($single_use[$tagname])) { + array_unshift($stack, $tagname); + } + // Add trailing slash to single-use tags as per X(HT)ML. + else { + $value = rtrim($value, ' /') .' /'; + } + $output .= '<'. $value .'>'; } - $output .= '<'. $value .'>'; } } else { @@ -891,7 +897,7 @@ function _filter_autop($text) { // We don't apply any processing to the contents of these tags to avoid messing // up code. We look for matched pairs and allow basic nesting. 
For example: // "processed <pre> ignored <script> ignored </script> ignored </pre> processed" - $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + $chunks = preg_split('@(<(?:!--.*?--|/?(?:pre|script|style|object)[^>]*)>)@si', $text, -1, PREG_SPLIT_DELIM_CAPTURE); // Note: PHP ensures the array consists of alternating delimiters and literals // and begins and ends with a literal (inserting NULL as required). $ignore = FALSE; @@ -899,19 +905,25 @@ function _filter_autop($text) { $output = ''; foreach ($chunks as $i => $chunk) { if ($i % 2) { - // Opening or closing tag? - $open = ($chunk[1] != '/'); - list($tag) = split('[ >]', substr($chunk, 2 - $open), 2); - if (!$ignore) { - if ($open) { - $ignore = TRUE; - $ignoretag = $tag; - } + // Passthrough comments. + if (substr($chunk, 1, 3) == '!--') { + $output .= $chunk; } - // Only allow a matching tag to close it. - else if (!$open && $ignoretag == $tag) { - $ignore = FALSE; - $ignoretag = ''; + else { + // Opening or closing tag? + $open = ($chunk[1] != '/'); + list($tag) = split('[ >]', substr($chunk, 2 - $open), 2); + if (!$ignore) { + if ($open) { + $ignore = TRUE; + $ignoretag = $tag; + } + } + // Only allow a matching tag to close it. 
+ else if (!$open && $ignoretag == $tag) { + $ignore = FALSE; + $ignoretag = ''; + } } } else if (!$ignore) { @@ -997,6 +1009,8 @@ function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', ( <(?=[^a-zA-Z!/]) # a lone < | # or + <!--.*?--> # a comment + | # or <[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string | # or > # just a > @@ -1035,7 +1049,7 @@ function _filter_xss_split($m, $store = FALSE) { return '&lt;'; } - if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) { + if (!preg_match('%^(?:<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->))$%', $string, $matches)) { // Seriously malformed return ''; } @@ -1043,12 +1057,21 @@ function _filter_xss_split($m, $store = FALSE) { $slash = trim($matches[1]); $elem = &$matches[2]; $attrlist = &$matches[3]; + $comment = &$matches[4]; + + if ($comment) { + $elem = '!--'; + } if (!isset($allowed_html[strtolower($elem)])) { // Disallowed HTML element return ''; } + if ($comment) { + return $comment; + } + if ($slash != '') { return "</$elem>"; } -- GitLab