diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 1a68edc4f171ac7f00a6c21ad4dfdb76abd50fb4..91521595de88176c0d3214d715a7a66cf8ddffa9 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -52,6 +52,10 @@ Drupal x.x.x, xxxx-xx-xx (development version)
 - PostgreSQL support:
     * removed dependency on PL/pgSQL procedural language
 
+Drupal 4.6.4, 2005-11-29
+------------------------
+- fixed bugs, including 3 security vulnerabilities.
+
 Drupal 4.6.3, 2005-08-15
 ------------------------
 - fixed bugs, including a critical "arbitrary PHP code execution" bug.
@@ -108,6 +112,10 @@ Drupal 4.6.0, 2005-04-15
 - documentation:
     * improved and extended PHPDoc/Doxygen comments.
 
+Drupal 4.5.6, 2005-11-29
+------------------------
+- fixed bugs, including 3 security vulnerabilities.
+
 Drupal 4.5.5, 2005-08-15
 ------------------------
 - fixed bugs, including a critical "arbitrary PHP code execution" bug.
diff --git a/includes/bootstrap.inc b/includes/bootstrap.inc
index 6c87605ac91c68c08039091f7c94671a3ccb4ffb..9d3e44cd0c1ff50eaccebb0cb79f2e2b00bdb4dc 100644
--- a/includes/bootstrap.inc
+++ b/includes/bootstrap.inc
@@ -708,14 +708,14 @@ function arg($index) {
 }
 
 /**
- * Prepare a URL for use in an HTML attribute.
+ * Prepare a URL for use in an HTML attribute. Strips harmful protocols.
  *
- * We replace ( and ) with their url-encoded equivalents to prevent XSS attacks.
+ * The URI is handed to filter_xss_bad_protocol() as plain text. That function
+ * HTML-escapes its return value, so the URI must not be escaped here as well,
+ * otherwise its special characters would be encoded twice.
  */
 function check_url($uri) {
-  $uri = htmlspecialchars($uri, ENT_QUOTES);
-
-  $uri = strtr($uri, array('(' => '%28', ')' => '%29'));
+  $uri = filter_xss_bad_protocol($uri, FALSE);
   return $uri;
 }
 
diff --git a/includes/common.inc b/includes/common.inc
index 84c955eed660d258e40f1a8db6b28d7f15f15652..08545ce0e7cdf11d3e8b1ee86bbbcdbed0d48cde 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -615,13 +615,6 @@ function t($string, $args = 0) {
   }
 }
 
-/**
- * Encode special characters in a plain-text string for display as HTML.
- */ -function check_plain($text) { - return htmlspecialchars($text, ENT_QUOTES); -} - /** * @defgroup validation Input validation * @{ @@ -667,54 +660,6 @@ function valid_url($url, $absolute = FALSE) { } } -/** - * Validate data input by a user. - * - * Ensures that user data cannot be used to perform attacks on the site. - * - * @param $data - * The input to check. - * @return - * TRUE if the input data is acceptable. - */ -function valid_input_data($data) { - if (is_array($data) || is_object($data)) { - // Form data can contain a number of nested arrays. - foreach ($data as $key => $value) { - if (!valid_input_data($key) || !valid_input_data($value)) { - return FALSE; - } - } - } - else if (isset($data)) { - // Detect dangerous input data. - - // Decode all normal character entities. - $data = decode_entities($data, array('<', '&', '"')); - - // Check strings: - $match = preg_match('/\Wjavascript\s*:/i', $data); - $match += preg_match('/\Wexpression\s*\(/i', $data); - $match += preg_match('/\Walert\s*\(/i', $data); - - // Check attributes: - $match += preg_match("/\W(dynsrc|datasrc|data|lowsrc|on[a-z]+)\s*=[^>]+?>/i", $data); - - // Check tags: - $match += preg_match("/<\s*(applet|script|object|style|embed|form|blink|meta|html|frame|iframe|layer|ilayer|head|frameset|xml)/i", $data); - - if ($match) { - watchdog('security', t('Terminated request because of suspicious input data: %data.', array('%data' => theme('placeholder', $data)))); - return FALSE; - } - } - - return TRUE; -} -/** - * @} End of "defgroup validation". - */ - /** * Register an event for the current visitor (hostname/IP) to the flood control mechanism. * @@ -1366,17 +1311,7 @@ function _drupal_bootstrap_full() { } // Initialize all enabled modules. module_init(); - if (!user_access('bypass input data check')) { - // We can't use $_REQUEST because it consists of the contents of $_POST, - // $_GET and $_COOKIE: if any of the input arrays share a key, only one - // value will be verified. 
- if (!valid_input_data($_GET) - || !valid_input_data($_POST) - || !valid_input_data($_COOKIE) - || !valid_input_data($_FILES)) { - die('Terminated request because of suspicious input data.'); - } - } + // Undo magic quotes fix_gpc_magic(); // Initialize the localization system. $locale = locale_initialize(); diff --git a/includes/database.mysql.inc b/includes/database.mysql.inc index ed02fa886c65eb2837355f82ac266b1c12221a8d..3fce79940cc11962d22dc2511848d8a25c46177b 100644 --- a/includes/database.mysql.inc +++ b/includes/database.mysql.inc @@ -105,7 +105,7 @@ function _db_query($query, $debug = 0) { return $result; } else { - trigger_error(mysql_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR); + trigger_error(check_plain(mysql_error() ."\nquery: ". $query), E_USER_ERROR); return FALSE; } } diff --git a/includes/database.mysqli.inc b/includes/database.mysqli.inc index f2583b214d92db4b1a981fee87b2942488b7e59d..3eaea5854637e519abe5d04be4227ede1406e765 100644 --- a/includes/database.mysqli.inc +++ b/includes/database.mysqli.inc @@ -113,7 +113,7 @@ function _db_query($query, $debug = 0) { return $result; } else { - trigger_error(mysqli_error($active_db) ."\nquery: ". htmlspecialchars($query), E_USER_ERROR); + trigger_error(check_plain(mysqli_error($active_db) ."\nquery: ". $query), E_USER_ERROR); return FALSE; } } diff --git a/includes/database.pgsql.inc b/includes/database.pgsql.inc index 415565a477b31afabee91c3f255f286c544c6013..3c2f8f710d151d4e2bf7f254b66761e1f5d5b9f5 100644 --- a/includes/database.pgsql.inc +++ b/includes/database.pgsql.inc @@ -92,7 +92,7 @@ function _db_query($query, $debug = 0) { return $last_result; } else { - trigger_error(pg_last_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR); + trigger_error(check_plain(pg_last_error() ."\nquery: ". 
$query), E_USER_ERROR);
     return FALSE;
   }
 }
diff --git a/includes/file.inc b/includes/file.inc
index 41f9a78f158e9d0bc5ad7ae2871b4683da5134f9..55f7825d5be5f78507c7e8a37932c40c441be9c7 100644
--- a/includes/file.inc
+++ b/includes/file.inc
@@ -144,8 +144,28 @@ function file_check_upload($source) {
   elseif ($_FILES["edit"]["name"][$source] && is_uploaded_file($_FILES["edit"]["tmp_name"][$source])) {
     $file = new StdClass();
     $file->filename = trim(basename($_FILES["edit"]["name"][$source]), '.');
-    $file->filemime = $_FILES["edit"]["type"][$source];
     $file->filepath = $_FILES["edit"]["tmp_name"][$source];
+
+    // Prefer the MIME type detected on the server; fall back to the type
+    // reported by the client when mime_content_type() is unavailable.
+    if (function_exists('mime_content_type')) {
+      $file->filemime = mime_content_type($file->filepath);
+      if ($file->filemime != $_FILES["edit"]["type"][$source]) {
+        watchdog('file', t('For %file the system thinks its MIME type is %detected while the user has given %given for MIME type', array('%file' => theme('placeholder', $file->filepath), '%detected' => theme('placeholder', $file->filemime), '%given' => theme('placeholder', $_FILES['edit']['type'][$source]))));
+      }
+    }
+    else {
+      $file->filemime = $_FILES["edit"]["type"][$source];
+    }
+    // Text and script uploads, and files with a script extension, get the MIME
+    // type text/plain and a ".txt" suffix on both the path and the file name.
+    if (((substr($file->filemime, 0, 5) == 'text/' || strpos($file->filemime, 'javascript') !== FALSE) && (substr($file->filepath, -4) != '.txt')) || preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) {
+      $file->filemime = 'text/plain';
+      rename($file->filepath, $file->filepath .'.txt');
+      $file->filepath .= '.txt';
+      $file->filename .= '.txt';
+    }
+
     $file->error = $_FILES["edit"]["error"][$source];
     $file->filesize = $_FILES["edit"]["size"][$source];
     $file->source = $source;
diff --git a/includes/unicode.inc b/includes/unicode.inc
index 3508aad6485d724b6dab9af3cb0a8c9d106782a5..0c562a8c1bbaeeb2ece4654c68c5aca65bdff891 100644
--- a/includes/unicode.inc
+++ b/includes/unicode.inc
@@ -303,6 +303,10 @@ function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
   if ($prefix == '#x') {
     $codepoint = base_convert($codepoint, 16,
10); } + // Decimal numerical entity (strip leading zeros to avoid PHP octal notation) + else { + $codepoint = preg_replace('/^0+/', '', $codepoint); + } // Encode codepoint as UTF-8 bytes if ($codepoint < 0x80) { $str = chr($codepoint); diff --git a/modules/filter.module b/modules/filter.module index 41e6ad3990545fae2208321834ae03a5f1da66f3..2ee9f17437c05304124e25f4a99101d172643153 100644 --- a/modules/filter.module +++ b/modules/filter.module @@ -14,9 +14,6 @@ define('FILTER_HTML_STRIP', 1); define('FILTER_HTML_ESCAPE', 2); -define('FILTER_STYLE_ALLOW', 0); -define('FILTER_STYLE_STRIP', 1); - /** * Implementation of hook_help(). */ @@ -65,105 +62,100 @@ function filter_filter_tips($delta, $format, $long = false) { global $base_url; switch ($delta) { case 0: - switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) { - - case FILTER_HTML_STRIP: - if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) { - switch ($long) { - case 0: - return t('Allowed HTML tags') .': '. check_plain($allowed_html); - case 1: - $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>'; - if (!variable_get("filter_html_help_$format", 1)) { - return $output; - } + if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) { + if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) { + switch ($long) { + case 0: + return t('Allowed HTML tags') .': '. check_plain($allowed_html); + case 1: + $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>'; + if (!variable_get("filter_html_help_$format", 1)) { + return $output; + } - $output .= t(' + $output .= t(' <p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. 
This table provides examples for each tag that is enabled on this site.</p> <p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>'); - $tips = array( - 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'), - 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')), - 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'), - 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'), - 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'), - 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'), - 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'), - 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'), - 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'), - 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'), - 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')), - 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')), - 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'), - 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'), - 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'), - // Assumes and describes tr, td, th. - 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. 
t('Table cell') .'</td></tr> </table>'), - 'tr' => NULL, 'td' => NULL, 'th' => NULL, - 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'), - 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'), - // Assumes and describes li. - 'ol' => array( t('Ordered list - use the <li> to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'), - 'ul' => array( t('Unordered list - use the <li> to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'), - 'li' => NULL, - // Assumes and describes dt and dd. - 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'), - 'dt' => NULL, 'dd' => NULL, - 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'), - 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'), - 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'), - 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'), - 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'), - 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>') - ); - $header = array(t('Tag Description'), t('You Type'), t('You Get')); - preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out); - foreach ($out[1] as $tag) { - if (array_key_exists($tag, $tips)) { - if ($tips[$tag]) { - $rows[] = array( - array('data' => $tips[$tag][0], 'class' => 'description'), - array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'), - array('data' => $tips[$tag][1], 'class' => 'get') - ); - } - } - else { + $tips = array( + 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. 
variable_get('site_name', 'drupal') .'</a>'), + 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')), + 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'), + 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'), + 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'), + 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'), + 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'), + 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'), + 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'), + 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'), + 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')), + 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')), + 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'), + 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'), + 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'), + // Assumes and describes tr, td, th. + 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'), + 'tr' => NULL, 'td' => NULL, 'th' => NULL, + 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'), + 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'), + // Assumes and describes li. + 'ol' => array( t('Ordered list - use the <li> to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. 
t('Second item') .'</li> </ol>'), + 'ul' => array( t('Unordered list - use the <li> to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'), + 'li' => NULL, + // Assumes and describes dt and dd. + 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'), + 'dt' => NULL, 'dd' => NULL, + 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'), + 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'), + 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'), + 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'), + 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'), + 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>') + ); + $header = array(t('Tag Description'), t('You Type'), t('You Get')); + preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out); + foreach ($out[1] as $tag) { + if (array_key_exists($tag, $tips)) { + if ($tips[$tag]) { $rows[] = array( - array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3), + array('data' => $tips[$tag][0], 'class' => 'description'), + array('data' => '<code>'. 
check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'), + array('data' => $tips[$tag][1], 'class' => 'get') ); } } - $output .= theme('table', $header, $rows); + else { + $rows[] = array( + array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3), + ); + } + } + $output .= theme('table', $header, $rows); - $output .= t(' + $output .= t(' <p>Most unusual characters can be directly entered without any problems.</p> <p>If you do encounter problems, try using HTML character entities. A common example looks like &amp; for an ampersand & character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>'); - $entities = array( - array( t('Ampersand'), '&'), - array( t('Greater than'), '>'), - array( t('Less than'), '<'), - array( t('Quotation mark'), '"'), + $entities = array( + array( t('Ampersand'), '&'), + array( t('Greater than'), '>'), + array( t('Less than'), '<'), + array( t('Quotation mark'), '"'), + ); + $header = array(t('Character Description'), t('You Type'), t('You Get')); + unset($rows); + foreach ($entities as $entity) { + $rows[] = array( + array('data' => $entity[0], 'class' => 'description'), + array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'), + array('data' => $entity[1], 'class' => 'get') ); - $header = array(t('Character Description'), t('You Type'), t('You Get')); - unset($rows); - foreach ($entities as $entity) { - $rows[] = array( - array('data' => $entity[0], 'class' => 'description'), - array('data' => '<code>'. 
check_plain($entity[1]) .'</code>', 'class' => 'type'), - array('data' => $entity[1], 'class' => 'get') - ); - } - $output .= theme('table', $header, $rows); - return $output; - } - } - else { - return t('No HTML tags allowed'); + } + $output .= theme('table', $header, $rows); + return $output; } - - case FILTER_STYLE_STRIP: + } + else { return t('No HTML tags allowed'); + } } break; @@ -975,10 +967,9 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') { */ function _filter_html_settings($format) { $form['filter_html'] = array('#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, '#collapsed' => TRUE); - $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.')); + $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). 
If set to "Escape tags", all HTML is escaped and presented as it was typed.')); $form['filter_html']["allowed_html_$format"] = array('#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), '#size' => 64, '#maxlength' => 255, '#description' => t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.')); $form['filter_html']["filter_html_help_$format"] = array('#type' => 'checkbox', '#title' => t('Display HTML help'), '#default_value' => variable_get("filter_html_help_$format", 1), '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.')); - $form['filter_html']["filter_style_$format"] = array('#type' => 'radios', '#title' => t('HTML style attributes'), '#default_value' => variable_get("filter_style_$format", FILTER_STYLE_STRIP), '#options' => array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), '#description' => t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.')); $form['filter_html']["filter_html_nofollow_$format"] = array('#type' => 'checkbox', '#title' => t('Spam link deterrent'), '#default_value' => variable_get("filter_html_nofollow_$format", FALSE), '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. 
Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.')); return $form; @@ -989,12 +980,7 @@ function _filter_html_settings($format) { */ function _filter_html($text, $format) { if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) { - // Allow users to enter HTML, but filter it - $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')); - if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) { - $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text); - } - $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text); + $text = filter_xss($text, $format); } if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) { @@ -1064,6 +1050,245 @@ function _filter_autop($text) { return $output; } +/** + * Filters XSS. Based on kses by Ulf Harnhammar, see + * http://sourceforge.net/projects/kses + * + * For examples of various XSS attacks, see: + * http://ha.ckers.org/xss.html + * + * This code does four things: + * - Removes characters and constructs that can trick browsers + * - Makes sure all HTML entities are well-formed + * - Makes sure all HTML tags and attributes are well-formed + * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:) + * + * @param $string + * The string with raw HTML in it. It will be stripped of everything that can cause + * an XSS attack. + * @param $format + * The format to use. 
+ */
+function filter_xss($string, $format) {
+  // Store the input format
+  _filter_xss_split($format);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities by escaping every ampersand
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  return preg_replace_callback('%
+    (
+    <[^>]*.(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x
+  ', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * @param
+ *   On first call, a format identifier. On subsequent calls, an array where the
+ *   first element is the HTML tag to process.
+ * @return
+ *   If the element isn't allowed, an empty string. Otherwise, the cleaned up version
+ *   of the HTML element.
+ */
+function _filter_xss_split($m) {
+  static $allowed_html;
+
+  if (!is_array($m)) {
+    $allowed_html = array_flip(preg_split('/\s+|<|>/', variable_get("allowed_html_$m", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY));
+    return;
+  }
+
+  $string = &$m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character, which is returned in escaped form
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = &$matches[2];
+  $attrlist = &$matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes.
+ *
+ * @return
+ *   Cleaned up version of the HTML attributes.
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+ $working = 0; + + switch ($mode) { + case 0: + // Attribute name, href for instance + if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) { + $attrname = strtolower($match[1]); + $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on'); + $working = $mode = 1; + $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr); + } + + break; + + case 1: + // Equals sign or valueless ("selected") + if (preg_match('/^\s*=\s*/', $attr)) { + $working = 1; $mode = 2; + $attr = preg_replace('/^\s*=\s*/', '', $attr); + break; + } + + if (preg_match('/^\s+/', $attr)) { + $working = 1; $mode = 0; + if (!$skip) { + $attrarr[] = $attrname; + } + $attr = preg_replace('/^\s+/', '', $attr); + } + + break; + + case 2: + // Attribute value, a URL after href= for instance + if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) { + $thisval = filter_xss_bad_protocol($match[1]); + + if (!$skip) { + $attrarr[] = "$attrname=\"$thisval\""; + } + $working = 1; + $mode = 0; + $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr); + break; + } + + if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) { + $thisval = filter_xss_bad_protocol($match[1]); + + if (!$skip) { + $attrarr[] = "$attrname='$thisval'";; + } + $working = 1; $mode = 0; + $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr); + break; + } + + if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) { + $thisval = filter_xss_bad_protocol($match[1]); + + if (!$skip) { + $attrarr[] = "$attrname=\"$thisval\""; + } + $working = 1; $mode = 0; + $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr); + } + + break; + } + + if ($working == 0) { + // not well formed, remove and try again + $attr = preg_replace('/ + ^ + ( + "[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string + | # or + \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string + | # or + \S # - a non-whitespace character + )* # any number of the above three + \s* # any 
number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // the attribute list ends with a valueless attribute like "selected"
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols: keep removing the leading "scheme:" prefix until it is a
+  // whitelisted one or the callback leaves the string unchanged
+  do {
+    $before = $string;
+    $string = preg_replace_callback('/^([^:]+):/', '_filter_xss_bad_protocol', $string);
+  } while ($before != $string);
+  return check_plain($string);
+}
+
+function _filter_xss_bad_protocol($m) {
+  static $allowed_protocols = array('http' => TRUE, 'https' => TRUE, 'ftp' => TRUE, 'news' => TRUE, 'nntp' => TRUE, 'telnet' => TRUE, 'gopher' => TRUE, 'mailto' => TRUE, 'ed2k' => TRUE, 'smb' => TRUE, 'irc' => TRUE, 'rsync' => TRUE, 'ssh' => TRUE, 'sftp' => TRUE);
+  $string = preg_replace('/\s+/', '', $m[1]);
+  return isset($allowed_protocols[strtolower($string)]) ? "$string:" : '';
+}
+
 /**
  * @} End of "Standard filters".
*/ diff --git a/modules/filter/filter.module b/modules/filter/filter.module index 41e6ad3990545fae2208321834ae03a5f1da66f3..2ee9f17437c05304124e25f4a99101d172643153 100644 --- a/modules/filter/filter.module +++ b/modules/filter/filter.module @@ -14,9 +14,6 @@ define('FILTER_HTML_STRIP', 1); define('FILTER_HTML_ESCAPE', 2); -define('FILTER_STYLE_ALLOW', 0); -define('FILTER_STYLE_STRIP', 1); - /** * Implementation of hook_help(). */ @@ -65,105 +62,100 @@ function filter_filter_tips($delta, $format, $long = false) { global $base_url; switch ($delta) { case 0: - switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) { - - case FILTER_HTML_STRIP: - if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) { - switch ($long) { - case 0: - return t('Allowed HTML tags') .': '. check_plain($allowed_html); - case 1: - $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>'; - if (!variable_get("filter_html_help_$format", 1)) { - return $output; - } + if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) { + if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) { + switch ($long) { + case 0: + return t('Allowed HTML tags') .': '. check_plain($allowed_html); + case 1: + $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>'; + if (!variable_get("filter_html_help_$format", 1)) { + return $output; + } - $output .= t(' + $output .= t(' <p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. 
This table provides examples for each tag that is enabled on this site.</p> <p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>'); - $tips = array( - 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'), - 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')), - 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'), - 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'), - 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'), - 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'), - 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'), - 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'), - 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'), - 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'), - 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')), - 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')), - 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'), - 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'), - 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'), - // Assumes and describes tr, td, th. - 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. 
t('Table cell') .'</td></tr> </table>'), - 'tr' => NULL, 'td' => NULL, 'th' => NULL, - 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'), - 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'), - // Assumes and describes li. - 'ol' => array( t('Ordered list - use the <li> to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'), - 'ul' => array( t('Unordered list - use the <li> to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'), - 'li' => NULL, - // Assumes and describes dt and dd. - 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'), - 'dt' => NULL, 'dd' => NULL, - 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'), - 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'), - 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'), - 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'), - 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'), - 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>') - ); - $header = array(t('Tag Description'), t('You Type'), t('You Get')); - preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out); - foreach ($out[1] as $tag) { - if (array_key_exists($tag, $tips)) { - if ($tips[$tag]) { - $rows[] = array( - array('data' => $tips[$tag][0], 'class' => 'description'), - array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'), - array('data' => $tips[$tag][1], 'class' => 'get') - ); - } - } - else { + $tips = array( + 'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. 
variable_get('site_name', 'drupal') .'</a>'), + 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')), + 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'), + 'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'), + 'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'), + 'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'), + 'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'), + 'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'), + 'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'), + 'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'), + 'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')), + 'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')), + 'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'), + 'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'), + 'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'), + // Assumes and describes tr, td, th. + 'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'), + 'tr' => NULL, 'td' => NULL, 'th' => NULL, + 'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'), + 'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'), + // Assumes and describes li. + 'ol' => array( t('Ordered list - use the <li> to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. 
t('Second item') .'</li> </ol>'), + 'ul' => array( t('Unordered list - use the <li> to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'), + 'li' => NULL, + // Assumes and describes dt and dd. + 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'), + 'dt' => NULL, 'dd' => NULL, + 'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'), + 'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'), + 'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'), + 'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'), + 'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'), + 'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>') + ); + $header = array(t('Tag Description'), t('You Type'), t('You Get')); + preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out); + foreach ($out[1] as $tag) { + if (array_key_exists($tag, $tips)) { + if ($tips[$tag]) { $rows[] = array( - array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3), + array('data' => $tips[$tag][0], 'class' => 'description'), + array('data' => '<code>'. 
check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'), + array('data' => $tips[$tag][1], 'class' => 'get') ); } } - $output .= theme('table', $header, $rows); + else { + $rows[] = array( + array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3), + ); + } + } + $output .= theme('table', $header, $rows); - $output .= t(' + $output .= t(' <p>Most unusual characters can be directly entered without any problems.</p> <p>If you do encounter problems, try using HTML character entities. A common example looks like &amp; for an ampersand & character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>'); - $entities = array( - array( t('Ampersand'), '&'), - array( t('Greater than'), '>'), - array( t('Less than'), '<'), - array( t('Quotation mark'), '"'), + $entities = array( + array( t('Ampersand'), '&'), + array( t('Greater than'), '>'), + array( t('Less than'), '<'), + array( t('Quotation mark'), '"'), + ); + $header = array(t('Character Description'), t('You Type'), t('You Get')); + unset($rows); + foreach ($entities as $entity) { + $rows[] = array( + array('data' => $entity[0], 'class' => 'description'), + array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'), + array('data' => $entity[1], 'class' => 'get') ); - $header = array(t('Character Description'), t('You Type'), t('You Get')); - unset($rows); - foreach ($entities as $entity) { - $rows[] = array( - array('data' => $entity[0], 'class' => 'description'), - array('data' => '<code>'. 
check_plain($entity[1]) .'</code>', 'class' => 'type'), - array('data' => $entity[1], 'class' => 'get') - ); - } - $output .= theme('table', $header, $rows); - return $output; - } - } - else { - return t('No HTML tags allowed'); + } + $output .= theme('table', $header, $rows); + return $output; } - - case FILTER_STYLE_STRIP: + } + else { return t('No HTML tags allowed'); + } } break; @@ -975,10 +967,9 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') { */ function _filter_html_settings($format) { $form['filter_html'] = array('#type' => 'fieldset', '#title' => t('HTML filter'), '#collapsible' => TRUE, '#collapsed' => TRUE); - $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.')); + $form['filter_html']["filter_html_$format"] = array('#type' => 'radios', '#title' => t('Filter HTML tags'), '#default_value' => variable_get("filter_html_$format", FILTER_HTML_STRIP), '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')), '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). 
If set to "Escape tags", all HTML is escaped and presented as it was typed.')); $form['filter_html']["allowed_html_$format"] = array('#type' => 'textfield', '#title' => t('Allowed HTML tags'), '#default_value' => variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), '#size' => 64, '#maxlength' => 255, '#description' => t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.')); $form['filter_html']["filter_html_help_$format"] = array('#type' => 'checkbox', '#title' => t('Display HTML help'), '#default_value' => variable_get("filter_html_help_$format", 1), '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.')); - $form['filter_html']["filter_style_$format"] = array('#type' => 'radios', '#title' => t('HTML style attributes'), '#default_value' => variable_get("filter_style_$format", FILTER_STYLE_STRIP), '#options' => array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), '#description' => t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.')); $form['filter_html']["filter_html_nofollow_$format"] = array('#type' => 'checkbox', '#title' => t('Spam link deterrent'), '#default_value' => variable_get("filter_html_nofollow_$format", FALSE), '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. 
Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.')); return $form; @@ -989,12 +980,7 @@ function _filter_html_settings($format) { */ function _filter_html($text, $format) { if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) { - // Allow users to enter HTML, but filter it - $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')); - if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) { - $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text); - } - $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text); + $text = filter_xss($text, $format); } if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) { @@ -1064,6 +1050,245 @@ function _filter_autop($text) { return $output; } +/** + * Filters XSS. Based on kses by Ulf Harnhammar, see + * http://sourceforge.net/projects/kses + * + * For examples of various XSS attacks, see: + * http://ha.ckers.org/xss.html + * + * This code does four things: + * - Removes characters and constructs that can trick browsers + * - Makes sure all HTML entities are well-formed + * - Makes sure all HTML tags and attributes are well-formed + * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:) + * + * @param $string + * The string with raw HTML in it. It will be stripped of everything that can cause + * an XSS attack. + * @param $format + * The format to use. 
+ */
+function filter_xss($string, $format) {
+  // Hand the format to _filter_xss_split() so it can load that format's allowed tags
+  _filter_xss_split($format);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities by escaping every ampersand
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  return preg_replace_callback('%
+    (
+    <[^>]*.(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x
+    ', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * @param $m
+ *   When called directly by filter_xss(), a format identifier. When invoked through
+ *   preg_replace_callback(), the match array; the tag to process is read from element 1.
+ * @return
+ *   If the element isn't allowed, an empty string. Otherwise, the cleaned up version
+ *   of the HTML element.
+ */
+function _filter_xss_split($m) {
+  static $allowed_html;
+  // A non-array argument is a format identifier passed directly by filter_xss(): (re)load that format's tag whitelist on every such call.
+  if (!is_array($m)) {
+    $allowed_html = array_flip(preg_split('/\s+|<|>/', variable_get("allowed_html_$m", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY));
+    return;
+  }
+
+  $string = $m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character: emit it escaped
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = $matches[2];
+  $attrlist = $matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? ' /' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+  // Only separate the element name from the attributes when there are attributes.
+  return '<'. $elem . (strlen($attr2) ? ' '. $attr2 : '') . $xhtml_slash .'>';
+}
+
+/**
+ * Processes a string of HTML attributes, dropping "style" and "on*" attributes.
+ *
+ * @return
+ *   Array of cleaned up attributes, each either 'name="value"' or a bare 'name'; values pass through filter_xss_bad_protocol().
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+        |               # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        |               # or
+        \S              # - a non-whitespace character
+        )*              # any number of the above three
+        \s*             # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // the attribute list ends with a valueless attribute like "selected"; skipped names (style, on*) stay dropped here too
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols. Repeat until the string stops changing, because removing one disallowed prefix can expose another.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('/^([^:]+):/', '_filter_xss_bad_protocol', $string);
+  } while ($before != $string);
+  return check_plain($string);
+}
+
+// preg_replace_callback() helper for filter_xss_bad_protocol(): keeps a whitelisted "scheme:" prefix (lowercased), removes any other.
+function _filter_xss_bad_protocol($m) {
+  static $allowed_protocols = array('http' => TRUE, 'https' => TRUE, 'ftp' => TRUE, 'news' => TRUE, 'nntp' => TRUE, 'telnet' => TRUE, 'gopher' => TRUE, 'mailto' => TRUE, 'ed2k' => TRUE, 'smb' => TRUE, 'irc' => TRUE, 'rsync' => TRUE, 'ssh' => TRUE, 'sftp' => TRUE);
+  $string = strtolower(preg_replace('/\s+/', '', $m[1]));
+  return isset($allowed_protocols[$string]) ? "$string:" : '';
+}
+
 /**
  * @} End of "Standard filters".
  */
diff --git a/modules/search.module b/modules/search.module index 6cba60fe4dc3a8a965dc327f326bd5d54441092c..74535839eba1e0f40f7a6269113c7cfb8e782760 100644 --- a/modules/search.module +++ b/modules/search.module @@ -332,7 +332,7 @@ function search_simplify($text) { // Simple CJK handling if (variable_get('overlap_cjk', true)) { - $text = preg_replace_callback('/['. 
PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text); + $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text); } // To improve searching for numerical data such as dates, IP addresses diff --git a/modules/search/search.module b/modules/search/search.module index 6cba60fe4dc3a8a965dc327f326bd5d54441092c..74535839eba1e0f40f7a6269113c7cfb8e782760 100644 --- a/modules/search/search.module +++ b/modules/search/search.module @@ -332,7 +332,7 @@ function search_simplify($text) { // Simple CJK handling if (variable_get('overlap_cjk', true)) { - $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text); + $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text); } // To improve searching for numerical data such as dates, IP addresses diff --git a/modules/system.module b/modules/system.module index d31d4dd622d30a24255b210d8c0816b26c12cc65..248ae68a42a92fffb4776216be7e43b4d263973d 100644 --- a/modules/system.module +++ b/modules/system.module @@ -47,7 +47,7 @@ function system_help($section) { * Implementation of hook_perm(). */ function system_perm() { - return array('administer site configuration', 'access administration pages', 'bypass input data check'); + return array('administer site configuration', 'access administration pages'); } /** diff --git a/modules/system/system.module b/modules/system/system.module index d31d4dd622d30a24255b210d8c0816b26c12cc65..248ae68a42a92fffb4776216be7e43b4d263973d 100644 --- a/modules/system/system.module +++ b/modules/system/system.module @@ -47,7 +47,7 @@ function system_help($section) { * Implementation of hook_perm(). 
*/ function system_perm() { - return array('administer site configuration', 'access administration pages', 'bypass input data check'); + return array('administer site configuration', 'access administration pages'); } /** diff --git a/modules/upload.module b/modules/upload.module index 195e9d67736df8fbaedb7752ff49f5ad7e5edd6a..79254ba83609d952376efa40373d6db2857018b7 100644 --- a/modules/upload.module +++ b/modules/upload.module @@ -208,14 +208,6 @@ function upload_nodeapi(&$node, $op, $arg) { } } - // Rename possibly executable scripts to prevent accidental execution. - // Uploaded files are attachments and should be shown in their original - // form, rather than run. - if (preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) { - $file->filename .= '.txt'; - $file->filemime = 'text/plain'; - } - if ($error['extension'] == count($user->roles) && $user->uid != 1) { form_set_error('upload', t('The selected file %name can not be attached to this post, because it is only possible to attach files with the following extensions: %files-allowed.', array('%name' => theme('placeholder', $file->filename), '%files-allowed' => theme('placeholder', $extensions)))); } diff --git a/modules/upload/upload.module b/modules/upload/upload.module index 195e9d67736df8fbaedb7752ff49f5ad7e5edd6a..79254ba83609d952376efa40373d6db2857018b7 100644 --- a/modules/upload/upload.module +++ b/modules/upload/upload.module @@ -208,14 +208,6 @@ function upload_nodeapi(&$node, $op, $arg) { } } - // Rename possibly executable scripts to prevent accidental execution. - // Uploaded files are attachments and should be shown in their original - // form, rather than run. 
- if (preg_match('/\.(php|pl|py|cgi|asp)$/i', $file->filename)) { - $file->filename .= '.txt'; - $file->filemime = 'text/plain'; - } - if ($error['extension'] == count($user->roles) && $user->uid != 1) { form_set_error('upload', t('The selected file %name can not be attached to this post, because it is only possible to attach files with the following extensions: %files-allowed.', array('%name' => theme('placeholder', $file->filename), '%files-allowed' => theme('placeholder', $extensions)))); }