From 667bf95308cfbaac5929bee70e9ba98fa92ed3db Mon Sep 17 00:00:00 2001
From: Dries Buytaert <dries@buytaert.net>
Date: Sun, 20 May 2007 16:44:35 +0000
Subject: [PATCH] - Patch #54833 by Steven: added an HTML corrector.

---
Review notes (ignored by `git am`; not part of the commit message):

* modules/filter/filter.module
    filter_filter() gains delta 3 ("HTML corrector") in its 'list',
    'description' and processing branches; the processing branch calls the
    new _filter_htmlcorrector($text).
    _filter_htmlcorrector() takes an HTML string and returns a string in
    which:
      - every "<" that is not followed by a letter or "/" is written as the
        entity "&lt;";
      - opening tags are tracked on a stack (innermost first);
      - a closing tag that matches a tag on the stack closes every tag
        above it first; a closing tag with no match is dropped;
      - an opening <li> or <p> directly inside an open tag of the same
        name closes the previous one first;
      - single-use tags (br, img, hr, ...) get a trailing " /" and are
        never put on the stack; their closing tags are dropped;
      - tags still open at the end of the text are closed.
    The first character of the tag name is read with $tagname[0] rather
    than $tagname{0}, which newer PHP versions no longer accept.

* modules/system/system.install
    system_install() adds the HTML corrector (module 'filter', delta 3,
    weight 10) to input format 1 and to input format 2 ("Full HTML").
    system_update_6018() either disables an enabled htmlcorrector module
    and re-points its rows in {filters} (the table that holds the
    module/delta columns) to filter/3, or else appends the corrector to
    every format whose name contains 'HTML' at weight
    max(10, highest existing weight + 1).
    NOTE(review): the status message is pushed onto $ret in the same
    array shape that update_sql() is expected to return
    ('success'/'query') -- confirm against update.php.

 CHANGELOG.txt                 |  3 ++
 modules/filter/filter.module  | 80 ++++++++++++++++++++++++++++++++++-
 modules/system/system.install | 28 ++++++++++++
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index dd927d66e557..960321085239 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -40,6 +40,9 @@ Drupal 6.0, xxxx-xx-xx (development version)
     * Added form to provide initial site information during installation.
     * Added ability to provide extra installation steps programmatically.
     * Made it possible to import interface translations at install time.
+- Added the HTML corrector filter:
+    * Fixes faulty and chopped off HTML in postings.
+    * Tags are now automatically closed at the end of the teaser.
 
 Drupal 5.0, 2007-01-15
 ----------------------
diff --git a/modules/filter/filter.module b/modules/filter/filter.module
index d891ab2a6674..bfc7d1c494b0 100644
--- a/modules/filter/filter.module
+++ b/modules/filter/filter.module
@@ -955,7 +955,7 @@ function theme_filter_tips_more_info() {
 function filter_filter($op, $delta = 0, $format = -1, $text = '') {
   switch ($op) {
     case 'list':
-      return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'));
+      return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
 
     case 'description':
       switch ($delta) {
@@ -965,6 +965,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
           return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).');
         case 2:
           return t('Turns web and e-mail addresses into clickable links.');
+        case 3:
+          return t('Corrects faulty and chopped off HTML in postings.');
         default:
           return;
       }
@@ -977,6 +979,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
           return _filter_autop($text);
         case 2:
           return _filter_url($text, $format);
+        case 3:
+          return _filter_htmlcorrector($text);
         default:
           return $text;
       }
@@ -1098,6 +1102,80 @@ function _filter_url($text, $format) {
   return $text;
 }
 
+/**
+ * Scan input and make sure that all HTML tags are properly closed and nested.
+ */
+function _filter_htmlcorrector($text) {
+  // Prepare tag lists.
+  static $no_nesting, $single_use;
+  if (!isset($no_nesting)) {
+    // Tags which cannot be nested but are typically left unclosed.
+    $no_nesting = drupal_map_assoc(array('li', 'p'));
+
+    // Single use tags in HTML4
+    $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
+  }
+
+  // Properly entify angles.
+  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);
+
+  // Split tags from text.
+  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+  // Note: PHP ensures the array consists of alternating delimiters and literals
+  // and begins and ends with a literal (inserting empty strings as required).
+
+  $tag = false; // Odd/even counter. Tag or no tag.
+  $stack = array();
+  $output = '';
+  foreach ($split as $value) {
+    // Process HTML tags.
+    if ($tag) {
+      list($tagname) = explode(' ', strtolower($value), 2);
+      // Closing tag
+      if ($tagname[0] == '/') {
+        $tagname = substr($tagname, 1);
+        // Discard XHTML closing tags for single use tags.
+        if (!isset($single_use[$tagname])) {
+          // See if we possibly have a matching opening tag on the stack.
+          if (in_array($tagname, $stack)) {
+            // Close other tags lingering first.
+            do {
+              $output .= '</'. $stack[0] .'>';
+            } while (array_shift($stack) != $tagname);
+          }
+          // Otherwise, discard it.
+        }
+      }
+      // Opening tag
+      else {
+        // See if we have an identical 'no nesting' tag already open and close it if found.
+        if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
+          $output .= '</'. array_shift($stack) .'>';
+        }
+        // Push non-single-use tags onto the stack
+        if (!isset($single_use[$tagname])) {
+          array_unshift($stack, $tagname);
+        }
+        // Add trailing slash to single-use tags as per X(HT)ML.
+        else {
+          $value = rtrim($value, ' /') . ' /';
+        }
+        $output .= '<'. $value .'>';
+      }
+    }
+    else {
+      // Passthrough all text.
+      $output .= $value;
+    }
+    $tag = !$tag;
+  }
+  // Close remaining tags.
+  while (count($stack) > 0) {
+    $output .= '</'. array_shift($stack) .'>';
+  }
+  return $output;
+}
+
 /**
  * Make links out of absolute URLs.
  */
diff --git a/modules/system/system.install b/modules/system/system.install
index 7bc70779f12d..30d4a62d3018 100644
--- a/modules/system/system.install
+++ b/modules/system/system.install
@@ -1209,12 +1209,16 @@ function system_install() {
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 0, 1)");
   // Line break filter.
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 1, 2)");
+  // HTML corrector filter.
+  db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 3, 10)");
 
   // Full HTML:
   // URL filter.
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 2, 0)");
   // Line break filter.
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 1, 1)");
+  // HTML corrector filter.
+  db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 3, 10)");
 
   db_query("INSERT INTO {variable} (name,value) VALUES ('filter_html_1','i:1;')");
 
@@ -4062,6 +4066,30 @@ function system_update_6017() {
   return $ret;
 }
 
+/**
+ * Add HTML corrector to HTML formats or replace the old module if it was in use.
+ */
+function system_update_6018() {
+  $ret = array();
+
+  // Disable htmlcorrector.module, if it exists and replace its filter.
+  if (module_exists('htmlcorrector')) {
+    module_disable(array('htmlcorrector'));
+    $ret[] = update_sql("UPDATE {filters} SET module = 'filter', delta = 3 WHERE module = 'htmlcorrector'");
+    $ret[] = array('success' => TRUE, 'query' => t('HTML Corrector module was disabled; this functionality has now been added to core.'));
+    return $ret;
+  }
+
+  // Otherwise, find any format with 'HTML' in its name and add the filter at the end.
+  $result = db_query("SELECT format FROM {filter_formats} WHERE name LIKE '%HTML%'");
+  while ($format = db_fetch_object($result)) {
+    $weight = db_result(db_query("SELECT MAX(weight) FROM {filters} WHERE format = %d", $format->format));
+    db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (%d, '%s', %d, %d)", $format->format, 'filter', 3, max(10, $weight + 1));
+  }
+
+  return $ret;
+}
+
 /**
  * @} End of "defgroup updates-5.x-to-6.x"
  * The next series of updates should start at 7000.
-- 
GitLab