Skip to content
Snippets Groups Projects
Commit 686c269b authored by Alex Pott
Browse files

Issue #1938670 by ParisLiakos, Xano, RobLoach, pp, alexpott: Convert...

Issue #1938670 by ParisLiakos, Xano, RobLoach, pp, alexpott: Convert unicode.inc to \Drupal\Component\Utility\Unicode.
parent 608fa1d5
No related branches found
No related tags found
2 merge requests: !7452 Issue #1797438. HTML5 validation is preventing form submit and not fully..., !789 Issue #3210310: Adjust Database API to remove deprecated Drupal 9 code in Drupal 10
......@@ -5,6 +5,7 @@
use Drupal\Component\Utility\Settings;
use Drupal\Component\Utility\String;
use Drupal\Component\Utility\Timer;
use Drupal\Component\Utility\Unicode;
use Drupal\Core\DrupalKernel;
use Drupal\Core\Database\Database;
use Drupal\Core\DependencyInjection\ContainerBuilder;
......@@ -270,22 +271,6 @@
*/
const LANGUAGE_RTL = 1;
/**
* Indicates an error during check for PHP unicode support.
*/
const UNICODE_ERROR = -1;
/**
* Indicates that standard PHP (emulated) unicode support is being used.
*/
const UNICODE_SINGLEBYTE = 0;
/**
* Indicates that full unicode support with the PHP mbstring extension is being
* used.
*/
const UNICODE_MULTIBYTE = 1;
/**
* Time of the current request in seconds elapsed since the Unix Epoch.
*
......@@ -667,9 +652,6 @@ function drupal_environment_initialize() {
// Set sane locale settings, to ensure consistent string, dates, times and
// numbers handling.
setlocale(LC_ALL, 'C');
// Detect string handling method.
unicode_check();
}
/**
......@@ -682,51 +664,6 @@ function drupal_valid_http_host($host) {
return preg_match('/^\[?(?:[a-zA-Z0-9-:\]_]+\.?)+$/', $host);
}
/**
 * Detects PHP's Unicode capabilities and configures mbstring when usable.
 *
 * Drupal has to handle text in various encodings, so mbstring function
 * overloading is not supported, and HTTP input/output conversion must be
 * disabled for similar reasons. The detected mode is stored in the global
 * $multibyte as one of the UNICODE_* constants.
 *
 * @return string
 *   The identifier of the first multibyte check that failed ('mb_strlen' or
 *   the name of an mbstring ini setting), or an empty string when every check
 *   passed.
 */
function unicode_check() {
  global $multibyte;

  // Without the mbstring extension only the emulated mode is available.
  if (!function_exists('mb_strlen')) {
    $multibyte = UNICODE_SINGLEBYTE;
    return 'mb_strlen';
  }

  // Each mbstring setting mapped to the only value it is allowed to have,
  // listed in the order in which a failure is reported.
  $required_settings = array(
    'mbstring.func_overload' => 0,
    'mbstring.encoding_translation' => 0,
    'mbstring.http_input' => 'pass',
    'mbstring.http_output' => 'pass',
  );
  foreach ($required_settings as $setting => $required_value) {
    if (ini_get($setting) != $required_value) {
      $multibyte = UNICODE_ERROR;
      return $setting;
    }
  }

  // Every check passed: switch mbstring to UTF-8 and record full support.
  mb_internal_encoding('utf-8');
  mb_language('uni');
  $multibyte = UNICODE_MULTIBYTE;
  return '';
}
/**
* Sets the base URL, cookie domain, and session name from configuration.
*/
......@@ -2165,6 +2102,9 @@ function _drupal_bootstrap_configuration() {
// Start a page timer:
Timer::start('page');
// Detect string handling method.
Unicode::check();
// Load the procedural configuration system helper functions.
require_once DRUPAL_ROOT . '/core/includes/config.inc';
......
......@@ -5,6 +5,8 @@
* Theming for maintenance pages.
*/
use Drupal\Component\Utility\Unicode;
/**
* Sets up the theming system for maintenance page.
*
......@@ -28,7 +30,7 @@ function _drupal_maintenance_theme() {
require_once DRUPAL_ROOT . '/core/includes/unicode.inc';
require_once DRUPAL_ROOT . '/core/includes/file.inc';
require_once DRUPAL_ROOT . '/core/includes/module.inc';
unicode_check();
Unicode::check();
// Install and update pages are treated differently to prevent theming overrides.
if (defined('MAINTENANCE_MODE') && (MAINTENANCE_MODE == 'install' || MAINTENANCE_MODE == 'update')) {
......
......@@ -5,66 +5,8 @@
* Provides Unicode-related conversions and operations.
*/
/**
* Matches Unicode characters that are word boundaries.
*
* Characters with the following General_category (gc) property values are used
* as word boundaries. While this does not fully conform to the Word Boundaries
* algorithm described in http://unicode.org/reports/tr29, as PCRE does not
* contain the Word_Break property table, this simpler algorithm has to do.
* - Cc, Cf, Cn, Co, Cs: Other.
* - Pc, Pd, Pe, Pf, Pi, Po, Ps: Punctuation.
* - Sc, Sk, Sm, So: Symbols.
* - Zl, Zp, Zs: Separators.
*
* Non-boundary characters include the following General_category (gc) property
* values:
* - Ll, Lm, Lo, Lt, Lu: Letters.
* - Mc, Me, Mn: Combining Marks.
* - Nd, Nl, No: Numbers.
*
* Note that the PCRE property matcher is not used because we wanted to be
* compatible with Unicode 5.2.0 regardless of the PCRE version used (and any
* bugs in PCRE property tables).
*
* @see http://unicode.org/glossary
*/
define('PREG_CLASS_UNICODE_WORD_BOUNDARY',
'\x{0}-\x{2F}\x{3A}-\x{40}\x{5B}-\x{60}\x{7B}-\x{A9}\x{AB}-\x{B1}\x{B4}' .
'\x{B6}-\x{B8}\x{BB}\x{BF}\x{D7}\x{F7}\x{2C2}-\x{2C5}\x{2D2}-\x{2DF}' .
'\x{2E5}-\x{2EB}\x{2ED}\x{2EF}-\x{2FF}\x{375}\x{37E}-\x{385}\x{387}\x{3F6}' .
'\x{482}\x{55A}-\x{55F}\x{589}-\x{58A}\x{5BE}\x{5C0}\x{5C3}\x{5C6}' .
'\x{5F3}-\x{60F}\x{61B}-\x{61F}\x{66A}-\x{66D}\x{6D4}\x{6DD}\x{6E9}' .
'\x{6FD}-\x{6FE}\x{700}-\x{70F}\x{7F6}-\x{7F9}\x{830}-\x{83E}' .
'\x{964}-\x{965}\x{970}\x{9F2}-\x{9F3}\x{9FA}-\x{9FB}\x{AF1}\x{B70}' .
'\x{BF3}-\x{BFA}\x{C7F}\x{CF1}-\x{CF2}\x{D79}\x{DF4}\x{E3F}\x{E4F}' .
'\x{E5A}-\x{E5B}\x{F01}-\x{F17}\x{F1A}-\x{F1F}\x{F34}\x{F36}\x{F38}' .
'\x{F3A}-\x{F3D}\x{F85}\x{FBE}-\x{FC5}\x{FC7}-\x{FD8}\x{104A}-\x{104F}' .
'\x{109E}-\x{109F}\x{10FB}\x{1360}-\x{1368}\x{1390}-\x{1399}\x{1400}' .
'\x{166D}-\x{166E}\x{1680}\x{169B}-\x{169C}\x{16EB}-\x{16ED}' .
'\x{1735}-\x{1736}\x{17B4}-\x{17B5}\x{17D4}-\x{17D6}\x{17D8}-\x{17DB}' .
'\x{1800}-\x{180A}\x{180E}\x{1940}-\x{1945}\x{19DE}-\x{19FF}' .
'\x{1A1E}-\x{1A1F}\x{1AA0}-\x{1AA6}\x{1AA8}-\x{1AAD}\x{1B5A}-\x{1B6A}' .
'\x{1B74}-\x{1B7C}\x{1C3B}-\x{1C3F}\x{1C7E}-\x{1C7F}\x{1CD3}\x{1FBD}' .
'\x{1FBF}-\x{1FC1}\x{1FCD}-\x{1FCF}\x{1FDD}-\x{1FDF}\x{1FED}-\x{1FEF}' .
'\x{1FFD}-\x{206F}\x{207A}-\x{207E}\x{208A}-\x{208E}\x{20A0}-\x{20B8}' .
'\x{2100}-\x{2101}\x{2103}-\x{2106}\x{2108}-\x{2109}\x{2114}' .
'\x{2116}-\x{2118}\x{211E}-\x{2123}\x{2125}\x{2127}\x{2129}\x{212E}' .
'\x{213A}-\x{213B}\x{2140}-\x{2144}\x{214A}-\x{214D}\x{214F}' .
'\x{2190}-\x{244A}\x{249C}-\x{24E9}\x{2500}-\x{2775}\x{2794}-\x{2B59}' .
'\x{2CE5}-\x{2CEA}\x{2CF9}-\x{2CFC}\x{2CFE}-\x{2CFF}\x{2E00}-\x{2E2E}' .
'\x{2E30}-\x{3004}\x{3008}-\x{3020}\x{3030}\x{3036}-\x{3037}' .
'\x{303D}-\x{303F}\x{309B}-\x{309C}\x{30A0}\x{30FB}\x{3190}-\x{3191}' .
'\x{3196}-\x{319F}\x{31C0}-\x{31E3}\x{3200}-\x{321E}\x{322A}-\x{3250}' .
'\x{3260}-\x{327F}\x{328A}-\x{32B0}\x{32C0}-\x{33FF}\x{4DC0}-\x{4DFF}' .
'\x{A490}-\x{A4C6}\x{A4FE}-\x{A4FF}\x{A60D}-\x{A60F}\x{A673}\x{A67E}' .
'\x{A6F2}-\x{A716}\x{A720}-\x{A721}\x{A789}-\x{A78A}\x{A828}-\x{A82B}' .
'\x{A836}-\x{A839}\x{A874}-\x{A877}\x{A8CE}-\x{A8CF}\x{A8F8}-\x{A8FA}' .
'\x{A92E}-\x{A92F}\x{A95F}\x{A9C1}-\x{A9CD}\x{A9DE}-\x{A9DF}' .
'\x{AA5C}-\x{AA5F}\x{AA77}-\x{AA79}\x{AADE}-\x{AADF}\x{ABEB}' .
'\x{E000}-\x{F8FF}\x{FB29}\x{FD3E}-\x{FD3F}\x{FDFC}-\x{FDFD}' .
'\x{FE10}-\x{FE19}\x{FE30}-\x{FE6B}\x{FEFF}-\x{FF0F}\x{FF1A}-\x{FF20}' .
'\x{FF3B}-\x{FF40}\x{FF5B}-\x{FF65}\x{FFE0}-\x{FFFD}');
use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\String;
/**
* Returns Unicode library status and errors.
......@@ -74,17 +16,17 @@ function unicode_requirements() {
$t = get_t();
$libraries = array(
UNICODE_SINGLEBYTE => $t('Standard PHP'),
UNICODE_MULTIBYTE => $t('PHP Mbstring Extension'),
UNICODE_ERROR => $t('Error'),
Unicode::STATUS_SINGLEBYTE => $t('Standard PHP'),
Unicode::STATUS_MULTIBYTE => $t('PHP Mbstring Extension'),
Unicode::STATUS_ERROR => $t('Error'),
);
$severities = array(
UNICODE_SINGLEBYTE => REQUIREMENT_WARNING,
UNICODE_MULTIBYTE => NULL,
UNICODE_ERROR => REQUIREMENT_ERROR,
Unicode::STATUS_SINGLEBYTE => REQUIREMENT_WARNING,
Unicode::STATUS_MULTIBYTE => NULL,
Unicode::STATUS_ERROR => REQUIREMENT_ERROR,
);
$failed_check = unicode_check();
$library = $GLOBALS['multibyte'];
$failed_check = Unicode::check();
$library = Unicode::getStatus();
$requirements['unicode'] = array(
'title' => $t('Unicode library'),
......@@ -176,29 +118,20 @@ function drupal_xml_parser_create(&$data) {
/**
* Converts data to UTF-8.
*
* Requires the iconv, GNU recode or mbstring PHP extension.
*
* @param $data
* @param string $data
* The data to be converted.
* @param $encoding
* @param string $encoding
* The encoding that the data is in.
*
* @return
* @return string|bool
* Converted data or FALSE.
*
* @see \Drupal\Component\Utility\Unicode::convertToUtf8().
*/
function drupal_convert_to_utf8($data, $encoding) {
if (function_exists('iconv')) {
$out = @iconv($encoding, 'utf-8', $data);
}
elseif (function_exists('mb_convert_encoding')) {
$out = @mb_convert_encoding($data, 'utf-8', $encoding);
}
elseif (function_exists('recode_string')) {
$out = @recode_string($encoding . '..utf-8', $data);
}
else {
$out = Unicode::convertToUtf8($data, $encoding);
if ($out === FALSE) {
watchdog('php', 'Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding), WATCHDOG_ERROR);
return FALSE;
}
return $out;
......@@ -207,33 +140,18 @@ function drupal_convert_to_utf8($data, $encoding) {
/**
* Truncates a UTF-8-encoded string safely to a number of bytes.
*
* If the end position is in the middle of a UTF-8 sequence, it scans backwards
* until the beginning of the byte sequence.
*
* Use this function whenever you want to chop off a string at an unsure
* location. On the other hand, if you're sure that you're splitting on a
* character boundary (e.g. after using strpos() or similar), you can safely
* use substr() instead.
*
* @param $string
* @param string $string
* The string to truncate.
* @param $len
* @param int $len
* An upper limit on the returned string length.
*
* @return
* @return string
* The truncated string.
*
* @see \Drupal\Component\Utility\Unicode::truncateBytes().
*/
function drupal_truncate_bytes($string, $len) {
if (strlen($string) <= $len) {
return $string;
}
if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) {
return substr($string, 0, $len);
}
// Scan backwards to beginning of the byte sequence.
while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0);
return substr($string, 0, $len);
return Unicode::truncateBytes($string, $len);
}
/**
......@@ -247,7 +165,7 @@ function drupal_truncate_bytes($string, $len) {
* @param $wordsafe
* If TRUE, attempt to truncate on a word boundary. Word boundaries are
* spaces, punctuation, and Unicode characters used as word boundaries in
* non-Latin languages; see PREG_CLASS_UNICODE_WORD_BOUNDARY for more
* non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
* information. If a word boundary cannot be found that would make the length
* of the returned string fall within length guidelines (see parameters
* $max_length and $min_wordsafe_length), word boundaries are ignored.
......@@ -269,70 +187,16 @@ function drupal_truncate_bytes($string, $len) {
*
* @return string
* The truncated string.
*
* @see \Drupal\Component\Utility\Unicode::truncate().
*/
function truncate_utf8($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) {
$ellipsis = '';
$max_length = max($max_length, 0);
$min_wordsafe_length = max($min_wordsafe_length, 0);
if (drupal_strlen($string) <= $max_length) {
// No truncation needed, so don't add ellipsis, just return.
return $string;
}
if ($add_ellipsis) {
// Truncate ellipsis in case $max_length is small.
$ellipsis = drupal_substr(t('…'), 0, $max_length);
$max_length -= drupal_strlen($ellipsis);
$max_length = max($max_length, 0);
}
if ($max_length <= $min_wordsafe_length) {
// Do not attempt word-safe if lengths are bad.
$wordsafe = FALSE;
}
if ($wordsafe) {
$matches = array();
// Find the last word boundary, if there is one within $min_wordsafe_length
// to $max_length characters. preg_match() is always greedy, so it will
// find the longest string possible.
$found = preg_match('/^(.{' . $min_wordsafe_length . ',' . $max_length . '})[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . ']/u', $string, $matches);
if ($found) {
$string = $matches[1];
}
else {
$string = drupal_substr($string, 0, $max_length);
}
}
else {
$string = drupal_substr($string, 0, $max_length);
}
if ($add_ellipsis) {
// If we're adding an ellipsis, remove any trailing periods.
$string = rtrim($string, '.');
$string .= $ellipsis;
}
return $string;
return Unicode::truncate($string, $max_length, $wordsafe, $add_ellipsis, $min_wordsafe_length);
}
/**
* Encodes MIME/HTTP header values that contain incorrectly encoded characters.
*
* For example, mime_header_encode('tést.txt') returns "=?UTF-8?B?dMOpc3QudHh0?=".
*
* See http://www.rfc-editor.org/rfc/rfc2047.txt for more information.
*
* Notes:
* - Only encode strings that contain non-ASCII characters.
* - We progressively cut-off a chunk with truncate_utf8(). This is to ensure
* each chunk starts and ends on a character boundary.
* - Using \n as the chunk separator may cause problems on some systems and may
* have to be changed to \r\n or \r.
*
* @param $string
* The header to encode.
*
......@@ -340,22 +204,10 @@ function truncate_utf8($string, $max_length, $wordsafe = FALSE, $add_ellipsis =
* The mime-encoded header.
*
* @see mime_header_decode()
* @see \Drupal\Component\Utility\Unicode::mimeHeaderEncode().
*/
function mime_header_encode($string) {
if (preg_match('/[^\x20-\x7E]/', $string)) {
$chunk_size = 47; // floor((75 - strlen("=?UTF-8?B??=")) * 0.75);
$len = strlen($string);
$output = '';
while ($len > 0) {
$chunk = drupal_truncate_bytes($string, $chunk_size);
$output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n";
$c = strlen($chunk);
$string = substr($string, $c);
$len -= $c;
}
return trim($output);
}
return $string;
return Unicode::mimeHeaderEncode($string);
}
/**
......@@ -368,78 +220,41 @@ function mime_header_encode($string) {
* The mime-decoded header.
*
* @see mime_header_encode()
* @see \Drupal\Component\Utility\Unicode::mimeHeaderDecode().
*/
function mime_header_decode($header) {
// First step: encoded chunks followed by other encoded chunks (need to collapse whitespace)
$header = preg_replace_callback('/=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=\s+(?==\?)/', '_mime_header_decode', $header);
// Second step: remaining chunks (do not collapse whitespace)
return preg_replace_callback('/=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=/', '_mime_header_decode', $header);
}
/**
* Decodes encoded header data passed from mime_header_decode().
*
* Callback for preg_replace_callback() within mime_header_decode().
*
* @param $matches
* The array of matches from preg_replace_callback().
*
* @return string
* The mime-decoded string.
*
* @see mime_header_decode()
*/
function _mime_header_decode($matches) {
// Regexp groups:
// 1: Character set name
// 2: Escaping method (Q or B)
// 3: Encoded data
$data = ($matches[2] == 'B') ? base64_decode($matches[3]) : str_replace('_', ' ', quoted_printable_decode($matches[3]));
if (strtolower($matches[1]) != 'utf-8') {
$data = drupal_convert_to_utf8($data, $matches[1]);
}
return $data;
return Unicode::mimeHeaderDecode($header);
}
/**
* Decodes all HTML entities (including numerical ones) to regular UTF-8 bytes.
*
* Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;"
* , not "<"). Be careful when using this function, as decode_entities can
* revert previous sanitization efforts (&lt;script&gt; will become <script>).
*
* @param $text
* The text to decode entities in.
*
* @return
* The input $text, with all HTML entities decoded once.
*
* @see \Drupal\Component\Utility\String::decodeEntities().
*/
function decode_entities($text) {
return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
return String::decodeEntities($text);
}
/**
* Counts the number of characters in a UTF-8 string.
*
* This is less than or equal to the byte count.
*
* @param $text
* The string to run the operation on.
*
* @return integer
* The length of the string.
*
* @see \Drupal\Component\Utility\Unicode::strlen().
* @ingroup php_wrappers
*/
function drupal_strlen($text) {
global $multibyte;
if ($multibyte == UNICODE_MULTIBYTE) {
return mb_strlen($text);
}
else {
// Do not count UTF-8 continuation bytes.
return strlen(preg_replace("/[\x80-\xBF]/", '', $text));
}
return Unicode::strlen($text);
}
/**
......@@ -451,20 +266,11 @@ function drupal_strlen($text) {
* @return string
* The string in uppercase.
*
* @see \Drupal\Component\Utility\Unicode::strtoupper().
* @ingroup php_wrappers
*/
function drupal_strtoupper($text) {
global $multibyte;
if ($multibyte == UNICODE_MULTIBYTE) {
return mb_strtoupper($text);
}
else {
// Use C-locale for ASCII-only uppercase
$text = strtoupper($text);
// Case flip Latin-1 accented letters
$text = preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '_unicode_caseflip', $text);
return $text;
}
return Unicode::strtoupper($text);
}
/**
......@@ -476,35 +282,11 @@ function drupal_strtoupper($text) {
* @return string
* The string in lowercase.
*
* @see \Drupal\Component\Utility\Unicode::strtolower().
* @ingroup php_wrappers
*/
function drupal_strtolower($text) {
global $multibyte;
if ($multibyte == UNICODE_MULTIBYTE) {
return mb_strtolower($text);
}
else {
// Use C-locale for ASCII-only lowercase
$text = strtolower($text);
// Case flip Latin-1 accented letters
$text = preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '_unicode_caseflip', $text);
return $text;
}
}
/**
* Flips U+C0-U+DE to U+E0-U+FD and back.
*
* @param $matches
* An array of matches.
*
* @return array
* The Latin-1 version of the array of matches.
*
* @see drupal_strtolower()
*/
function _unicode_caseflip($matches) {
return $matches[0][0] . chr(ord($matches[0][1]) ^ 32);
return Unicode::strtolower($text);
}
/**
......@@ -516,20 +298,16 @@ function _unicode_caseflip($matches) {
* @return
* The string with the first letter as uppercase.
*
* @see \Drupal\Component\Utility\Unicode::ucfirst().
* @ingroup php_wrappers
*/
function drupal_ucfirst($text) {
// Note: no mbstring equivalent!
return drupal_strtoupper(drupal_substr($text, 0, 1)) . drupal_substr($text, 1);
return Unicode::ucfirst($text);
}
/**
* Cuts off a piece of a string based on character indices and counts.
*
* Follows the same behavior as PHP's own substr() function. Note that for
* cutting off a string at a known character/substring location, the usage of
* PHP's normal strpos/substr is safe and much faster.
*
* @param $text
* The input string.
* @param $start
......@@ -540,92 +318,9 @@ function drupal_ucfirst($text) {
* @return
* The shortened string.
*
* @see \Drupal\Component\Utility\Unicode::substr().
* @ingroup php_wrappers
*/
function drupal_substr($text, $start, $length = NULL) {
global $multibyte;
if ($multibyte == UNICODE_MULTIBYTE) {
return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length);
}
else {
$strlen = strlen($text);
// Find the starting byte offset.
$bytes = 0;
if ($start > 0) {
// Count all the continuation bytes from the start until we have found
// $start characters or the end of the string.
$bytes = -1; $chars = -1;
while ($bytes < $strlen - 1 && $chars < $start) {
$bytes++;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
}
elseif ($start < 0) {
// Count all the continuation bytes from the end until we have found
// abs($start) characters.
$start = abs($start);
$bytes = $strlen; $chars = 0;
while ($bytes > 0 && $chars < $start) {
$bytes--;
$c = ord($text[$bytes]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
}
$istart = $bytes;
// Find the ending byte offset.
if ($length === NULL) {
$iend = $strlen;
}
elseif ($length > 0) {
// Count all the continuation bytes from the starting index until we have
// found $length characters or reached the end of the string, then
// backtrace one byte.
$iend = $istart - 1;
$chars = -1;
$last_real = FALSE;
while ($iend < $strlen - 1 && $chars < $length) {
$iend++;
$c = ord($text[$iend]);
$last_real = FALSE;
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
$last_real = TRUE;
}
}
// Backtrace one byte if the last character we found was a real character
// and we don't need it.
if ($last_real && $chars >= $length) {
$iend--;
}
}
elseif ($length < 0) {
// Count all the continuation bytes from the end until we have found
// abs($start) characters, then backtrace one byte.
$length = abs($length);
$iend = $strlen; $chars = 0;
while ($iend > 0 && $chars < $length) {
$iend--;
$c = ord($text[$iend]);
if ($c < 0x80 || $c >= 0xC0) {
$chars++;
}
}
// Backtrace one byte if we are not at the beginning of the string.
if ($iend > 0) {
$iend--;
}
}
else {
// $length == 0, return an empty string.
return '';
}
return substr($text, $istart, max(0, $iend - $istart + 1));
}
return Unicode::substr($text, $start, $length);
}
......@@ -31,6 +31,23 @@ public static function checkPlain($text) {
return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
}
/**
 * Converts every HTML entity in a string into its UTF-8 character.
 *
 * Named and numeric entities are both handled, as are single and double
 * quotes. Only one level of escaping is removed, so "&amp;lt;" becomes
 * "&lt;" rather than "<". Use with care: decoding undoes earlier
 * sanitization (&lt;script&gt; turns back into <script>).
 *
 * @param string $text
 *   The text whose entities should be decoded.
 *
 * @return string
 *   $text with all HTML entities decoded exactly once.
 */
public static function decodeEntities($text) {
  $decoded = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  return $decoded;
}
/**
* Formats a string for HTML display by replacing variable placeholders.
*
......
This diff is collapsed.
......@@ -6,6 +6,7 @@
*/
use Drupal\Core\Entity\EntityInterface;
use Drupal\Component\Utility\Unicode;
/**
* Matches all 'N' Unicode character classes (numbers)
......@@ -447,7 +448,7 @@ function search_simplify($text, $langcode = NULL) {
// With the exception of the rules above, we consider all punctuation,
// marks, spacers, etc, to be a word boundary.
$text = preg_replace('/[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . ']+/u', ' ', $text);
$text = preg_replace('/[' . Unicode::PREG_CLASS_WORD_BOUNDARY . ']+/u', ' ', $text);
// Truncate everything to 50 characters.
$words = explode(' ', $text);
......@@ -1102,7 +1103,7 @@ function search_data($keys, $module, $conditions = NULL) {
*/
function search_excerpt($keys, $text, $langcode = NULL) {
// We highlight around non-indexable or CJK characters.
$boundary = '(?:(?<=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . '])|(?=[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . PREG_CLASS_CJK . ']))';
$boundary = '(?:(?<=[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . '])|(?=[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . ']))';
// Extract positive keywords and phrases
preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
......
<?php
/**
* @file
* Definition of Drupal\system\Tests\System\UnicodeUnitTest.
*/
namespace Drupal\system\Tests\System;
use Drupal\simpletest\UnitTestBase;
/**
* Test unicode handling features implemented in unicode.inc.
*/
class UnicodeUnitTest extends UnitTestBase {

  /**
   * Whether to run the extended version of the tests.
   *
   * The extended version adds inputs with non-Latin-1 characters; the helper
   * methods only add them when this is TRUE.
   *
   * @var bool
   */
  protected $extendedMode = FALSE;

  public static function getInfo() {
    return array(
      'name' => 'Unicode handling',
      'description' => 'Tests Drupal Unicode handling.',
      'group' => 'System',
    );
  }

  /**
   * Test full unicode features implemented using the mbstring extension.
   */
  public function testMbStringUnicode() {
    global $multibyte;

    // mbstring was not detected on this installation, there is no way to test
    // multibyte features. Treat that as an exception and stop here: forcing
    // multibyte mode without the extension would only run the helpers against
    // mb_*() functions that do not exist.
    if ($multibyte == UNICODE_SINGLEBYTE) {
      $this->error(t('Unable to test Multibyte features: mbstring extension was not detected.'));
      return;
    }

    $this->runHelperTests(UNICODE_MULTIBYTE, TRUE, t('Testing in mbstring mode'));
  }

  /**
   * Test emulated unicode features.
   */
  public function testEmulatedUnicode() {
    $this->runHelperTests(UNICODE_SINGLEBYTE, FALSE, t('Testing in emulated (best-effort) mode'));
  }

  /**
   * Runs every helper test with the global string handling mode forced.
   *
   * The previous value of the global $multibyte is put back afterwards so the
   * forced mode does not leak into whatever runs next in the same process.
   *
   * @param int $mode
   *   The value to store in the global $multibyte while the helpers run; one
   *   of the UNICODE_* constants.
   * @param bool $extended_mode
   *   Value for $this->extendedMode while the helpers run.
   * @param string $message
   *   Already translated message recorded as a pass before the helpers run.
   */
  protected function runHelperTests($mode, $extended_mode, $message) {
    global $multibyte;

    $original_mode = $multibyte;
    $multibyte = $mode;
    $this->extendedMode = $extended_mode;
    $this->pass($message);

    $this->helperTestStrToLower();
    $this->helperTestStrToUpper();
    $this->helperTestUcFirst();
    $this->helperTestStrLen();
    $this->helperTestSubStr();
    $this->helperTestTruncate();

    $multibyte = $original_mode;
  }

  /**
   * Tests drupal_strtolower().
   */
  public function helperTestStrToLower() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 'the quick brown',
      'FrançAIS is ÜBER-åwesome' => 'français is über-åwesome',
    );
    if ($this->extendedMode) {
      $testcase['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'] = 'αβγδεζηθικλμνξοσὠ';
    }

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_strtolower($input), $output, format_string('%input is lowercased as %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Tests drupal_strtoupper().
   */
  public function helperTestStrToUpper() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 'THE QUICK BROWN',
      'FrançAIS is ÜBER-åwesome' => 'FRANÇAIS IS ÜBER-ÅWESOME',
    );
    if ($this->extendedMode) {
      $testcase['αβγδεζηθικλμνξοσὠ'] = 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ';
    }

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_strtoupper($input), $output, format_string('%input is uppercased as %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Tests drupal_ucfirst().
   */
  public function helperTestUcFirst() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 'THe QUIcK bRoWn',
      'françAIS' => 'FrançAIS',
      'über' => 'Über',
      'åwesome' => 'Åwesome'
    );
    if ($this->extendedMode) {
      $testcase['σion'] = 'Σion';
    }

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_ucfirst($input), $output, format_string('%input is ucfirst-ed as %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Tests drupal_strlen().
   */
  public function helperTestStrLen() {
    $testcase = array(
      'tHe QUIcK bRoWn' => 15,
      'ÜBER-åwesome' => 12,
    );

    foreach ($testcase as $input => $output) {
      $this->assertEqual(drupal_strlen($input), $output, format_string('%input length is %output', array('%input' => $input, '%output' => $output)));
    }
  }

  /**
   * Tests drupal_substr().
   *
   * Each case is an array with the input string, the start offset, the
   * length, and the expected substring.
   */
  public function helperTestSubStr() {
    $testcase = array(
      //     012345678901234567890123
      array('frànçAIS is über-åwesome', 0, 0,
            ''),
      array('frànçAIS is über-åwesome', 0, 1,
            'f'),
      array('frànçAIS is über-åwesome', 0, 8,
            'frànçAIS'),
      array('frànçAIS is über-åwesome', 0, 23,
            'frànçAIS is über-åwesom'),
      array('frànçAIS is über-åwesome', 0, 24,
            'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 0, 25,
            'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 0, 100,
            'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 4, 4,
            'çAIS'),
      array('frànçAIS is über-åwesome', 1, 0,
            ''),
      array('frànçAIS is über-åwesome', 100, 0,
            ''),
      array('frànçAIS is über-åwesome', -4, 2,
            'so'),
      array('frànçAIS is über-åwesome', -4, 3,
            'som'),
      array('frànçAIS is über-åwesome', -4, 4,
            'some'),
      array('frànçAIS is über-åwesome', -4, 5,
            'some'),
      array('frànçAIS is über-åwesome', -7, 10,
            'åwesome'),
      array('frànçAIS is über-åwesome', 5, -10,
            'AIS is üb'),
      array('frànçAIS is über-åwesome', 0, -10,
            'frànçAIS is üb'),
      array('frànçAIS is über-åwesome', 0, -1,
            'frànçAIS is über-åwesom'),
      array('frànçAIS is über-åwesome', -7, -2,
            'åweso'),
      array('frànçAIS is über-åwesome', -7, -6,
            'å'),
      array('frànçAIS is über-åwesome', -7, -7,
            ''),
      array('frànçAIS is über-åwesome', -7, -8,
            ''),
      array('...', 0, 2, '..'),
      array('以呂波耳・ほへとち。リヌルヲ。', 1, 3,
            '呂波耳'),
    );

    foreach ($testcase as $test) {
      list($input, $start, $length, $output) = $test;
      $result = drupal_substr($input, $start, $length);
      $this->assertEqual($result, $output, format_string('%input substring at offset %offset for %length characters is %output (got %result)', array('%input' => $input, '%offset' => $start, '%length' => $length, '%output' => $output, '%result' => $result)));
    }
  }

  /**
   * Test decode_entities().
   */
  public function testDecodeEntities() {
    $testcase = array(
      'Drupal' => 'Drupal',
      '<script>' => '<script>',
      '&lt;script&gt;' => '<script>',
      '&#60;script&#62;' => '<script>',
      '&amp;lt;script&amp;gt;' => '&lt;script&gt;',
      '"' => '"',
      '&#34;' => '"',
      '&amp;#34;' => '&#34;',
      '&quot;' => '"',
      '&amp;quot;' => '&quot;',
      "'" => "'",
      '&#39;' => "'",
      '&amp;#39;' => '&#39;',
      '©' => '©',
      '&copy;' => '©',
      '&#169;' => '©',
      '→' => '→',
      '&#8594;' => '→',
      '➼' => '➼',
      '&#10172;' => '➼',
      '&euro;' => '€',
    );
    foreach ($testcase as $input => $output) {
      $this->assertEqual(decode_entities($input), $output, format_string('Make sure the decoded entity of @input is @output', array('@input' => $input, '@output' => $output)));
    }
  }

  /**
   * Tests truncate_utf8().
   */
  public function helperTestTruncate() {
    // Each case is an array with input string, length to truncate to, and
    // expected return value.

    // Test non-wordsafe, non-ellipsis cases.
    $non_wordsafe_non_ellipsis_cases = array(
      array('frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 23, 'frànçAIS is über-åwesom'),
      array('frànçAIS is über-åwesome', 17, 'frànçAIS is über-'),
      array('以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'),
    );
    $this->runTruncateTests($non_wordsafe_non_ellipsis_cases, FALSE, FALSE);

    // Test non-wordsafe, ellipsis cases.
    $non_wordsafe_ellipsis_cases = array(
      array('frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 23, 'frànçAIS is über-åweso…'),
      array('frànçAIS is über-åwesome', 17, 'frànçAIS is über…'),
    );
    $this->runTruncateTests($non_wordsafe_ellipsis_cases, FALSE, TRUE);

    // Test wordsafe, ellipsis cases.
    $wordsafe_ellipsis_cases = array(
      array('123', 1, '…'),
      array('123', 2, '1…'),
      array('123', 3, '123'),
      array('1234', 3, '12…'),
      array('1234567890', 10, '1234567890'),
      array('12345678901', 10, '123456789…'),
      array('12345678901', 11, '12345678901'),
      array('123456789012', 11, '1234567890…'),
      array('12345 7890', 10, '12345 7890'),
      array('12345 7890', 9, '12345…'),
      array('123 567 90', 10, '123 567 90'),
      array('123 567 901', 10, '123 567…'),
      array('Stop. Hammertime.', 17, 'Stop. Hammertime.'),
      array('Stop. Hammertime.', 16, 'Stop…'),
      array('frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'),
      array('frànçAIS is über-åwesome', 23, 'frànçAIS is über…'),
      array('frànçAIS is über-åwesome', 17, 'frànçAIS is über…'),
      array('¿Dónde está el niño?', 20, '¿Dónde está el niño?'),
      array('¿Dónde está el niño?', 19, '¿Dónde está el…'),
      array('¿Dónde está el niño?', 13, '¿Dónde está…'),
      array('¿Dónde está el niño?', 10, '¿Dónde…'),
      array('Help! Help! Help!', 17, 'Help! Help! Help!'),
      array('Help! Help! Help!', 16, 'Help! Help!…'),
      array('Help! Help! Help!', 15, 'Help! Help!…'),
      array('Help! Help! Help!', 14, 'Help! Help!…'),
      array('Help! Help! Help!', 13, 'Help! Help!…'),
      array('Help! Help! Help!', 12, 'Help! Help!…'),
      array('Help! Help! Help!', 11, 'Help! Help…'),
      array('Help! Help! Help!', 10, 'Help!…'),
      array('Help! Help! Help!', 9, 'Help!…'),
      array('Help! Help! Help!', 8, 'Help!…'),
      array('Help! Help! Help!', 7, 'Help!…'),
      array('Help! Help! Help!', 6, 'Help!…'),
      array('Help! Help! Help!', 5, 'Help…'),
      array('Help! Help! Help!', 4, 'Hel…'),
      array('Help! Help! Help!', 3, 'He…'),
      array('Help! Help! Help!', 2, 'H…'),
    );
    $this->runTruncateTests($wordsafe_ellipsis_cases, TRUE, TRUE);
  }

  /**
   * Runs test cases for helperTestTruncate().
   *
   * Runs each test case through truncate_utf8() and compares the output
   * to the expected output.
   *
   * @param array $cases
   *   Cases array. Each case is an array with the input string, length to
   *   truncate to, and expected output.
   * @param bool $wordsafe
   *   TRUE to use word-safe truncation, FALSE to not use word-safe truncation.
   * @param bool $ellipsis
   *   TRUE to append an ellipsis (…) if the input is truncated, FALSE to not
   *   append one.
   */
  public function runTruncateTests($cases, $wordsafe, $ellipsis) {
    foreach ($cases as $case) {
      list($input, $max_length, $expected) = $case;
      $output = truncate_utf8($input, $max_length, $wordsafe, $ellipsis);
      $this->assertEqual($output, $expected, format_string('%input truncate to %length characters with %wordsafe, %ellipsis is %expected (got %output)', array('%input' => $input, '%length' => $max_length, '%output' => $output, '%expected' => $expected, '%wordsafe' => ($wordsafe ? 'word-safe' : 'not word-safe'), '%ellipsis' => ($ellipsis ? 'ellipsis' : 'not ellipsis'))));
    }
  }

}
......@@ -7,6 +7,7 @@
namespace Drupal\views\Plugin\views;
use Drupal\Component\Utility\Unicode;
use Drupal\views\Plugin\views\display\DisplayPluginBase;
use Drupal\views\Plugin\views\PluginBase;
use Drupal\views\ViewExecutable;
......@@ -242,8 +243,6 @@ public function sanitizeValue($value, $type = NULL) {
* The transformed string.
*/
protected function caseTransform($string, $option) {
global $multibyte;
switch ($option) {
default:
return $string;
......@@ -254,7 +253,7 @@ protected function caseTransform($string, $option) {
case 'ucfirst':
return drupal_strtoupper(drupal_substr($string, 0, 1)) . drupal_substr($string, 1);
case 'ucwords':
if ($multibyte == UNICODE_MULTIBYTE) {
if (Unicode::getStatus() == Unicode::STATUS_MULTIBYTE) {
return mb_convert_case($string, MB_CASE_TITLE);
}
else {
......
......@@ -41,7 +41,7 @@ public static function getInfo() {
*/
public function testCheckPlain($text, $expected, $message, $ignorewarnings = FALSE) {
  // When requested, silence PHP warnings raised while the text is escaped.
  $result = $ignorewarnings ? @String::checkPlain($text) : String::checkPlain($text);
  // PHPUnit takes the expected value first and the actual value second, so
  // that failure output labels both values correctly. Only this ordering is
  // asserted; the reversed duplicate of the same check is dropped.
  $this->assertEquals($expected, $result, $message);
}
/**
......@@ -80,7 +80,7 @@ function providerCheckPlain() {
*/
public function testFormat($string, $args, $expected, $message) {
  $result = String::format($string, $args);
  // PHPUnit takes the expected value first and the actual value second, so
  // that failure output labels both values correctly. Only this ordering is
  // asserted; the reversed duplicate of the same check is dropped.
  $this->assertEquals($expected, $result, $message);
}
/**
......@@ -106,4 +106,44 @@ function testPlaceholder() {
$this->assertEquals('<em class="placeholder">Some text</em>', String::placeholder('Some text'));
}
/**
 * Verifies that String::decodeEntities() yields the expected text.
 *
 * @dataProvider providerDecodeEntities
 */
public function testDecodeEntities($text, $expected) {
  $decoded = String::decodeEntities($text);
  $this->assertEquals($expected, $decoded);
}
/**
 * Data provider for testDecodeEntities().
 *
 * @see testDecodeEntities()
 *
 * @return array
 *   A list of cases, each holding the text to decode followed by the text
 *   expected after decoding.
 */
public function providerDecodeEntities() {
  // Maps each input text to the text expected after decoding.
  $expectations = array(
    'Drupal' => 'Drupal',
    '<script>' => '<script>',
    '&lt;script&gt;' => '<script>',
    '&#60;script&#62;' => '<script>',
    '&amp;lt;script&amp;gt;' => '&lt;script&gt;',
    '"' => '"',
    '&#34;' => '"',
    '&amp;#34;' => '&#34;',
    '&quot;' => '"',
    '&amp;quot;' => '&quot;',
    "'" => "'",
    '&#39;' => "'",
    '&amp;#39;' => '&#39;',
    '©' => '©',
    '&copy;' => '©',
    '&#169;' => '©',
    '→' => '→',
    '&#8594;' => '→',
    '➼' => '➼',
    '&#10172;' => '➼',
    '&euro;' => '€',
  );

  // Turn the map into the positional rows the test method receives.
  $cases = array();
  foreach ($expectations as $text => $expected) {
    $cases[] = array($text, $expected);
  }
  return $cases;
}
}
<?php
/**
* @file
* Contains \Drupal\Tests\Component\Utility\UnicodeTest.
*/
namespace Drupal\Tests\Component\Utility;
use Drupal\Tests\UnitTestCase;
use Drupal\Component\Utility\Unicode;
/**
* Test unicode handling features implemented in Unicode component.
*/
class UnicodeTest extends UnitTestCase {
/**
 * Returns the name, description and group of this test case.
 *
 * @return array
 *   An associative array with the keys 'name', 'description' and 'group'.
 */
public static function getInfo() {
  $info = array();
  $info['name'] = 'Unicode handling';
  $info['description'] = 'Tests Drupal Unicode handling.';
  $info['group'] = 'System';
  return $info;
}
/**
 * Prepares the Unicode component before a test runs.
 */
public function setUp() {
  // Give the parent test case the chance to build its own fixture first, so
  // that overriding this method never skips base-class initialization.
  parent::setUp();
  // Initialize unicode component.
  Unicode::check();
}
/**
 * Checks the round trip through Unicode::setStatus() and Unicode::getStatus().
 *
 * When $invalid is set, an InvalidArgumentException is expected instead.
 *
 * @dataProvider providerTestStatus
 */
public function testStatus($value, $expected, $invalid = FALSE) {
  if ($invalid) {
    // The expectation has to be registered before the offending call.
    $this->setExpectedException('InvalidArgumentException');
  }

  Unicode::setStatus($value);
  $status = Unicode::getStatus();

  $this->assertEquals($expected, $status);
}
/**
 * Data provider for testStatus().
 *
 * @see testStatus()
 *
 * @return array
 *   An array containing:
 *     - The status value to set.
 *     - The status value to expect after setting the new value.
 *     - (optional) Boolean indicating invalid status. Defaults to FALSE.
 */
public function providerTestStatus() {
  // The rows flagged as invalid use fixed values taken from the 10-100 range
  // instead of random draws, so that every run exercises exactly the same
  // data sets and a failing row can be reproduced.
  return array(
    array(Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE),
    array(10, Unicode::STATUS_SINGLEBYTE, TRUE),
    array(100, Unicode::STATUS_SINGLEBYTE, TRUE),
    array(Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE),
    array(55, Unicode::STATUS_MULTIBYTE, TRUE),
    array(Unicode::STATUS_ERROR, Unicode::STATUS_ERROR),
    array(Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE),
  );
}
/**
 * Checks Unicode::mimeHeaderEncode() and Unicode::mimeHeaderDecode().
 *
 * The value must encode to the given encoded form, and the encoded form must
 * decode back to the value.
 *
 * @dataProvider providerTestMimeHeader
 */
public function testMimeHeader($value, $encoded) {
  $actual_encoded = Unicode::mimeHeaderEncode($value);
  $this->assertEquals($encoded, $actual_encoded);

  $actual_decoded = Unicode::mimeHeaderDecode($encoded);
  $this->assertEquals($value, $actual_decoded);
}
/**
 * Data provider for testMimeHeader().
 *
 * @see testMimeHeader()
 *
 * @return array
 *   A list of cases, each holding a string followed by its encoded value.
 */
public function providerTestMimeHeader() {
  $cases = array();
  $cases[] = array('tést.txt', '=?UTF-8?B?dMOpc3QudHh0?=');
  return $cases;
}
/**
 * Checks the lowercase conversion done by Unicode::strtolower().
 *
 * The Unicode status is switched to multibyte or singlebyte, depending on
 * $multibyte, before the conversion is performed.
 *
 * @dataProvider providerStrtolower
 */
public function testStrtolower($text, $expected, $multibyte = FALSE) {
  if ($multibyte) {
    Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
  }
  else {
    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
  }

  $lowercased = Unicode::strtolower($text);
  $this->assertEquals($expected, $lowercased);
}
/**
 * Data provider for testStrtolower().
 *
 * @see testStrtolower()
 *
 * @return array
 *   A list of cases, each holding a string, its lowercase version and,
 *   optionally, whether it should be processed as multibyte.
 */
public function providerStrtolower() {
  // Cases without a flag, which the test method treats as singlebyte.
  $singlebyte = array(
    array('tHe QUIcK bRoWn', 'the quick brown'),
    array('FrançAIS is ÜBER-åwesome', 'français is über-åwesome'),
  );

  // Repeat every string above with the multibyte flag set.
  $multibyte = array();
  foreach ($singlebyte as $row) {
    $row[] = TRUE;
    $multibyte[] = $row;
  }

  // A string that is only run in multibyte mode.
  $multibyte[] = array('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ', TRUE);

  return array_merge($singlebyte, $multibyte);
}
/**
 * Checks the uppercase conversion done by Unicode::strtoupper().
 *
 * The Unicode status is switched to multibyte or singlebyte, depending on
 * $multibyte, before the conversion is performed.
 *
 * @dataProvider providerStrtoupper
 */
public function testStrtoupper($text, $expected, $multibyte = FALSE) {
  if ($multibyte) {
    Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
  }
  else {
    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
  }

  $uppercased = Unicode::strtoupper($text);
  $this->assertEquals($expected, $uppercased);
}
/**
 * Data provider for testStrtoupper().
 *
 * @see testStrtoupper()
 *
 * @return array
 *   A list of cases, each holding a string, its uppercase version and,
 *   optionally, whether it should be processed as multibyte.
 */
public function providerStrtoupper() {
  // Cases without a flag, which the test method treats as singlebyte.
  $singlebyte = array(
    array('tHe QUIcK bRoWn', 'THE QUICK BROWN'),
    array('FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'),
  );

  // Repeat every string above with the multibyte flag set.
  $multibyte = array();
  foreach ($singlebyte as $row) {
    $row[] = TRUE;
    $multibyte[] = $row;
  }

  // A string that is only run in multibyte mode.
  $multibyte[] = array('αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE);

  return array_merge($singlebyte, $multibyte);
}
/**
 * Checks that Unicode::ucfirst() returns the expected string.
 *
 * @dataProvider providerUcfirst
 */
public function testUcfirst($text, $expected) {
  $capitalized = Unicode::ucfirst($text);
  $this->assertEquals($expected, $capitalized);
}
/**
 * Data provider for testUcfirst().
 *
 * @see testUcfirst()
 *
 * @return array
 *   A list of cases, each holding a string followed by the same string with
 *   its first character in uppercase.
 */
public function providerUcfirst() {
  $cases = array();
  $cases[] = array('tHe QUIcK bRoWn', 'THe QUIcK bRoWn');
  $cases[] = array('françAIS', 'FrançAIS');
  $cases[] = array('über', 'Über');
  $cases[] = array('åwesome', 'Åwesome');
  // A multibyte string.
  $cases[] = array('σion', 'Σion');
  return $cases;
}
/**
 * Checks that Unicode::strlen() returns the expected length.
 *
 * @dataProvider providerStrlen
 */
public function testStrlen($text, $expected) {
  $length = Unicode::strlen($text);
  $this->assertEquals($expected, $length);
}
/**
 * Data provider for testStrlen().
 *
 * @see testStrlen()
 *
 * @return array
 *   A list of cases, each holding a string followed by its length.
 */
public function providerStrlen() {
  $cases = array();
  $cases[] = array('tHe QUIcK bRoWn', 15);
  $cases[] = array('ÜBER-åwesome', 12);
  return $cases;
}
/**
 * Checks that Unicode::substr() returns the expected part of a string.
 *
 * @dataProvider providerSubstr
 */
public function testSubstr($text, $start, $length, $expected) {
  $part = Unicode::substr($text, $start, $length);
  $this->assertEquals($expected, $part);
}
/**
 * Data provider for testSubstr().
 *
 * @see testSubstr()
 *
 * @return array
 *   A list of cases, each holding:
 *     - The string to test.
 *     - The start number to be processed by substr.
 *     - The length number to be processed by substr.
 *     - The expected string result.
 */
public function providerSubstr() {
  // Most cases cut different windows out of the same subject string.
  $subject = 'frànçAIS is über-åwesome';

  // Start, length and expected result for the shared subject.
  $windows = array(
    array(0, 0, ''),
    array(0, 1, 'f'),
    array(0, 8, 'frànçAIS'),
    array(0, 23, 'frànçAIS is über-åwesom'),
    array(0, 24, $subject),
    array(0, 25, $subject),
    array(0, 100, $subject),
    array(4, 4, 'çAIS'),
    array(1, 0, ''),
    array(100, 0, ''),
    array(-4, 2, 'so'),
    array(-4, 3, 'som'),
    array(-4, 4, 'some'),
    array(-4, 5, 'some'),
    array(-7, 10, 'åwesome'),
    array(5, -10, 'AIS is üb'),
    array(0, -10, 'frànçAIS is üb'),
    array(0, -1, 'frànçAIS is über-åwesom'),
    array(-7, -2, 'åweso'),
    array(-7, -6, 'å'),
    array(-7, -7, ''),
    array(-7, -8, ''),
  );

  // Put the subject in front of every window to form the full case rows.
  $cases = array();
  foreach ($windows as $window) {
    array_unshift($window, $subject);
    $cases[] = $window;
  }

  $cases[] = array('...', 0, 2, '..');
  $cases[] = array('以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳');

  return $cases;
}
/**
 * Checks that Unicode::truncate() returns the expected string.
 *
 * Unicode::check() is run before the truncation is performed.
 *
 * @dataProvider providerTruncate
 */
public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
  Unicode::check();
  $truncated = Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis);
  $this->assertEquals($expected, $truncated);
}
/**
 * Data provider for testTruncate().
 *
 * @see testTruncate()
 *
 * @return array
 *   A list of cases, each holding:
 *     - The string to test.
 *     - The max length to truncate this string to.
 *     - The expected string result.
 *     - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
 *     - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
 */
public function providerTruncate() {
  // Subjects that are truncated to several different lengths.
  $french = 'frànçAIS is über-åwesome';
  $spanish = '¿Dónde está el niño?';
  $help = 'Help! Help! Help!';

  // Neither word-safe nor ellipsis: both flags are left at their defaults.
  $cases = array(
    array($french, 24, $french),
    array($french, 23, 'frànçAIS is über-åwesom'),
    array($french, 17, 'frànçAIS is über-'),
    array('以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'),
  );

  // Ellipsis only.
  $cases[] = array($french, 24, $french, FALSE, TRUE);
  $cases[] = array($french, 23, 'frànçAIS is über-åweso…', FALSE, TRUE);
  $cases[] = array($french, 17, 'frànçAIS is über…', FALSE, TRUE);

  // Word-safe with ellipsis; both flags are appended to each row below.
  $wordsafe_ellipsis = array(
    array('123', 1, '…'),
    array('123', 2, '1…'),
    array('123', 3, '123'),
    array('1234', 3, '12…'),
    array('1234567890', 10, '1234567890'),
    array('12345678901', 10, '123456789…'),
    array('12345678901', 11, '12345678901'),
    array('123456789012', 11, '1234567890…'),
    array('12345 7890', 10, '12345 7890'),
    array('12345 7890', 9, '12345…'),
    array('123 567 90', 10, '123 567 90'),
    array('123 567 901', 10, '123 567…'),
    array('Stop. Hammertime.', 17, 'Stop. Hammertime.'),
    array('Stop. Hammertime.', 16, 'Stop…'),
    array($french, 24, $french),
    array($french, 23, 'frànçAIS is über…'),
    array($french, 17, 'frànçAIS is über…'),
    array($spanish, 20, $spanish),
    array($spanish, 19, '¿Dónde está el…'),
    array($spanish, 13, '¿Dónde está…'),
    array($spanish, 10, '¿Dónde…'),
    array($help, 17, $help),
    array($help, 16, 'Help! Help!…'),
    array($help, 15, 'Help! Help!…'),
    array($help, 14, 'Help! Help!…'),
    array($help, 13, 'Help! Help!…'),
    array($help, 12, 'Help! Help!…'),
    array($help, 11, 'Help! Help…'),
    array($help, 10, 'Help!…'),
    array($help, 9, 'Help!…'),
    array($help, 8, 'Help!…'),
    array($help, 7, 'Help!…'),
    array($help, 6, 'Help!…'),
    array($help, 5, 'Help…'),
    array($help, 4, 'Hel…'),
    array($help, 3, 'He…'),
    array($help, 2, 'H…'),
  );
  foreach ($wordsafe_ellipsis as $row) {
    $row[] = TRUE;
    $row[] = TRUE;
    $cases[] = $row;
  }

  return $cases;
}
}
......@@ -20,3 +20,8 @@
// Load core/lib/Drupal.php, resolved relative to the directory of this file.
require __DIR__ . "/../../core/lib/Drupal.php";
// Define REQUEST_TIME from the request time reported in $_SERVER, cast to an
// integer. Look into removing this later.
define('REQUEST_TIME', (int) $_SERVER['REQUEST_TIME']);
// Set sane locale settings, to ensure consistent string, dates, times and
// numbers handling. All locale categories are switched to the 'C' locale.
// @see drupal_environment_initialize()
setlocale(LC_ALL, 'C');
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment