From 09b0283675f938c89a2c541b6649deb10e7c5040 Mon Sep 17 00:00:00 2001 From: Dries Buytaert <dries@buytaert.net> Date: Tue, 18 May 2010 18:26:30 +0000 Subject: [PATCH] - Patch #692044 by Damien Tournoud, effulgentsia: a site with statistics module enabled is much slower in serving cached pages in D7 than in D6. --- includes/bootstrap.inc | 108 +++++++++++++++++++++++++-- includes/cache-install.inc | 2 +- includes/cache.inc | 20 ++--- includes/common.inc | 16 ++-- includes/path.inc | 90 ---------------------- modules/statistics/statistics.module | 8 +- modules/statistics/statistics.test | 63 ++++++++++++++++ modules/system/system.install | 15 ++-- modules/update/update.install | 7 ++ modules/update/update.module | 4 - 10 files changed, 205 insertions(+), 128 deletions(-) diff --git a/includes/bootstrap.inc b/includes/bootstrap.inc index 846cbdc358a5..8520733d584a 100644 --- a/includes/bootstrap.inc +++ b/includes/bootstrap.inc @@ -1069,14 +1069,14 @@ function drupal_serve_page_from_cache(stdClass $cache) { // drupal_add_http_headers(). Keys are mixed-case. $default_headers = array(); - foreach ($cache->headers as $name => $value) { + foreach ($cache->data['headers'] as $name => $value) { // In the case of a 304 response, certain headers must be sent, and the // remaining may not (see RFC 2616, section 10.3.5). Do not override // headers set in hook_boot(). $name_lower = strtolower($name); if (in_array($name_lower, array('content-location', 'expires', 'cache-control', 'vary')) && !isset($hook_boot_headers[$name_lower])) { drupal_add_http_header($name, $value); - unset($cache->headers[$name]); + unset($cache->data['headers'][$name]); } } @@ -1106,7 +1106,7 @@ function drupal_serve_page_from_cache(stdClass $cache) { } // Send the remaining headers. - foreach ($cache->headers as $name => $value) { + foreach ($cache->data['headers'] as $name => $value) { drupal_add_http_header($name, $value); } @@ -1134,19 +1134,20 @@ function drupal_serve_page_from_cache(stdClass $cache) { header('Vary: Accept-Encoding', FALSE); // If page_compression is enabled, the cache contains gzipped data. if ($return_compressed) { - // $cache->data is already gzip'ed, so make sure zlib.output_compression - // does not compress it once more. + // $cache->data['body'] is already gzip'ed, so make sure + // zlib.output_compression does not compress it once more. ini_set('zlib.output_compression', '0'); header('Content-Encoding: gzip'); } else { // The client does not support compression, so unzip the data in the // cache. Strip the gzip header and run uncompress. - $cache->data = gzinflate(substr(substr($cache->data, 10), 0, -8)); + $cache->data['body'] = gzinflate(substr(substr($cache->data['body'], 10), 0, -8)); } } - print $cache->data; + // Print the page. + print $cache->data['body']; } /** @@ -1638,6 +1639,48 @@ function drupal_get_messages($type = NULL, $clear_queue = TRUE) { return array(); } +/** + * Get the title of the current page, for display on the page and in the title bar. + * + * @return + * The current page's title. + */ +function drupal_get_title() { + $title = drupal_set_title(); + + // During a bootstrap, menu.inc is not included and thus we cannot provide a title. + if (!isset($title) && function_exists('menu_get_active_title')) { + $title = check_plain(menu_get_active_title()); + } + + return $title; +} + +/** + * Set the title of the current page, for display on the page and in the title bar. + * + * @param $title + * Optional string value to assign to the page title; or if set to NULL + * (default), leaves the current title unchanged. + * @param $output + * Optional flag - normally should be left as CHECK_PLAIN. Only set to + * PASS_THROUGH if you have already removed any possibly dangerous code + * from $title using a function like check_plain() or filter_xss(). With this + * flag the string will be passed through unchanged. + * + * @return + * The updated title of the current page. + */ +function drupal_set_title($title = NULL, $output = CHECK_PLAIN) { + $stored_title = &drupal_static(__FUNCTION__); + + if (isset($title)) { + $stored_title = ($output == PASS_THROUGH) ? $title : check_plain($title); + } + + return $stored_title; +} + /** * Check to see if an IP address has been blocked. * @@ -1990,6 +2033,9 @@ function _drupal_bootstrap_page_cache() { // If there is a cached page, display it. if (is_object($cache)) { header('X-Drupal-Cache: HIT'); + // Restore the metadata cached with the page. + $_GET['q'] = $cache->data['path']; + drupal_set_title($cache->data['title'], PASS_THROUGH); date_default_timezone_set(drupal_get_user_timezone()); // If the skipping of the bootstrap hooks is not enforced, call // hook_boot. @@ -2307,6 +2353,54 @@ function request_path() { return $path; } +/** + * Return a component of the current Drupal path. + * + * When viewing a page at the path "admin/structure/types", for example, arg(0) + * returns "admin", arg(1) returns "structure", and arg(2) returns "types". + * + * Avoid use of this function where possible, as resulting code is hard to read. + * In menu callback functions, attempt to use named arguments. See the explanation + * in menu.inc for how to construct callbacks that take arguments. When attempting + * to use this function to load an element from the current path, e.g. loading the + * node on a node page, please use menu_get_object() instead. + * + * @param $index + * The index of the component, where each component is separated by a '/' + * (forward-slash), and where the first component has an index of 0 (zero). + * @param $path + * A path to break into components. Defaults to the path of the current page. + * + * @return + * The component specified by $index, or NULL if the specified component was + * not found. + */ +function arg($index = NULL, $path = NULL) { + // Even though $arguments doesn't need to be resettable for any functional + // reasons (the result of explode() does not depend on any run-time + // information), it should be resettable anyway in case a module needs to + // free up the memory used by it. + // Use the advanced drupal_static() pattern, since this is called very often. + static $drupal_static_fast; + if (!isset($drupal_static_fast)) { + $drupal_static_fast['arguments'] = &drupal_static(__FUNCTION__); + } + $arguments = &$drupal_static_fast['arguments']; + + if (!isset($path)) { + $path = $_GET['q']; + } + if (!isset($arguments[$path])) { + $arguments[$path] = explode('/', $path); + } + if (!isset($index)) { + return $arguments[$path]; + } + if (isset($arguments[$path][$index])) { + return $arguments[$path][$index]; + } +} + /** * If Drupal is behind a reverse proxy, we use the X-Forwarded-For header * instead of $_SERVER['REMOTE_ADDR'], which would be the IP address of diff --git a/includes/cache-install.inc b/includes/cache-install.inc index 3a754e55fd6e..545f1a2a2170 100644 --- a/includes/cache-install.inc +++ b/includes/cache-install.inc @@ -24,7 +24,7 @@ function getMultiple(&$cids) { return array(); } - function set($cid, $data, $expire = CACHE_PERMANENT, array $headers = NULL) { + function set($cid, $data, $expire = CACHE_PERMANENT) { } function clear($cid = NULL, $wildcard = FALSE) { diff --git a/includes/cache.inc b/includes/cache.inc index e3194d31b0f5..2a729eff5822 100644 --- a/includes/cache.inc +++ b/includes/cache.inc @@ -133,11 +133,9 @@ function cache_get_multiple(array &$cids, $bin = 'cache') { * general cache wipe. * - A Unix timestamp: Indicates that the item should be kept at least until * the given time, after which it behaves like CACHE_TEMPORARY. - * @param $headers - * A string containing HTTP header information for cached pages. */ -function cache_set($cid, $data, $bin = 'cache', $expire = CACHE_PERMANENT, array $headers = NULL) { - return _cache_get_object($bin)->set($cid, $data, $expire, $headers); +function cache_set($cid, $data, $bin = 'cache', $expire = CACHE_PERMANENT) { + return _cache_get_object($bin)->set($cid, $data, $expire); } /** @@ -263,10 +261,8 @@ function getMultiple(&$cids); * general cache wipe. * - A Unix timestamp: Indicates that the item should be kept at least until * the given time, after which it behaves like CACHE_TEMPORARY. - * @param $headers - * A string containing HTTP header information for cached pages. */ - function set($cid, $data, $expire = CACHE_PERMANENT, array $headers = NULL); + function set($cid, $data, $expire = CACHE_PERMANENT); /** @@ -312,7 +308,7 @@ function get($cid) { try { // Garbage collection necessary when enforcing a minimum cache lifetime. $this->garbageCollection($this->bin); - $cache = db_query("SELECT data, created, headers, expire, serialized FROM {" . $this->bin . "} WHERE cid = :cid", array(':cid' => $cid))->fetchObject(); + $cache = db_query("SELECT data, created, expire, serialized FROM {" . $this->bin . "} WHERE cid = :cid", array(':cid' => $cid))->fetchObject(); return $this->prepareItem($cache); } catch (Exception $e) { @@ -327,7 +323,7 @@ function getMultiple(&$cids) { // Garbage collection necessary when enforcing a minimum cache lifetime. $this->garbageCollection($this->bin); $query = db_select($this->bin); - $query->fields($this->bin, array('cid', 'data', 'created', 'headers', 'expire', 'serialized')); + $query->fields($this->bin, array('cid', 'data', 'created', 'expire', 'serialized')); $query->condition($this->bin . '.cid', $cids, 'IN'); $result = $query->execute(); $cache = array(); @@ -401,19 +397,15 @@ protected function prepareItem($cache) { if ($cache->serialized) { $cache->data = unserialize($cache->data); } - if (isset($cache->headers)) { - $cache->headers = unserialize($cache->headers); - } return $cache; } - function set($cid, $data, $expire = CACHE_PERMANENT, array $headers = NULL) { + function set($cid, $data, $expire = CACHE_PERMANENT) { $fields = array( 'serialized' => 0, 'created' => REQUEST_TIME, 'expire' => $expire, - 'headers' => isset($headers) ? serialize($headers) : NULL, ); if (!is_string($data)) { $fields['data'] = serialize($data); diff --git a/includes/common.inc b/includes/common.inc index 80edabc031df..0190d189af37 100644 --- a/includes/common.inc +++ b/includes/common.inc @@ -4518,28 +4518,32 @@ function drupal_page_set_cache() { if (drupal_page_is_cacheable()) { $cache = (object) array( 'cid' => $base_root . request_uri(), - 'data' => ob_get_clean(), + 'data' => array( + 'path' => $_GET['q'], + 'body' => ob_get_clean(), + 'title' => drupal_get_title(), + 'headers' => array(), + ), 'expire' => CACHE_TEMPORARY, 'created' => REQUEST_TIME, - 'headers' => array(), ); // Restore preferred header names based on the lower-case names returned // by drupal_get_http_header(). $header_names = _drupal_set_preferred_header_name(); foreach (drupal_get_http_header() as $name_lower => $value) { - $cache->headers[$header_names[$name_lower]] = $value; + $cache->data['headers'][$header_names[$name_lower]] = $value; if ($name_lower == 'expires') { // Use the actual timestamp from an Expires header if available. $cache->expire = strtotime($value); } } - if ($cache->data) { + if ($cache->data['body']) { if (variable_get('page_compression', TRUE) && extension_loaded('zlib')) { - $cache->data = gzencode($cache->data, 9, FORCE_GZIP); + $cache->data['body'] = gzencode($cache->data['body'], 9, FORCE_GZIP); } - cache_set($cache->cid, $cache->data, 'cache_page', $cache->expire, $cache->headers); + cache_set($cache->cid, $cache->data, 'cache_page', $cache->expire); } return $cache; } diff --git a/includes/path.inc b/includes/path.inc index 3984631387f5..daaca116352f 100644 --- a/includes/path.inc +++ b/includes/path.inc @@ -234,96 +234,6 @@ function drupal_get_normal_path($path, $path_language = NULL) { return $path; } -/** - * Return a component of the current Drupal path. - * - * When viewing a page at the path "admin/structure/types", for example, arg(0) - * returns "admin", arg(1) returns "structure", and arg(2) returns "types". - * - * Avoid use of this function where possible, as resulting code is hard to read. - * In menu callback functions, attempt to use named arguments. See the explanation - * in menu.inc for how to construct callbacks that take arguments. When attempting - * to use this function to load an element from the current path, e.g. loading the - * node on a node page, please use menu_get_object() instead. - * - * @param $index - * The index of the component, where each component is separated by a '/' - * (forward-slash), and where the first component has an index of 0 (zero). - * @param $path - * A path to break into components. Defaults to the path of the current page. - * - * @return - * The component specified by $index, or NULL if the specified component was - * not found. - */ -function arg($index = NULL, $path = NULL) { - // Even though $arguments doesn't need to be resettable for any functional - // reasons (the result of explode() does not depend on any run-time - // information), it should be resettable anyway in case a module needs to - // free up the memory used by it. - // Use the advanced drupal_static() pattern, since this is called very often. - static $drupal_static_fast; - if (!isset($drupal_static_fast)) { - $drupal_static_fast['arguments'] = &drupal_static(__FUNCTION__); - } - $arguments = &$drupal_static_fast['arguments']; - - if (!isset($path)) { - $path = $_GET['q']; - } - if (!isset($arguments[$path])) { - $arguments[$path] = explode('/', $path); - } - if (!isset($index)) { - return $arguments[$path]; - } - if (isset($arguments[$path][$index])) { - return $arguments[$path][$index]; - } -} - -/** - * Get the title of the current page, for display on the page and in the title bar. - * - * @return - * The current page's title. - */ -function drupal_get_title() { - $title = drupal_set_title(); - - // During a bootstrap, menu.inc is not included and thus we cannot provide a title. - if (!isset($title) && function_exists('menu_get_active_title')) { - $title = check_plain(menu_get_active_title()); - } - - return $title; -} - -/** - * Set the title of the current page, for display on the page and in the title bar. - * - * @param $title - * Optional string value to assign to the page title; or if set to NULL - * (default), leaves the current title unchanged. - * @param $output - * Optional flag - normally should be left as CHECK_PLAIN. Only set to - * PASS_THROUGH if you have already removed any possibly dangerous code - * from $title using a function like check_plain() or filter_xss(). With this - * flag the string will be passed through unchanged. - * - * @return - * The updated title of the current page. - */ -function drupal_set_title($title = NULL, $output = CHECK_PLAIN) { - $stored_title = &drupal_static(__FUNCTION__); - - if (isset($title)) { - $stored_title = ($output == PASS_THROUGH) ? $title : check_plain($title); - } - - return $stored_title; -} - /** * Check if the current page is the front page. * diff --git a/modules/statistics/statistics.module b/modules/statistics/statistics.module index fce5f6acf97f..983dba45bd7e 100644 --- a/modules/statistics/statistics.module +++ b/modules/statistics/statistics.module @@ -51,7 +51,12 @@ function statistics_help($path, $arg) { function statistics_exit() { global $user; - drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL); + // When serving cached pages with the 'page_cache_without_database' + // configuration, system variables need to be loaded. This is a major + // performance decrease for non-database page caches, but with Statistics + // module, it is likely to also have 'statistics_enable_access_log' enabled, + // in which case we need to bootstrap to the session phase anyway. + drupal_bootstrap(DRUPAL_BOOTSTRAP_VARIABLES); if (variable_get('statistics_count_content_views', 0)) { // We are counting content views. @@ -70,6 +75,7 @@ function statistics_exit() { } } if (variable_get('statistics_enable_access_log', 0)) { + drupal_bootstrap(DRUPAL_BOOTSTRAP_SESSION); // Log this page access. db_insert('accesslog') ->fields(array( diff --git a/modules/statistics/statistics.test b/modules/statistics/statistics.test index 283c2d9da7d8..8922144adcb3 100644 --- a/modules/statistics/statistics.test +++ b/modules/statistics/statistics.test @@ -33,6 +33,69 @@ class StatisticsTestCase extends DrupalWebTestCase { } } +/** + * Tests that logging via statistics_exit() works for cached and uncached pages. + * + * Subclass DrupalWebTestCase rather than StatisticsTestCase, because we want + * to test requests from an anonymous user. + */ +class StatisticsLoggingTestCase extends DrupalWebTestCase { + public static function getInfo() { + return array( + 'name' => 'Statistics logging tests', + 'description' => 'Tests request logging for cached and uncached pages.', + 'group' => 'Statistics' + ); + } + + function setUp() { + parent::setUp('statistics'); + + // Ensure we have a node page to access. + $this->node = $this->drupalCreateNode(); + + // Enable page caching. + variable_set('cache', TRUE); + + // Enable access logging. + variable_set('statistics_enable_access_log', 1); + variable_set('statistics_count_content_views', 1); + + // Clear the logs. + db_truncate('accesslog'); + db_truncate('node_counter'); + } + + /** + * Verifies request logging for cached and uncached pages. + */ + function testLogging() { + $path = 'node/' . $this->node->nid; + $expected = array( + 'title' => $this->node->title, + 'path' => $path, + ); + + // Verify logging of an uncached page. + $this->drupalGet($path); + $this->assertIdentical($this->drupalGetHeader('X-Drupal-Cache'), 'MISS', t('Testing an uncached page.')); + $log = db_query('SELECT * FROM {accesslog}')->fetchAll(PDO::FETCH_ASSOC); + $this->assertTrue(is_array($log) && count($log) == 1, t('Page request was logged.')); + $this->assertEqual(array_intersect_key($log[0], $expected), $expected); + $node_counter = statistics_get($this->node->nid); + $this->assertIdentical($node_counter['totalcount'], '1'); + + // Verify logging of a cached page. + $this->drupalGet($path); + $this->assertIdentical($this->drupalGetHeader('X-Drupal-Cache'), 'HIT', t('Testing a cached page.')); + $log = db_query('SELECT * FROM {accesslog}')->fetchAll(PDO::FETCH_ASSOC); + $this->assertTrue(is_array($log) && count($log) == 2, t('Page request was logged.')); + $this->assertEqual(array_intersect_key($log[1], $expected), $expected); + $node_counter = statistics_get($this->node->nid); + $this->assertIdentical($node_counter['totalcount'], '2'); + } +} + /** * Tests that report pages render properly, and that access logging works. */ diff --git a/modules/system/system.install b/modules/system/system.install index 5f01d45b8bfa..a80e6ceda54b 100644 --- a/modules/system/system.install +++ b/modules/system/system.install @@ -634,11 +634,6 @@ function system_schema() { 'not null' => TRUE, 'default' => 0, ), - 'headers' => array( - 'description' => 'Any custom HTTP headers to be added to cached data.', - 'type' => 'text', - 'not null' => FALSE, - ), 'serialized' => array( 'description' => 'A flag to indicate whether content is serialized (1) or not (0).', 'type' => 'int', @@ -2381,6 +2376,16 @@ function system_update_7053() { ->execute(); } +/** + * Remove {cache_*}.headers columns. + */ +function system_update_7054() { + $cache_tables = array('cache', 'cache_bootstrap', 'cache_filter', 'cache_form', 'cache_menu', 'cache_page', 'cache_path'); + foreach ($cache_tables as $table) { + db_drop_field($table, 'headers'); + } +} + /** * @} End of "defgroup updates-6.x-to-7.x" * The next series of updates should start at 8000. diff --git a/modules/update/update.install b/modules/update/update.install index 00935f190e58..f324ce5703dc 100644 --- a/modules/update/update.install +++ b/modules/update/update.install @@ -167,3 +167,10 @@ function update_update_7000() { $queue = DrupalQueue::get('update_fetch_tasks'); $queue->createQueue(); } + +/** + * Remove {cache_update}.headers columns. + */ +function update_update_7001() { + db_drop_field('cache_update', 'headers'); +} diff --git a/modules/update/update.module b/modules/update/update.module index 02bdabaef45d..1704fdfff11c 100644 --- a/modules/update/update.module +++ b/modules/update/update.module @@ -653,9 +653,6 @@ function theme_update_last_check($variables) { /** * Store data in the private update status cache table. * - * Note: this function completely ignores the {cache_update}.headers field - * since that is meaningless for the kinds of data we're caching. - * * @param $cid * The cache ID to save the data with. * @param $data @@ -671,7 +668,6 @@ function _update_cache_set($cid, $data, $expire) { $fields = array( 'created' => REQUEST_TIME, 'expire' => $expire, - 'headers' => NULL, ); if (!is_string($data)) { $fields['data'] = serialize($data); -- GitLab