From 94b5d57502c45cef2954de9885075d88b6ca56cf Mon Sep 17 00:00:00 2001
From: Lauri Eskola <lauri.eskola@acquia.com>
Date: Wed, 1 Jun 2022 12:27:12 +0300
Subject: [PATCH] Issue #3274648 by nod_, Wim Leers: HTMLRestrictions::merge()
 and ::toGeneralHtmlSupportConfig() fail on allowed attribute values that can
 be interpreted as integers

---
 .../ckeditor5/src/HTMLRestrictions.php        | 164 +++++++-----------
 .../tests/src/Unit/HTMLRestrictionsTest.php   |  57 ++++++
 2 files changed, 123 insertions(+), 98 deletions(-)

diff --git a/core/modules/ckeditor5/src/HTMLRestrictions.php b/core/modules/ckeditor5/src/HTMLRestrictions.php
index 675f16e73bb7..35010a9a6c78 100644
--- a/core/modules/ckeditor5/src/HTMLRestrictions.php
+++ b/core/modules/ckeditor5/src/HTMLRestrictions.php
@@ -684,6 +684,67 @@ public function doIntersect(HTMLRestrictions $other): HTMLRestrictions {
     return new self($intersection);
   }
 
+  /**
+   * Merge arrays of allowed elements according to HTMLRestrictions rules.
+   *
+   * @param array $array1
+   *   The first array of allowed elements.
+   * @param array $array2
+   *   The second array of allowed elements.
+   *
+   * @return array
+   *   Merged array of allowed elements.
+   */
+  private static function mergeAllowedElementsLevel(array $array1, array $array2): array {
+    $union = [];
+    $array1_keys = array_keys($array1);
+    $array2_keys = array_keys($array2);
+    $common_keys = array_intersect($array1_keys, $array2_keys);
+    if (count($common_keys) === 0) {
+      // There are no keys in common, simply append the arrays.
+      $union = $array1 + $array2;
+    }
+    else {
+      // For all the distinct keys, append them to the result.
+      $filter_keys = array_flip($common_keys);
+      // Add all unique keys from $array1.
+      $union += array_diff_key($array1, $filter_keys);
+      // Add all unique keys from $array2.
+      $union += array_diff_key($array2, $filter_keys);
+
+      // There are some keys in common that need to be merged.
+      foreach ($common_keys as $key) {
+        $value1 = $array1[$key];
+        $value2 = $array2[$key];
+        $value1_is_bool = is_bool($value1);
+        $value2_is_bool = is_bool($value2);
+
+        // When both values are boolean, combine the two.
+        if ($value1_is_bool && $value2_is_bool) {
+          $union[$key] = $value1 || $value2;
+        }
+        // When only one value is a boolean, take the most permissive result:
+        // - when the value it TRUE, keep TRUE as it is the most permissive
+        // - when the value is FALSE, take the other value.
+        elseif ($value1_is_bool) {
+          $union[$key] = $value1 ?: $value2;
+        }
+        elseif ($value2_is_bool) {
+          $union[$key] = $value2 ?: $value1;
+        }
+        // Process nested arrays, in this case it correspond to tag attributes
+        // configuration.
+        elseif (is_array($value1) && is_array($value2)) {
+          $union[$key] = self::mergeAllowedElementsLevel($value1, $value2);
+        }
+      }
+    }
+    // Make sure the order of the union array matches the order of the keys in
+    // the arrays provided.
+    $keys_order = array_merge($array1_keys, $array2_keys);
+    return array_merge(array_flip($keys_order), $union);
+  }
+
   /**
    * Computes set union of two HTML restrictions, with wildcard support.
    *
@@ -695,103 +756,7 @@ public function doIntersect(HTMLRestrictions $other): HTMLRestrictions {
    *   are either allowed in $this or in $other.
    */
   public function merge(HTMLRestrictions $other): HTMLRestrictions {
-    $union = array_merge_recursive($this->elements, $other->elements);
-    // When recursively merging elements arrays, unkeyed boolean values can
-    // appear in attribute config arrays. This removes them.
-    foreach ($union as $tag => $tag_config) {
-      if (is_array($tag_config)) {
-        // If the HTML tag restrictions for both operands were both booleans,
-        // then the result of array_merge_recursive() is an array containing two
-        // booleans (because it is designed for arrays, not for also merging
-        // booleans) under the first two numeric keys: 0 and 1. This does not
-        // match the structure expected of HTML restrictions. Combine the two
-        // booleans.
-        if (array_key_exists(0, $tag_config) && array_key_exists(1, $tag_config) && is_bool($tag_config[0]) && is_bool($tag_config[1])) {
-          // Twice FALSE.
-          if ($tag_config === [FALSE, FALSE]) {
-            $union[$tag] = FALSE;
-          }
-          // Once or twice TRUE.
-          else {
-            $union[$tag] = TRUE;
-          }
-          continue;
-        }
-
-        // If the HTML tag restrictions for only one of the two operands was a
-        // boolean, then the result of array_merge_recursive() is an array
-        // containing the complete contents of the non-boolean operand plus an
-        // additional key-value pair with the first numeric key: 0.
-        if (array_key_exists(0, $tag_config)) {
-          // If the boolean was FALSE (meaning: "no attributes allowed"), then
-          // the other operand's values should be used in an union: this yields
-          // the most permissive result.
-          if ($tag_config[0] === FALSE) {
-            unset($union[$tag][0]);
-          }
-          // If the boolean was TRUE (meaning: "all attributes allowed"), then
-          // the other operand's values should be ignored in an union: this
-          // yields the most permissive result.
-          elseif ($tag_config[0] === TRUE) {
-            $union[$tag] = TRUE;
-          }
-          continue;
-        }
-
-        // If the HTML tag restrictions are arrays for both operands, similar
-        // logic needs to be applied to the attribute-level restrictions.
-        foreach ($tag_config as $html_tag_attribute_name => $html_tag_attribute_restrictions) {
-          if (is_bool($html_tag_attribute_restrictions)) {
-            continue;
-          }
-
-          if (array_key_exists(0, $html_tag_attribute_restrictions)) {
-            // Special case: the global attribute `*` HTML tag.
-            // @see https://html.spec.whatwg.org/multipage/dom.html#global-attributes
-            // @see validateAllowedRestrictionsPhase2()
-            // @see validateAllowedRestrictionsPhase4()
-            if ($tag === '*') {
-              assert(is_bool($html_tag_attribute_restrictions[0]) || is_bool($html_tag_attribute_restrictions[1]));
-              // When both are boolean, pick the most permissive value.
-              if (is_bool($html_tag_attribute_restrictions[0]) && isset($html_tag_attribute_restrictions[1]) && is_bool($html_tag_attribute_restrictions[1])) {
-                $value = $html_tag_attribute_restrictions[0] || $html_tag_attribute_restrictions[1];
-              }
-              else {
-                $value = is_bool($html_tag_attribute_restrictions[0])
-                  ? $html_tag_attribute_restrictions[0]
-                  : $html_tag_attribute_restrictions[1];
-              }
-              $union[$tag][$html_tag_attribute_name] = $value;
-              continue;
-            }
-
-            // The "twice FALSE" case cannot occur for attributes, because
-            // attribute restrictions either have "TRUE" (to indicate any value
-            // is allowed for the attribute) or a list of allowed attribute
-            // values. If there is a numeric key, then one of the two operands
-            // must allow all attribute values (the "TRUE" case). Otherwise, an
-            // array merge would have happened, and no numeric key would exist.
-            // Therefore, this is always once or twice TRUE.
-            // e.g.: <foo bar> and <foo bar>, or <foo bar> and <foo bar="baz">
-            assert($html_tag_attribute_restrictions[0] === TRUE || $html_tag_attribute_restrictions[1] === TRUE);
-            $union[$tag][$html_tag_attribute_name] = TRUE;
-          }
-          else {
-            // Finally, when both operands list the same allowed attribute
-            // values, then the result provided by array_merge_recursive() for
-            // those allowed attribute values is an array containing two times
-            // `TRUE` (because it is designed for arrays, not for also merging
-            // booleans) under the first two numeric keys: 0 and 1.
-            // e.g.: <foo bar="baz qux"> merged with <foo bar="baz quux">.
-            foreach ($html_tag_attribute_restrictions as $allowed_attribute_value => $merged_result) {
-              if ($merged_result === [0 => TRUE, 1 => TRUE]) {
-                $union[$tag][$html_tag_attribute_name][$allowed_attribute_value] = TRUE;
-              }
-            }
-          }
-        }
-      }
-    }
+    $union = self::mergeAllowedElementsLevel($this->elements, $other->elements);
 
     // Special case: wildcard attributes, and the ability to define restrictions
     // for all concrete attributes matching them using:
@@ -1100,7 +1065,10 @@ public function toGeneralHtmlSupportConfig(): array {
           // that this class expects to the `['en', 'fr']` structure that the
           // GHS functionality in CKEditor 5 expects.
           if (is_array($value)) {
-            $value = array_keys($value);
+            // Ensure that all values are strings, this is necessary since PHP
+            // transforms the "1" string into 1 the number when it is used as
+            // an array key.
+            $value = array_map('strval', array_keys($value));
           }
           // Drupal never allows style attributes due to security concerns.
           // @see \Drupal\Component\Utility\Xss
diff --git a/core/modules/ckeditor5/tests/src/Unit/HTMLRestrictionsTest.php b/core/modules/ckeditor5/tests/src/Unit/HTMLRestrictionsTest.php
index f047f271fae6..319b3e8b4d40 100644
--- a/core/modules/ckeditor5/tests/src/Unit/HTMLRestrictionsTest.php
+++ b/core/modules/ckeditor5/tests/src/Unit/HTMLRestrictionsTest.php
@@ -294,6 +294,14 @@ public function providerConvenienceConstructors(): \Generator {
       '<a target class>',
       ['a' => ['target' => TRUE, 'class' => TRUE]],
     ];
+    yield 'tag with allowed attribute value that happen to be numbers' => [
+      '<ol type="1 A I">',
+      ['ol' => ['type' => [1 => TRUE, 'A' => TRUE, 'I' => TRUE]]],
+    ];
+    yield 'tag with allowed attribute value that happen to be numbers (reversed)' => [
+      '<ol type="I A 1">',
+      ['ol' => ['type' => ['I' => TRUE, 'A' => TRUE, 1 => TRUE]]],
+    ];
 
     // Multiple tag cases.
     yield 'two tags' => [
@@ -682,6 +690,27 @@ public function providerRepresentations(): \Generator {
         ],
       ],
     ];
+
+    yield '<ol type="1 A">' => [
+      new HTMLRestrictions(['ol' => ['type' => ['1' => TRUE, 'A' => TRUE]]]),
+      ['<ol type="1 A">'],
+      '<ol type="1 A">',
+      [
+        [
+          'name' => 'ol',
+          'attributes' => [
+            [
+              'key' => 'type',
+              'value' => [
+                'regexp' => [
+                  'pattern' => '/^(1|A)$/',
+                ],
+              ],
+            ],
+          ],
+        ],
+      ],
+    ];
   }
 
   /**
@@ -906,6 +935,34 @@ public function providerOperands(): \Generator {
       'intersection' => 'a',
       'union' => 'b',
     ];
+    yield 'attribute restrictions are different: <ol type=*> vs <ol type="A">' => [
+      'a' => new HTMLRestrictions(['ol' => ['type' => TRUE]]),
+      'b' => new HTMLRestrictions(['ol' => ['type' => ['A' => TRUE]]]),
+      'diff' => 'a',
+      'intersection' => 'b',
+      'union' => 'a',
+    ];
+    yield 'attribute restrictions are different: <ol type=*> vs <ol type="A"> — vice versa' => [
+      'b' => new HTMLRestrictions(['ol' => ['type' => ['A' => TRUE]]]),
+      'a' => new HTMLRestrictions(['ol' => ['type' => TRUE]]),
+      'diff' => HTMLRestrictions::emptySet(),
+      'intersection' => 'a',
+      'union' => 'b',
+    ];
+    yield 'attribute restrictions are different: <ol type=*> vs <ol type="1">' => [
+      'a' => new HTMLRestrictions(['ol' => ['type' => TRUE]]),
+      'b' => new HTMLRestrictions(['ol' => ['type' => ['1' => TRUE]]]),
+      'diff' => 'a',
+      'intersection' => 'b',
+      'union' => 'a',
+    ];
+    yield 'attribute restrictions are different: <ol type=*> vs <ol type="1"> — vice versa' => [
+      'b' => new HTMLRestrictions(['ol' => ['type' => ['1' => TRUE]]]),
+      'a' => new HTMLRestrictions(['ol' => ['type' => TRUE]]),
+      'diff' => HTMLRestrictions::emptySet(),
+      'intersection' => 'a',
+      'union' => 'b',
+    ];
 
     // Complex cases.
     yield 'attribute restrictions are different: <a hreflang="en"> vs <strong>' => [
-- 
GitLab