From 2c460e73c9d984ef3a01cfae45daf4d60ddf6a4e Mon Sep 17 00:00:00 2001 From: Alex Pott <alex.a.pott@googlemail.com> Date: Fri, 5 Feb 2021 13:24:51 +0000 Subject: [PATCH] Issue #3157004 by thedrupalkid, tstoeckler, Matroskeen, ayushmishra206, quietone, alexpott, mikelutz: MachineName migrate process plugin should make allowed characters regular expression configurable --- .../Plugin/migrate/process/MachineName.php | 48 ++++++++++++-- .../src/Unit/process/MachineNameTest.php | 64 +++++++++++++++---- 2 files changed, 95 insertions(+), 17 deletions(-) diff --git a/core/modules/migrate/src/Plugin/migrate/process/MachineName.php b/core/modules/migrate/src/Plugin/migrate/process/MachineName.php index 5a4b4d50696b..bd3fb33b7279 100644 --- a/core/modules/migrate/src/Plugin/migrate/process/MachineName.php +++ b/core/modules/migrate/src/Plugin/migrate/process/MachineName.php @@ -7,18 +7,30 @@ use Drupal\Core\Plugin\ContainerFactoryPluginInterface; use Drupal\migrate\ProcessPluginBase; use Drupal\migrate\MigrateExecutableInterface; +use Drupal\migrate\MigrateException; use Drupal\migrate\Row; use Symfony\Component\DependencyInjection\ContainerInterface; /** * Creates a machine name. * - * The machine_name process plugin takes the source value and runs it through - * the transliteration service. This makes the source value lowercase, - * replaces anything that is not a number or a letter with an underscore, - * and removes duplicate underscores. + * The machine_name process plugin takes the source value and turns it into a + * machine-readable name via the following four steps: + * 1. Language decorations and accents are removed by transliterating the source + * value. + * 2. The resulting value is made lowercase. + * 3. Any special characters are replaced with an underscore. By default, + * anything that is not a number or a letter is replaced, but additional + * characters can be allowed or further restricted by using the + * replace_pattern configuration as described below. 
+ * 4. Any duplicate underscores either in the source value or as a result of + * replacing special characters are removed. * - * Letters will have language decorations and accents removed. + * Available configuration keys: + * - replace_pattern: (optional) A custom regular expression pattern to + * replace special characters with an underscore using preg_replace(). This + * can be used to allow additional characters in the machine name. + * Defaults to /[^a-z0-9_]+/ * * Example: * @@ -32,6 +44,18 @@ * If the value of foo in the source is 'áéí!' then the destination value of bar * will be 'aei_'. * + * @code + * process: + * bar: + * plugin: machine_name + * source: foo + * replace_pattern: '/[^a-z0-9_.]+/' + * @endcode + * + * Here the replace pattern does not match the '.' character (as it is included + * in the list of characters not to match) so if the value of foo in the source + * is 'áéí!.jpg' then the destination value of bar will be 'aei_.jpg'. + * * @see \Drupal\migrate\Plugin\MigrateProcessInterface * * @MigrateProcessPlugin( @@ -47,6 +71,13 @@ class MachineName extends ProcessPluginBase implements ContainerFactoryPluginInt */ protected $transliteration; + /** + * The regular expression pattern. + * + * @var string + */ + protected $replacePattern; + /** * Constructs a MachineName plugin. * @@ -62,6 +93,11 @@ class MachineName extends ProcessPluginBase implements ContainerFactoryPluginInt public function __construct(array $configuration, $plugin_id, $plugin_definition, TransliterationInterface $transliteration) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->transliteration = $transliteration; + + $this->replacePattern = $this->configuration['replace_pattern'] ?? 
'/[^a-z0-9_]+/'; + if (!is_string($this->replacePattern)) { + throw new MigrateException('The replace pattern should be a string'); + } } /** @@ -82,7 +118,7 @@ public static function create(ContainerInterface $container, array $configuratio public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) { $new_value = $this->transliteration->transliterate($value, LanguageInterface::LANGCODE_DEFAULT, '_'); $new_value = strtolower($new_value); - $new_value = preg_replace('/[^a-z0-9_]+/', '_', $new_value); + $new_value = preg_replace($this->replacePattern, '_', $new_value); return preg_replace('/_+/', '_', $new_value); } diff --git a/core/modules/migrate/tests/src/Unit/process/MachineNameTest.php b/core/modules/migrate/tests/src/Unit/process/MachineNameTest.php index 15d4573ffd48..6856492af921 100644 --- a/core/modules/migrate/tests/src/Unit/process/MachineNameTest.php +++ b/core/modules/migrate/tests/src/Unit/process/MachineNameTest.php @@ -3,6 +3,7 @@ namespace Drupal\Tests\migrate\Unit\process; use Drupal\migrate\Plugin\migrate\process\MachineName; +use Drupal\migrate\MigrateException; /** * Tests the machine name process plugin. @@ -36,26 +37,67 @@ protected function setUp(): void { /** * Tests machine name transformation of non-alphanumeric characters. + * + * @param string $human_name + * The human-readable name that will be converted in the test. + * @param array $configuration + * The plugin configuration. + * @param string $expected_result + * The expected result of the transformation. + * + * @dataProvider providerTestMachineNames */ - public function testMachineNames() { - - // Tests the following transformations: - // - non-alphanumeric character (including spaces) -> underscore, - // - Uppercase -> lowercase, - // - Multiple consecutive underscore -> single underscore. - $human_name_ascii = 'foo2, the.bar;2*&the%baz!YEE____HaW '; - $human_name = $human_name_ascii . 
'áéő'; - $expected_result = 'foo2_the_bar_2_the_baz_yee_haw_aeo'; + public function testMachineNames(string $human_name, array $configuration, string $expected_result): void { // Test for calling transliterate on mock object. $this->transliteration ->expects($this->once()) ->method('transliterate') ->with($human_name) - ->will($this->returnValue($human_name_ascii . 'aeo')); + ->will($this->returnCallback(function (string $string): string { + return str_replace(['á', 'é', 'ő'], ['a', 'e', 'o'], $string); + })); - $plugin = new MachineName([], 'machine_name', [], $this->transliteration); + $plugin = new MachineName($configuration, 'machine_name', [], $this->transliteration); $value = $plugin->transform($human_name, $this->migrateExecutable, $this->row, 'destination_property'); $this->assertEquals($expected_result, $value); } + /** + * Provides test cases for MachineNameTest::testMachineNames(). + * + * @return array + * An array of test cases. + */ + public function providerTestMachineNames(): array { + return [ + // Tests the following transformations: + // - non-alphanumeric character (including spaces) -> underscore, + // - Uppercase -> lowercase, + // - Multiple consecutive underscore -> single underscore. + 'default' => [ + 'human_name' => 'foo2, the.bar;2*&the%baz!YEE____HaW áéő', + 'configuration' => [], + 'expected_result' => 'foo2_the_bar_2_the_baz_yee_haw_aeo', + ], + // Tests with a different pattern that allows periods. + 'period_allowed' => [ + 'human_name' => '2*&the%baz!YEE____HaW áéő.jpg', + 'configuration' => [ + 'replace_pattern' => '/[^a-z0-9_.]+/', + ], + 'expected_result' => '2_the_baz_yee_haw_aeo.jpg', + ], + ]; + } + + /** + * Tests that the replacement regular expression is a string. 
+ */ + public function testInvalidConfiguration(): void { + $configuration['replace_pattern'] = 1; + $this->expectException(MigrateException::class); + $this->expectExceptionMessage('The replace pattern should be a string'); + new MachineName($configuration, 'machine_name', [], $this->transliteration); + } + } -- GitLab