From 82a891f6e0ed2e88aad4f52c7fa1726e063db1b2 Mon Sep 17 00:00:00 2001 From: Seb35 Date: Sat, 29 Sep 2018 14:05:00 +0200 Subject: [PATCH] Adapt for multibyte strings as UTF-8 There is no native function mb_str_split, hence writing a loop. --- src/MatchesSolver.php | 2 +- src/Solver.php | 12 +++++++++--- test/suite/MatchesArrayProvidersTrait.php | 23 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/MatchesSolver.php b/src/MatchesSolver.php index 5079be1..d76c259 100644 --- a/src/MatchesSolver.php +++ b/src/MatchesSolver.php @@ -46,7 +46,7 @@ protected function result( array $matrix ) { return array_map(function (Match $result) use ($stringA) { - $result->value = substr($stringA, $result->index(), $result->length); + $result->value = mb_substr($stringA, $result->index(), $result->length); return $result; }, $longestIndexes); diff --git a/src/Solver.php b/src/Solver.php index 74ee735..665ec15 100644 --- a/src/Solver.php +++ b/src/Solver.php @@ -37,7 +37,7 @@ protected function result( string $stringB, array $matrix ) { - return count($longestIndexes) === 0 ? '' : substr($stringA, $longestIndexes[0], $longestLength); + return count($longestIndexes) === 0 ? '' : mb_substr($stringA, $longestIndexes[0], $longestLength); } /** @@ -55,8 +55,14 @@ public function solve(string $stringA, string $stringB) return call_user_func_array([$this, 'solve'], $arguments); } - $charsA = str_split($stringA); - $charsB = str_split($stringB); + $charsA = []; + $charsB = []; + for ($i=0; $i < max(mb_strlen($stringA), 1); $i++) { + $charsA[] = mb_substr($stringA, $i, 1); + } + for ($i=0; $i < max(mb_strlen($stringB), 1); $i++) { + $charsB[] = mb_substr($stringB, $i, 1); + } $matrix = array_fill_keys(array_keys($charsA), array_fill_keys(array_keys($charsB), 0)); $longestLength = 0; diff --git a/test/suite/MatchesArrayProvidersTrait.php b/test/suite/MatchesArrayProvidersTrait.php index 0e715c3..33986bd 100644 --- a/test/suite/MatchesArrayProvidersTrait.php +++ b/test/suite/MatchesArrayProvidersTrait.php @@ -55,6 +55,29 @@ public function twoStringsOrderedMatchesArrayProvider() ], ], ], + 'UTF-8' => [ + 'L’été était chaud.', + 'L’hiver était froid.', + [ + [ + 'value' => ' était ', + 'length' => 7, + 'indexes' => [5, 7], + ], + ], + ], + // In UTF-8: é = 0xC3A9 and © = 0xC2A9 (the last byte is the same but the Unicode characters are different) + 'UTF-8 (nasty)' => [ + 'L’été était chaud.', + 'L’hiver ©tait froid.', + [ + [ + 'value' => 'tait ', + 'length' => 5, + 'indexes' => [7, 9], + ], + ], + ], ]; } }