|
php.net | support | documentation | report a bug | advanced search | search howto | statistics | random bug | login |
Patches | Pull Requests | History | All | Comments | Changes | Git/SVN commits
[2008-10-27 14:10 UTC] jani@php.net
[2008-10-29 10:05 UTC] derick@php.net
[2008-10-29 10:11 UTC] pajoye@php.net
[2008-11-30 23:05 UTC] pajoye@php.net
[2008-12-01 10:54 UTC] gehrig at ishd dot de
[2008-12-01 11:08 UTC] pajoye@php.net
[2008-12-08 09:48 UTC] gehrig at ishd dot de
|
|||||||||||||||||||||||||||||||||||||
Copyright © 2001-2025 The PHP Group. All rights reserved. |
Last updated: Mon Nov 03 06:00:01 2025 UTC |
Description: ------------ The strcoll() function for comparing strings in a locale-aware manner (e.g. when sorting) does not seem to work with UTF-8 encoded strings despite using the correct Windows locale with UTF-8 codepage (65001). strcoll() always returns 2147483647, which, for example, makes array sorting of such strings more or less random. Running the same snippet with Windows-1252 (ISO-8859-1) encoded strings or on a Linux machine does in fact work as expected. Please note: to run the following reproduce code, the PHP file must be UTF-8 encoded! Reproduce code: --------------- <?php function traceStrColl($a, $b) { $outValue=strcoll($a, $b); echo "$a $b $outValue\r\n"; return $outValue; } $locale=(defined('PHP_OS') && stristr(PHP_OS, 'win')) ? 'German_Germany.65001' : 'de_DE.utf8'; $string="ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜabcdefghijklmnopqrstuvwxyzäöüß"; $array=array(); for ($i=0; $i<mb_strlen($string, 'UTF-8'); $i++) { $array[]=mb_substr($string, $i, 1, 'UTF-8'); } $oldLocale=setlocale(LC_COLLATE, "0"); var_dump(setlocale(LC_COLLATE, $locale)); usort($array, 'traceStrColl'); setlocale(LC_COLLATE, $oldLocale); var_dump($array); Expected result: ---------------- string(20) "German_Germany.65001" a B -1 [...] array(59) { [0]=> string(1) "a" [1]=> string(1) "A" [2]=> string(2) "ä" [3]=> string(2) "Ä" [4]=> string(1) "b" [5]=> string(1) "B" [6]=> string(1) "c" [7]=> string(1) "C" [8]=> string(1) "d" [9]=> string(1) "D" [10]=> string(1) "e" [11]=> string(1) "E" [12]=> string(1) "f" [13]=> string(1) "F" [14]=> string(1) "g" [15]=> string(1) "G" [16]=> string(1) "h" [17]=> string(1) "H" [18]=> string(1) "i" [19]=> string(1) "I" [20]=> string(1) "j" [21]=> string(1) "J" [22]=> string(1) "k" [23]=> string(1) "K" [24]=> string(1) "l" [25]=> string(1) "L" [26]=> string(1) "m" [27]=> string(1) "M" [28]=> string(1) "n" [29]=> string(1) "N" [30]=> string(1) "o" [31]=> string(1) "O" [32]=> string(2) "ö" [33]=> string(2) "Ö" 
[34]=> string(1) "p" [35]=> string(1) "P" [36]=> string(1) "q" [37]=> string(1) "Q" [38]=> string(1) "r" [39]=> string(1) "R" [40]=> string(1) "s" [41]=> string(1) "S" [42]=> string(2) "ß" [43]=> string(1) "t" [44]=> string(1) "T" [45]=> string(1) "u" [46]=> string(1) "U" [47]=> string(2) "ü" [48]=> string(2) "Ü" [49]=> string(1) "v" [50]=> string(1) "V" [51]=> string(1) "w" [52]=> string(1) "W" [53]=> string(1) "x" [54]=> string(1) "X" [55]=> string(1) "y" [56]=> string(1) "Y" [57]=> string(1) "z" [58]=> string(1) "Z" } Actual result: -------------- string(20) "German_Germany.65001" a B 2147483647 [...] array(59) { [0]=> string(1) "c" [1]=> string(1) "B" [2]=> string(1) "s" [3]=> string(1) "C" [4]=> string(1) "k" [5]=> string(1) "D" [6]=> string(2) "?" [7]=> string(1) "E" [8]=> string(1) "g" [9]=> string(1) "F" [10]=> string(1) "o" [11]=> string(1) "G" [12]=> string(1) "w" [13]=> string(1) "H" [14]=> string(1) "A" [15]=> string(1) "I" [16]=> string(1) "e" [17]=> string(1) "J" [18]=> string(1) "i" [19]=> string(1) "K" [20]=> string(1) "m" [21]=> string(1) "L" [22]=> string(1) "q" [23]=> string(1) "M" [24]=> string(1) "u" [25]=> string(1) "N" [26]=> string(1) "y" [27]=> string(1) "O" [28]=> string(2) "?" [29]=> string(1) "P" [30]=> string(1) "b" [31]=> string(1) "Q" [32]=> string(1) "d" [33]=> string(1) "R" [34]=> string(1) "f" [35]=> string(1) "S" [36]=> string(1) "h" [37]=> string(1) "T" [38]=> string(1) "j" [39]=> string(1) "U" [40]=> string(1) "l" [41]=> string(1) "V" [42]=> string(1) "n" [43]=> string(1) "W" [44]=> string(1) "p" [45]=> string(1) "X" [46]=> string(1) "r" [47]=> string(1) "Y" [48]=> string(1) "t" [49]=> string(1) "Z" [50]=> string(1) "v" [51]=> string(2) "?" [52]=> string(1) "x" [53]=> string(2) "?" [54]=> string(1) "z" [55]=> string(2) "?" [56]=> string(2) "?" [57]=> string(1) "a" [58]=> string(2) "?" }