Created
November 14, 2014 09:32
-
-
Save nanasess/fefcc42073fd445a2d72 to your computer and use it in GitHub Desktop.
波ダッシュ(U+301C) を PHP で UTF-8 → SJIS-win → UTF-8 とすると、全角チルダ(U+FF5E)になってしまう罠
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Code points under comparison: U+FF5E (fullwidth tilde) and U+301C (wave dash).
$codePoints = array('FF5E', '301C');

// Build the UTF-8 form of each code point. h2bin() itself dumps every
// character it builds, so the first two lines of output come from this step.
$utf8Chars = array_map('h2bin', $codePoints);
foreach ($utf8Chars as $utf8Char) {
    var_dump(urlencode($utf8Char));
}

// UTF-8 -> SJIS-win.
$sjisChars = array();
foreach ($utf8Chars as $utf8Char) {
    $sjisChars[] = mb_convert_encoding($utf8Char, 'SJIS-win', 'UTF-8');
}
foreach ($sjisChars as $sjisChar) {
    var_dump(urlencode($sjisChar));
}

// SJIS-win -> UTF-8.
$roundTripped = array();
foreach ($sjisChars as $sjisChar) {
    $roundTripped[] = mb_convert_encoding($sjisChar, 'UTF-8', 'SJIS-win');
}

// Both of them end up as the fullwidth tilde.
foreach ($roundTripped as $result) {
    var_dump($result);
}
foreach ($roundTripped as $result) {
    var_dump(urlencode($result));
}
/*
Execution result
$ php --version
PHP 5.3.3 (cli) (built: Dec 5 2013 07:09:40)
Copyright (c) 1997-2010 The PHP Group
Zend Engine v2.3.0, Copyright (c) 1998-2010 Zend Technologies
$ php normalize.php
string(3) "～"
string(3) "〜"
string(9) "%EF%BD%9E"
string(9) "%E3%80%9C"
string(6) "%81%60"
string(6) "%81%60"
string(3) "～"
string(3) "～"
string(9) "%EF%BD%9E"
string(9) "%EF%BD%9E"
*/
/**
 * Convert a hexadecimal Unicode code point into its UTF-8 encoded character.
 *
 * The hex string is left-padded with zeros to eight digits (one UTF-32 code
 * unit), packed into big-endian bytes and transcoded from UTF-32BE to UTF-8.
 * The converted value is also printed with var_dump() before being returned.
 *
 * @param string $code Code point as hex digits without any prefix, e.g. '301C'.
 * @return string The value produced by mb_convert_encoding() for that code point.
 */
function h2bin($code) {
    // str_pad() simply returns the input unchanged when it is already eight
    // digits or longer, whereas str_repeat('0', 8 - strlen($code)) would be
    // called with a negative count in that case.
    $hex = str_pad($code, 8, '0', STR_PAD_LEFT);
    $bin = pack('H*', $hex);
    $char = mb_convert_encoding($bin, 'UTF-8', 'UTF-32BE');
    var_dump($char);
    return $char;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
参考 MS932 と Shift_JIS について
http://www.ne.jp/asahi/yuan-jiu/home/