[Kansai-pm] http://www.pure.ne.jp/~learner/program/Perl_unicode.html [Was: Re: [Kans

Dan Kogai dankogai at dan.co.jp
Sat Oct 30 09:35:52 CDT 2004


弾です。大阪行きたかったなあ....

On Oct 28, 2004, at 09:42, Yasushi Nakajima wrote:
> Ktat> > $B!&(BEncode$BJY6/2q(B
> Ktat>
> Ktat> $B0JA0$K!"<+J,$G$^$H$a$?J,$O2<5-$K$"$j$^$9$,!"4V0c$C$F$k$+$b!#(B
> Ktat> http://www.pure.ne.jp/~learner/program/Perl_unicode.html
> Ktat> $B;H$$$3$J$;$PLLGr$=$&$J$s$G$9$,!"$J$+$J$+LLE]$G$9$M(B...$B!#(B
>
> 　加藤さんのまとめられたものはすごくわかりやすいです。こういうややこしい
> ものは、やはり説明を聞いただけではダメで、自分で試さないと難しいように思
> います。演習方式のミーティングをやるとよいかもしれません。

ほんと、わかりやすいですよね。ただ、一部誤解もあるようなので、メンテナ本人によるつっこみを。

＃もうすぐ5.8.6が出ますのでよろしくお願いします。

Dan the (J|En)code Maintainer

====
> Perl 5.8.x Unicode$B4XO"(B
> [$BCfN,(B]
> UTF8$B%U%i%0(B?

$B$3$N9`$O4pK\E*$K(BOK$B!#(B

> UTF8フラグのついた文字列を記述する
>
> UTF8フラグがついた文字列というのを、ソースコードに記述(ちょっと違うかも)することも出来ます。
>
>  use charnames qw(:full) して、文字の名前から文字を呼び出す。
>  \x{...} を使用する。
>  chr() を使用する。
>  pack("U", ...) を使用する。

$B$b$&0l$DBg;v$JNc!"$9$J$o$A%=!<%9$=$N$b$N$r(Butf8$B$G=q$$$?>e$G!"(B"use 
utf8"$B$9$kNc$bM_$7$$!"$H$$$&$N$+:G=i$K>R2p$7$FM_$7$$$G$9!#0J2<!"Nc!#(B

# Demonstrates that the utf8 pragma is lexically scoped: the same literal is a
# character string under "use utf8" and a plain byte string under "no utf8".
# This snippet must itself be saved as UTF-8 for the literals to be meaningful.
use utf8;
use Encode;
my $alpha = "α";               # literal is decoded: one character, UTF-8 flag on
binmode STDOUT => ":utf8";     # characters are encoded to UTF-8 on output
printf "$alpha (UTF-8:%d)\n", utf8::is_utf8($alpha);
{
     no utf8;                  # pragma is switched off for this block only
     my $alpha = "α";          # literal stays as raw octets, UTF-8 flag off
     binmode STDOUT => ":raw"; # octets are passed through unchanged
     printf "$alpha (UTF-8:%d)\n", utf8::is_utf8($alpha);
}

$B8+$F$N$H$*$j!"(Butf8 pragma$B$,(Bscoped$B$G!"(Bno utf8 $B$,;H$($kE@$b=q$$$F$[$7$$$G$9!#(B

> Wide character in print ...
>
> [中略]
>  だけど、Wide character 云々と言われるのは、たいてい、UTF8フラグのついたものです。(読んでわかる通り、
> いまいち、わかっていません(ぉ)

これは厳密な定義があります。

* UTF-8 flag がたっていること
* chr($s) >= 0x100 であること

この二つが満たされた場合にのみ、Wide character warning が出ます。

>  $B$H$b$+$/!"(BWide character $B$JJ8;zNs$O$=$N$^$^=PNO$9$k$HJ86g$r8@$o$l$k$H$$$&$o$1$G$9!#(B
>  $B2sHr$9$k$K$O!"(BPerlIO$B%l%$%d$r;H$&$+!"(BUTF8$B%U%i%0$rMn$H$7$^$9!#(B

$B$3$l$O(BOK$B!#(B

> Encode

$B$3$N9`$b$[$\(BOK$B!#(B

> utf8::* 関数
>
> 最初の方でもかきましたが、utf8::* な関数は、use しなくても使えます。use すると、別の意味になってしまうので、注意してください。

$B$3$N9`$b(BOK$B$G$9$,!"A0=R$N(Bpragma$B$H$7$F$N(Butf8$B$N;H$$J}$b at bL@$,M_$7$$!#(B

> use utf8;

ここ前項と同じ。

> use encoding;

$B$3$N9`$b(BOK$B!#$H$F$b$h$/=q$1$F$$$k$H;W$$$^$9!#(B

> Jcode $B$+$i(B Encode$B$K(B
>
> Jcode$B$N!"0lHLE*$J;H$o$lJ}$r!"(BEncode $B$G$I$&$9$k$+!#(B
>
>  $BJ8;z%3!<%IITL@$N30It%U%!%$%k$rFI$_!"(Beuc-jp $B$KJQ99(B
> use Encode qw(from_to);
> use Encode::Guess qw/euc-jp shift-jis utf8/;;
>
> open IN, 'FILE';
> while(<IN>){
>   chomp;
>   from_to($_, 'Guess', 'euc-jp');
>   push(@row, $_);
> }
> close IN;

$B$b$&>/$7CzG+$JJ}K!$r>R2p$7$F$b$i$k$H$&$l$7$$$G$9!#$^$?!"(BGuess$B$9$k$?$a$K$OD9$a$NJ8;zNs$,I,MW$J$N$G(B
$B!"A4%U%!%$%k$rFI$s$G$+$i=hM}$9$k$N$,5H$G$9!#0J2<!"%3!<%I!#(B

# Read all of FILE, guess its encoding from the complete content, and
# re-encode the text as EUC-JP. Guessing works better on longer input, so the
# file is slurped rather than processed line by line.
use Encode qw(from_to encode);   # encode() is called below, so it must be imported
use Encode::Guess;
open my $in, '<', 'FILE' or die "FILE:$!";
my $content = do { local $/; <$in> };   # slurp the whole file in one read
close $in;
# utf8 is always among the encodings guess_encoding tries, so only the
# additional Japanese suspects are listed here.
my $enc = guess_encoding($content, qw/euc-jp shiftjis 7bit-jis/);
ref $enc or die $enc;   # on failure guess_encoding returns an error string, not an object
my $euc_jp = encode("euc-jp", $enc->decode($content));
__END__

>  $BB>$N%b%8%e!<%k7PM3$G<u$1<h$C$?FbMF$G!"(BUTF8$B%U%i%0IU$-$+$I$&$+ITL@$J$N$r(Beuc-jp $B$KJQ99(B
> use Encode qw(from_to);
> use Encode::Guess;
>
> my $contents = shift;

$B$3$l$@$H(B shift @ARGV $B$HF1$8$J$N$G!"B>$N%b%8%e!<%k7PM3$K$O$J$j$^$;$s!#$"$H!"(BGuess$B$O(BUTF-8 
flag$B$r$-$A$s$HH=Dj$9$k$N$G>r7oJ,4t$OITMW$G$9!#(B

>  半角カナへの変換

$B$=$s$JFq$7$/$J$$$G$9$h!#(BUTF-8 string($B%U%i%0IU$-(B)$B$K$7$F$+$i(Btr///$B$7$A$c$($P$$$$$N$G$9!#(B

# Sketch: turn the input into a character string first, then map full-width
# katakana to half-width katakana with tr///. The arguments to decode() are
# elided in the original post.
# NOTE(review): \x{30a1}-\x{30f3} spans 83 code points while
# \x{ff67}-\x{ff9d} spans only 55, and tr/// pairs the two lists by position —
# TODO confirm the resulting mapping is the intended one before relying on it.
$string = decode(...);
$string =~ tr/\x{30a1}-\x{30f3}/\x{ff67}-\x{ff9d}/; # tr/ァ-ン/ｧ-ﾝ/

__END__



More information about the Kansai-pm mailing list