tr/// と s/// のほうが Unicode::Japanese より速い
#! /usr/bin/env perl
# Benchmark: half-width -> full-width (hankaku -> zenkaku) conversion written
# with plain tr/// and s///, compared against Unicode::Japanese's h2z.
use strict;
use warnings;
use utf8;
use Benchmark;
use Unicode::Japanese qw[unijp];
use Readonly;
use Encode;

# Base kana => voiced (dakuon) kana. The literal is a flat run of
# key/value character pairs, so splitting it per character yields the hash.
Readonly my %dakuon => split //,
    'ウヴカガキギクグケゲコゴサザシジスズセゼソゾタダチヂツヅテデトドハバヒビフブヘベホボ';

# Base kana => semi-voiced (handakuon) kana, same pair layout as above.
Readonly my %handakuon => split //, 'ハパヒピフプヘペホポ';

# Character-class bodies listing every base kana that can take each mark.
Readonly my $dakuon_origin    => join q{}, keys %dakuon;
Readonly my $handakuon_origin => join q{}, keys %handakuon;

# Not referenced by the code visible here; kept in case later code uses it.
Readonly my $keywordlist_encoding => q{utf-8};

my $str = '今日はxxx@gmail.comにポストしておいて!!!';

# Converters under test, keyed by the label Benchmark prints.
# Each takes one optional argument: the character string to convert.
# When the argument is undefined the converter falls back to $_.
# Each returns the converted character string.
my %h = (
    'tr' => sub {
        my $text = shift;
        # Only an undefined argument falls back to $_; '' and '0' are
        # legitimate inputs and must not be replaced.
        $text = $_ unless defined $text;

        # Half-width katakana -> full-width katakana. The search list is
        # written as code-point escapes (U+FF67..U+FF6F small kana,
        # U+FF71..U+FF9C A..WA, U+FF66 WO, U+FF9D N, U+FF9E/U+FF9F sound
        # marks) in the same order as the replacement list.
        # NOTE(review): the reviewed text had a search list identical to the
        # replacement list (a no-op), presumably a width-normalisation
        # artifact; reconstructed here -- confirm against the original.
        $text =~ tr/\x{FF67}-\x{FF6F}\x{FF71}-\x{FF9C}\x{FF66}\x{FF9D}-\x{FF9F}/ァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲン゛゜/;

        # Fold "base kana + sound mark" pairs into single voiced characters.
        $text =~ s/([$dakuon_origin])゛/$dakuon{$1}/g;
        $text =~ s/([$handakuon_origin])゜/$handakuon{$1}/g;

        # Space has no slot in the full-width ASCII block; it maps to the
        # ideographic space U+3000 instead.
        # NOTE(review): reconstructed from a space-to-space no-op -- confirm.
        $text =~ tr/ /\x{3000}/;

        # Printable ASCII U+0021..U+007E -> full-width forms U+FF01..U+FF5E.
        # The range is limited to printable ASCII so that control characters
        # (e.g. "\n") and Latin-1 characters are left untouched.
        $text =~ tr[\x{21}-\x{7E}][\x{FF01}-\x{FF5E}];

        return $text;
    },
    'uj' => sub {
        my $text = shift;
        $text = $_ unless defined $text;
        return unijp($text)->h2z->getu;
    },
);

# Build the code refs handed to timethese() for one input text.
# The text is captured in a closure because Benchmark invokes code refs from
# inside its own loop, where $_ is no longer the text set by the caller.
# Both converters pay the same extra call, so the comparison stays fair.
sub bench_cases {
    my ($text) = @_;

    my %case_for;
    for my $name (keys %h) {
        my $convert = $h{$name};
        $case_for{$name} = sub { $convert->($text) };
    }
    return \%case_for;
}

# Show what each converter produces for the sample sentence.
for my $name (qw(tr uj)) {
    print encode_utf8($h{$name}->($str)), "\n";
}

# Short input: the text embedded after __DATA__.
# NOTE(review): no __DATA__ section is visible in the reviewed excerpt --
# confirm that the file provides one.
# Input is decoded so the converters work on characters, not UTF-8 bytes.
my $embedded_text = decode_utf8(join q{}, <DATA>);
timethese(50000, bench_cases($embedded_text));

# Long input: files named on the command line, or STDIN.
my $external_text = decode_utf8(join q{}, <>);
timethese(5, bench_cases($external_text));
今日はxxx@gmail.comにポストしておいて!!!
今日はxxx@gmail.comにポストしておいて!!!
Benchmark: timing 50000 iterations of tr, uj...
tr: 1 wallclock secs ( 0.64 usr + 0.00 sys = 0.64 CPU) @ 78125.00/s (n=50000)
uj: 3 wallclock secs ( 2.66 usr + 0.02 sys = 2.68 CPU) @ 18656.72/s (n=50000)