[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 # 2 # $Id: GSM0338.pm,v 2.0 2007/04/22 14:54:22 dankogai Exp $ 3 # 4 package Encode::GSM0338; 5 6 use strict; 7 use warnings; 8 use Carp; 9 10 use vars qw($VERSION); 11 $VERSION = do { my @r = ( q$Revision: 2.0 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; 12 13 use Encode qw(:fallbacks); 14 15 use base qw(Encode::Encoding); 16 __PACKAGE__->Define('gsm0338'); 17 18 sub needs_lines { 1 } 19 sub perlio_ok { 0 } 20 21 use utf8; 22 our %UNI2GSM = ( 23 "\x{0040}" => "\x00", # COMMERCIAL AT 24 "\x{000A}" => "\x0A", # LINE FEED 25 "\x{000C}" => "\x1B\x0A", # FORM FEED 26 "\x{000D}" => "\x0D", # CARRIAGE RETURN 27 "\x{0020}" => "\x20", # SPACE 28 "\x{0021}" => "\x21", # EXCLAMATION MARK 29 "\x{0022}" => "\x22", # QUOTATION MARK 30 "\x{0023}" => "\x23", # NUMBER SIGN 31 "\x{0024}" => "\x02", # DOLLAR SIGN 32 "\x{0025}" => "\x25", # PERCENT SIGN 33 "\x{0026}" => "\x26", # AMPERSAND 34 "\x{0027}" => "\x27", # APOSTROPHE 35 "\x{0028}" => "\x28", # LEFT PARENTHESIS 36 "\x{0029}" => "\x29", # RIGHT PARENTHESIS 37 "\x{002A}" => "\x2A", # ASTERISK 38 "\x{002B}" => "\x2B", # PLUS SIGN 39 "\x{002C}" => "\x2C", # COMMA 40 "\x{002D}" => "\x2D", # HYPHEN-MINUS 41 "\x{002E}" => "\x2E", # FULL STOP 42 "\x{002F}" => "\x2F", # SOLIDUS 43 "\x{0030}" => "\x30", # DIGIT ZERO 44 "\x{0031}" => "\x31", # DIGIT ONE 45 "\x{0032}" => "\x32", # DIGIT TWO 46 "\x{0033}" => "\x33", # DIGIT THREE 47 "\x{0034}" => "\x34", # DIGIT FOUR 48 "\x{0035}" => "\x35", # DIGIT FIVE 49 "\x{0036}" => "\x36", # DIGIT SIX 50 "\x{0037}" => "\x37", # DIGIT SEVEN 51 "\x{0038}" => "\x38", # DIGIT EIGHT 52 "\x{0039}" => "\x39", # DIGIT NINE 53 "\x{003A}" => "\x3A", # COLON 54 "\x{003B}" => "\x3B", # SEMICOLON 55 "\x{003C}" => "\x3C", # LESS-THAN SIGN 56 "\x{003D}" => "\x3D", # EQUALS SIGN 57 "\x{003E}" => "\x3E", # GREATER-THAN SIGN 58 "\x{003F}" => "\x3F", # QUESTION MARK 59 "\x{0041}" => "\x41", # LATIN CAPITAL LETTER A 60 "\x{0042}" => "\x42", # LATIN CAPITAL LETTER B 61 "\x{0043}" => "\x43", # LATIN CAPITAL LETTER C 62 "\x{0044}" => "\x44", # LATIN CAPITAL LETTER D 63 "\x{0045}" => "\x45", # LATIN CAPITAL LETTER E 64 "\x{0046}" => "\x46", # LATIN CAPITAL LETTER F 65 "\x{0047}" => "\x47", # LATIN CAPITAL LETTER G 66 "\x{0048}" => "\x48", # LATIN CAPITAL LETTER H 67 "\x{0049}" => "\x49", # LATIN CAPITAL LETTER I 68 "\x{004A}" => "\x4A", # LATIN CAPITAL LETTER J 69 "\x{004B}" => "\x4B", # LATIN CAPITAL LETTER K 70 "\x{004C}" => "\x4C", # LATIN CAPITAL LETTER L 71 "\x{004D}" => "\x4D", # LATIN CAPITAL LETTER M 72 "\x{004E}" => "\x4E", # LATIN CAPITAL LETTER N 73 "\x{004F}" => "\x4F", # LATIN CAPITAL LETTER O 74 "\x{0050}" => "\x50", # LATIN CAPITAL LETTER P 75 "\x{0051}" => "\x51", # LATIN CAPITAL LETTER Q 76 "\x{0052}" => "\x52", # LATIN CAPITAL LETTER R 77 "\x{0053}" => "\x53", # LATIN CAPITAL LETTER S 78 "\x{0054}" => "\x54", # LATIN CAPITAL LETTER T 79 "\x{0055}" => "\x55", # LATIN CAPITAL LETTER U 80 "\x{0056}" => "\x56", # LATIN CAPITAL LETTER V 81 "\x{0057}" => "\x57", # LATIN CAPITAL LETTER W 82 "\x{0058}" => "\x58", # LATIN CAPITAL LETTER X 83 "\x{0059}" => "\x59", # LATIN CAPITAL LETTER Y 84 "\x{005A}" => "\x5A", # LATIN CAPITAL LETTER Z 85 "\x{005F}" => "\x11", # LOW LINE 86 "\x{0061}" => "\x61", # LATIN SMALL LETTER A 87 "\x{0062}" => "\x62", # LATIN SMALL LETTER B 88 "\x{0063}" => "\x63", # LATIN SMALL LETTER C 89 "\x{0064}" => "\x64", # LATIN SMALL LETTER D 90 "\x{0065}" => "\x65", # LATIN SMALL LETTER E 91 "\x{0066}" => "\x66", # LATIN SMALL LETTER F 92 "\x{0067}" => "\x67", # LATIN SMALL LETTER G 93 "\x{0068}" => "\x68", # LATIN SMALL LETTER H 94 "\x{0069}" => "\x69", # LATIN SMALL LETTER I 95 "\x{006A}" => "\x6A", # LATIN SMALL LETTER J 96 "\x{006B}" => "\x6B", # LATIN SMALL LETTER K 97 "\x{006C}" => "\x6C", # LATIN SMALL LETTER L 98 "\x{006D}" => "\x6D", # LATIN SMALL LETTER M 99 "\x{006E}" => "\x6E", # LATIN SMALL LETTER N 100 "\x{006F}" => "\x6F", # LATIN SMALL LETTER O 101 "\x{0070}" => "\x70", # LATIN SMALL LETTER P 102 "\x{0071}" => "\x71", # LATIN SMALL LETTER Q 103 "\x{0072}" => "\x72", # LATIN SMALL LETTER R 104 "\x{0073}" => "\x73", # LATIN SMALL LETTER S 105 "\x{0074}" => "\x74", # LATIN SMALL LETTER T 106 "\x{0075}" => "\x75", # LATIN SMALL LETTER U 107 "\x{0076}" => "\x76", # LATIN SMALL LETTER V 108 "\x{0077}" => "\x77", # LATIN SMALL LETTER W 109 "\x{0078}" => "\x78", # LATIN SMALL LETTER X 110 "\x{0079}" => "\x79", # LATIN SMALL LETTER Y 111 "\x{007A}" => "\x7A", # LATIN SMALL LETTER Z 112 "\x{000C}" => "\x1B\x0A", # FORM FEED 113 "\x{005B}" => "\x1B\x3C", # LEFT SQUARE BRACKET 114 "\x{005C}" => "\x1B\x2F", # REVERSE SOLIDUS 115 "\x{005D}" => "\x1B\x3E", # RIGHT SQUARE BRACKET 116 "\x{005E}" => "\x1B\x14", # CIRCUMFLEX ACCENT 117 "\x{007B}" => "\x1B\x28", # LEFT CURLY BRACKET 118 "\x{007C}" => "\x1B\x40", # VERTICAL LINE 119 "\x{007D}" => "\x1B\x29", # RIGHT CURLY BRACKET 120 "\x{007E}" => "\x1B\x3D", # TILDE 121 "\x{00A0}" => "\x1B", # NO-BREAK SPACE 122 "\x{00A1}" => "\x40", # INVERTED EXCLAMATION MARK 123 "\x{00A3}" => "\x01", # POUND SIGN 124 "\x{00A4}" => "\x24", # CURRENCY SIGN 125 "\x{00A5}" => "\x03", # YEN SIGN 126 "\x{00A7}" => "\x5F", # SECTION SIGN 127 "\x{00BF}" => "\x60", # INVERTED QUESTION MARK 128 "\x{00C4}" => "\x5B", # LATIN CAPITAL LETTER A WITH DIAERESIS 129 "\x{00C5}" => "\x0E", # LATIN CAPITAL LETTER A WITH RING ABOVE 130 "\x{00C6}" => "\x1C", # LATIN CAPITAL LETTER AE 131 "\x{00C9}" => "\x1F", # LATIN CAPITAL LETTER E WITH ACUTE 132 "\x{00D1}" => "\x5D", # LATIN CAPITAL LETTER N WITH TILDE 133 "\x{00D6}" => "\x5C", # LATIN CAPITAL LETTER O WITH DIAERESIS 134 "\x{00D8}" => "\x0B", # LATIN CAPITAL LETTER O WITH STROKE 135 "\x{00DC}" => "\x5E", # LATIN CAPITAL LETTER U WITH DIAERESIS 136 "\x{00DF}" => "\x1E", # LATIN SMALL LETTER SHARP S 137 "\x{00E0}" => "\x7F", # LATIN SMALL LETTER A WITH GRAVE 138 "\x{00E4}" => "\x7B", # LATIN SMALL LETTER A WITH DIAERESIS 139 "\x{00E5}" => "\x0F", # LATIN SMALL LETTER A WITH RING ABOVE 140 "\x{00E6}" => "\x1D", # LATIN SMALL LETTER AE 141 "\x{00E7}" => "\x09", # LATIN SMALL LETTER C WITH CEDILLA 142 "\x{00E8}" => "\x04", # LATIN SMALL LETTER E WITH GRAVE 143 "\x{00E9}" => "\x05", # LATIN SMALL LETTER E WITH ACUTE 144 "\x{00EC}" => "\x07", # LATIN SMALL LETTER I WITH GRAVE 145 "\x{00F1}" => "\x7D", # LATIN SMALL LETTER N WITH TILDE 146 "\x{00F2}" => "\x08", # LATIN SMALL LETTER O WITH GRAVE 147 "\x{00F6}" => "\x7C", # LATIN SMALL LETTER O WITH DIAERESIS 148 "\x{00F8}" => "\x0C", # LATIN SMALL LETTER O WITH STROKE 149 "\x{00F9}" => "\x06", # LATIN SMALL LETTER U WITH GRAVE 150 "\x{00FC}" => "\x7E", # LATIN SMALL LETTER U WITH DIAERESIS 151 "\x{0393}" => "\x13", # GREEK CAPITAL LETTER GAMMA 152 "\x{0394}" => "\x10", # GREEK CAPITAL LETTER DELTA 153 "\x{0398}" => "\x19", # GREEK CAPITAL LETTER THETA 154 "\x{039B}" => "\x14", # GREEK CAPITAL LETTER LAMDA 155 "\x{039E}" => "\x1A", # GREEK CAPITAL LETTER XI 156 "\x{03A0}" => "\x16", # GREEK CAPITAL LETTER PI 157 "\x{03A3}" => "\x18", # GREEK CAPITAL LETTER SIGMA 158 "\x{03A6}" => "\x12", # GREEK CAPITAL LETTER PHI 159 "\x{03A8}" => "\x17", # GREEK CAPITAL LETTER PSI 160 "\x{03A9}" => "\x15", # GREEK CAPITAL LETTER OMEGA 161 "\x{20AC}" => "\x1B\x65", # EURO SIGN 162 ); 163 our %GSM2UNI = reverse %UNI2GSM; 164 our $ESC = "\x1b"; 165 our $ATMARK = "\x40"; 166 our $FBCHAR = "\x3F"; 167 our $NBSP = "\x{00A0}"; 168 169 #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" 170 171 sub decode ($$;$) { 172 my ( $obj, $bytes, $chk ) = @_; 173 my $str; 174 while ( length $bytes ) { 175 my $c = substr( $bytes, 0, 1, '' ); 176 my $u; 177 if ( $c eq "\x00" ) { 178 my $c2 = substr( $bytes, 0, 1, '' ); 179 $u = 180 !length $c2 ? $ATMARK 181 : $c2 eq "\x00" ? "\x{0000}" 182 : exists $GSM2UNI{$c2} ? $ATMARK . $GSM2UNI{$c2} 183 : $chk 184 ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode", 185 ord($c), ord($c2) ) 186 : $ATMARK . $FBCHAR; 187 188 } 189 elsif ( $c eq $ESC ) { 190 my $c2 = substr( $bytes, 0, 1, '' ); 191 $u = 192 exists $GSM2UNI{ $c . $c2 } ? $GSM2UNI{ $c . $c2 } 193 : exists $GSM2UNI{$c2} ? $NBSP . $GSM2UNI{$c2} 194 : $chk 195 ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode", 196 ord($c), ord($c2) ) 197 : $NBSP . $FBCHAR; 198 } 199 else { 200 $u = 201 exists $GSM2UNI{$c} ? $GSM2UNI{$c} 202 : $chk 203 ? croak sprintf( "\\x%02X does not map to Unicode", ord($c) ) 204 : $FBCHAR; 205 } 206 $str .= $u; 207 } 208 $_[1] = $bytes if $chk; 209 return $str; 210 } 211 212 #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" 213 214 sub encode($$;$) { 215 my ( $obj, $str, $chk ) = @_; 216 my $bytes; 217 while ( length $str ) { 218 my $u = substr( $str, 0, 1, '' ); 219 my $c; 220 $bytes .= 221 exists $UNI2GSM{$u} ? $UNI2GSM{$u} 222 : $chk 223 ? croak sprintf( "\\x{%04x} does not map to %s", 224 ord($u), $obj->name ) 225 : $FBCHAR; 226 } 227 $_[1] = $str if $chk; 228 return $bytes; 229 } 230 231 1; 232 __END__ 233 234 =head1 NAME 235 236 Encode::GSM0338 -- ESTI GSM 03.38 Encoding 237 238 =head1 SYNOPSIS 239 240 use Encode qw/encode decode/; 241 $gsm0338 = encode("gsm0338", $utf8); # loads Encode::GSM0338 implicitly 242 $utf8 = decode("gsm0338", $gsm0338); # ditto 243 244 =head1 DESCRIPTION 245 246 GSM0338 is for GSM handsets. Though it shares alphanumerals with ASCII, 247 control character ranges and other parts are mapped very differently, 248 mainly to store Greek characters. There are also escape sequences 249 (starting with 0x1B) to cover e.g. the Euro sign. 250 251 This was once handled by L<Encode::Bytes> but because of all those 252 unusual specifications, Encode 2.20 has relocated the support to 253 this module. 254 255 =head1 NOTES 256 257 Unlike most other encodings, the following aways croaks on error 258 for any $chk that evaluates to true. 259 260 $gsm0338 = encode("gsm0338", $utf8 $chk); 261 $utf8 = decode("gsm0338", $gsm0338, $chk); 262 263 So if you want to check the validity of the encoding, surround the 264 expression with C<eval {}> block as follows; 265 266 eval { 267 $utf8 = decode("gsm0338", $gsm0338, $chk); 268 }; 269 if ($@){ 270 # handle exception here 271 } 272 273 =head1 BUGS 274 275 ESTI GSM 03.38 Encoding itself. 276 277 Mapping \x00 to '@' causes too much pain everywhere. 278 279 Its use of \x1b (escape) is also very questionable. 280 281 Because of those two, the code paging approach used use in ucm-based 282 Encoding SOMETIMES fails so this module was written. 283 284 =head1 SEE ALSO 285 286 L<Encode> 287 288 =cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |