Skip to content

Commit d8a61ce

Browse files
committed
Fix legacy conversion filter for ISO-2022-JP-KDDI
1 parent 9ac49c0 commit d8a61ce

File tree

2 files changed

+127
-24
lines changed

2 files changed

+127
-24
lines changed

ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c

Lines changed: 127 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
#include "mbfilter.h"
3131
#include "mbfilter_iso2022jp_mobile.h"
32-
#include "mbfilter_sjis_mobile.h"
3332

3433
#include "unicode_table_cp932_ext.h"
3534
#include "unicode_table_jis.h"
@@ -39,11 +38,27 @@
3938
static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
4039
static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
4140

41+
static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter);
42+
static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter);
4243
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter);
4344
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter);
4445

4546
extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
4647

48+
/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF
49+
* These correspond to the letters A-Z
50+
* To display the flag emoji for a country, two unicode codepoints are combined,
51+
* which correspond to the two-letter code for that country
52+
* This macro converts uppercase ASCII values to Regional Indicator codepoints */
53+
/* Shift an uppercase ASCII letter onto its Regional Indicator Symbol:
 * 'A' (0x41) + 0x1F1A5 == 0x1F1E6 and 'Z' (0x5A) + 0x1F1A5 == 0x1F1FF.
 * The result has type unsigned int. */
#define NFLAGS(c) (0x1F1A5 + (unsigned int)(c))

/* Two-letter country codes of the ten flag emoji handled here.
 * Each entry is exactly two chars wide and is NOT NUL-terminated. */
static const char nflags_s[10][2] = {
	{'C', 'N'}, {'D', 'E'}, {'E', 'S'}, {'F', 'R'}, {'G', 'B'},
	{'I', 'T'}, {'J', 'P'}, {'K', 'R'}, {'R', 'U'}, {'U', 'S'}
};

/* KDDI emoji code of each flag; entry i belongs to nflags_s[i] */
static const int nflags_code_kddi[10] = {
	0x2549, /* CN */
	0x2546, /* DE */
	0x24C0, /* ES */
	0x2545, /* FR */
	0x2548, /* GB */
	0x2547, /* IT */
	0x2750, /* JP */
	0x254A, /* KR */
	0x24C1, /* RU */
	0x27F7  /* US */
};
61+
4762
static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL};
4863

4964
const mbfl_encoding mbfl_encoding_2022jp_kddi = {
@@ -125,7 +140,53 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = {
125140
#define JISX0201_KANA 0x20
126141
#define JISX0208_KANJI 0x80
127142

128-
int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
143+
/* Both macros return from the ENCLOSING function and require an `int *snd`
 * to be in scope there. The first codepoint of the two-codepoint sequence is
 * stored through `snd`; the second one becomes the return value. */

/* Keypad button: character `c` followed by 0x20E3 */
#define EMIT_KEYPAD_EMOJI(c) \
	do { \
		*snd = (c); \
		return 0x20E3; \
	} while(0)

/* Flag: the Regional Indicator pair spelling the two-letter code `country` */
#define EMIT_FLAG_EMOJI(country) \
	do { \
		*snd = NFLAGS((country)[0]); \
		return NFLAGS((country)[1]); \
	} while(0)

/* Country codes for the consecutive KDDI codes 0x2545..0x254A, in that order.
 * Entries are two chars wide and are NOT NUL-terminated. */
static const char nflags_kddi[6][2] = {
	{'F', 'R'}, {'D', 'E'}, {'I', 'T'}, {'G', 'B'}, {'C', 'N'}, {'K', 'R'}
};
147+
148+
/* Expand a value read from the emoji tables into a full codepoint.
 *
 *   cp >  0xF000            -> cp + 0x10000  (e.g. 0xF001 -> 0x1F001)
 *   0xE000 < cp <= 0xF000   -> cp + 0xF0000  (e.g. 0xE001 -> 0xFE001)
 *   otherwise               -> cp unchanged
 *
 * NOTE(review): presumably the tables hold 16-bit values, which is why the
 * upper bits have to be reconstructed here -- confirm against the table
 * definitions. */
static inline int convert_emoji_cp(int cp)
{
	int offset = 0;

	if (cp > 0xF000) {
		offset = 0x10000;
	} else if (cp > 0xE000) {
		offset = 0xF0000;
	}
	return cp + offset;
}
156+
157+
static int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
158+
{
159+
if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
160+
if (s == 0x24C0) { /* Spain */
161+
EMIT_FLAG_EMOJI("ES");
162+
} else if (s == 0x24C1) { /* Russia */
163+
EMIT_FLAG_EMOJI("RU");
164+
} else if (s >= 0x2545 && s <= 0x254A) {
165+
EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
166+
} else if (s == 0x25BC) {
167+
EMIT_KEYPAD_EMOJI('#');
168+
} else {
169+
*snd = 0;
170+
return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]);
171+
}
172+
} else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
173+
if (s == 0x2750) { /* Japan */
174+
EMIT_FLAG_EMOJI("JP");
175+
} else if (s >= 0x27A6 && s <= 0x27AE) {
176+
EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
177+
} else if (s == 0x27F7) { /* United States */
178+
EMIT_FLAG_EMOJI("US");
179+
} else if (s == 0x2830) {
180+
EMIT_KEYPAD_EMOJI('0');
181+
} else {
182+
*snd = 0;
183+
return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]);
184+
}
185+
}
186+
return 0;
187+
}
188+
189+
static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
129190
{
130191
int c1, s, w, snd = 0;
131192

@@ -260,7 +321,67 @@ static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
260321
return 0;
261322
}
262323

263-
int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
324+
/* Try to convert Unicode codepoint `c` to a KDDI emoji code.
 *
 * Returns 1 if an emoji code was stored in *s1, and 0 otherwise.
 *
 * '#' and the digits '0'-'9' may begin a keypad emoji (when followed by
 * 0x20E3), so they are not converted right away: the character is parked in
 * filter->cache, the low bit of filter->status is set, and 0 is returned.
 * On the next call, the parked character is either merged with 0x20E3 into
 * a single KDDI code, or written out as-is before `c` itself is examined.
 *
 * The CK() wrappers may return early from this function; their exact
 * behavior is defined outside this block. */
static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
	/* NOTE(review): this test masks with 0xF, while the clear below and the
	 * callers' checks use 0xFF -- confirm the difference is harmless */
	if ((filter->status & 0xF) == 1) {
		int pending = filter->cache;

		filter->cache = 0;
		filter->status &= ~0xFF;

		if (c == 0x20E3) {
			*s1 = (pending == '#') ? 0x25BC
				: (pending == '0') ? 0x2830
				: 0x27A6 + (pending - '1'); /* Parked character was '1'-'9' */
			return 1;
		}

		/* No keypad emoji after all; write the parked character out.
		 * NOTE(review): presumably the 0xFF00 bits record a non-ASCII
		 * output mode, hence the ESC ( B sequence first -- confirm */
		if (filter->status & 0xFF00) {
			static const int to_ascii[3] = { 0x1B, '(', 'B' }; /* ESC ( B */

			for (int i = 0; i < 3; i++) {
				CK((*filter->output_function)(to_ascii[i], filter->data));
			}
		}
		CK((*filter->output_function)(pending, filter->data));
		filter->status = 0;
	}

	if (c == '#' || (c >= '0' && c <= '9')) {
		/* Possible start of a keypad emoji; wait for the next codepoint */
		filter->status |= 1;
		filter->cache = c;
		return 0;
	}

	if (c == 0xA9 || c == 0xAE) {
		/* Copyright sign / Registered sign */
		*s1 = (c == 0xA9) ? 0x27DC : 0x27DD;
		return 1;
	}

	/* Only the first range containing `c` is searched */
	int idx = -1;

	if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
		idx = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_kddi2code2_value[idx];
		}
	} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
		idx = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_kddi2code3_value[idx];
		}
	} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
		idx = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
		if (idx >= 0) {
			*s1 = mb_tbl_uni_kddi2code5_val[idx];
		}
	}

	return idx >= 0;
}
383+
384+
static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
264385
{
265386
int c1, c2, s1 = 0, s2 = 0;
266387

@@ -293,11 +414,10 @@ int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
293414
}
294415

295416
if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter)) {
417+
/* A KDDI emoji was detected and stored in s1 */
296418
CODE2JIS(c1,c2,s1,s2);
297419
s1 -= 0x1600;
298-
}
299-
300-
if (filter->status == 1 && filter->cache) {
420+
} else if ((filter->status & 0xFF) == 1 && filter->cache) {
301421
/* We are just processing one of KDDI's special emoji for a phone keypad button */
302422
return 0;
303423
}
@@ -360,7 +480,7 @@ static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
360480
}
361481

362482
int c1 = filter->cache;
363-
if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
483+
if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
364484
(*filter->output_function)(c1, filter->data);
365485
}
366486

@@ -484,20 +604,6 @@ static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uin
484604
return out - buf;
485605
}
486606

487-
/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF
488-
* These correspond to the letters A-Z
489-
* To display the flag emoji for a country, two unicode codepoints are combined,
490-
* which correspond to the two-letter code for that country
491-
* This macro converts uppercase ASCII values to Regional Indicator codepoints */
492-
#define NFLAGS(c) (0x1F1A5+((unsigned int)(c)))
493-
494-
static const char nflags_s[10][2] = {
495-
"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"
496-
};
497-
static const int nflags_code_kddi[10] = {
498-
0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7
499-
};
500-
501607
static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
502608
{
503609
unsigned char *out, *limit;

ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,4 @@ extern const mbfl_encoding mbfl_encoding_2022jp_kddi;
3636
extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar;
3737
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi;
3838

39-
int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter);
40-
int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter);
41-
4239
#endif /* MBFL_MBFILTER_ISO2022_JP_MOBILE_H */

0 commit comments

Comments
 (0)