1/*-------------------------------------------------------------------------
3 * EUC_JP, SJIS and MULE_INTERNAL
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
9 * src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
11 *-------------------------------------------------------------------------
19 * SJIS alternative code.
20 * this code is used if a mapping EUC -> SJIS is not defined.
22 #define PGSJISALTCODE 0x81ac
23 #define PGEUCALTCODE 0xa2ae
26 * conversion table between SJIS UDC (IBM kanji) and EUC_JP
31 .
name =
"euc_jp_and_sjis",
44 * INTEGER, -- source encoding id
45 * INTEGER, -- destination encoding id
46 * CSTRING, -- source string (null terminated C string)
47 * CSTRING, -- destination string (null terminated C string)
48 * INTEGER, -- source string length
49 * BOOL -- if true, don't throw an error if conversion fails
52 * Returns the number of bytes successfully converted.
/*
 * Forward declarations of the file-local (static) conversion helpers.
 *
 * All six share one signature: a read-only source byte string, an output
 * pointer p, an int length len, and a bool noError flag; each returns int.
 *
 * NOTE(review): going by the names, these convert between SJIS, EUC_JP and
 * MULE_INTERNAL ("mic").  Presumably len is the source length in bytes, the
 * return value is the number of source bytes converted, and noError
 * suppresses error reporting on bad input -- confirm against the
 * definitions, which are not fully visible here.
 */
/* SJIS -> MULE_INTERNAL (per name) */
56static int sjis2mic(
const unsigned char *sjis,
unsigned char *p,
int len,
bool noError);
/* MULE_INTERNAL -> SJIS (per name) */
57static int mic2sjis(
const unsigned char *mic,
unsigned char *p,
int len,
bool noError);
/* EUC_JP -> MULE_INTERNAL (per name) */
58static int euc_jp2mic(
const unsigned char *euc,
unsigned char *p,
int len,
bool noError);
/* MULE_INTERNAL -> EUC_JP (per name) */
59static int mic2euc_jp(
const unsigned char *mic,
unsigned char *p,
int len,
bool noError);
/* EUC_JP -> SJIS directly (per name) */
60static int euc_jp2sjis(
const unsigned char *euc,
unsigned char *p,
int len,
bool noError);
/* SJIS -> EUC_JP directly (per name) */
61static int sjis2euc_jp(
const unsigned char *sjis,
unsigned char *p,
int len,
bool noError);
163 sjis2mic(
const unsigned char *sjis,
unsigned char *p,
int len,
bool noError)
165 const unsigned char *
start = sjis;
175 if (c1 >= 0xa1 && c1 <= 0xdf)
177 /* JIS X0201 (1 byte kana) */
186 * JIS X0208, X0212, user defined extended characters
196 if (k >= 0xed40 && k < 0xf040)
198 /* NEC selection IBM kanji */
201 k2 = ibmkanji[
i].nec;
206 k = ibmkanji[
i].sjis;
207 c1 = (k >> 8) & 0xff;
217 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
218 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
220 else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
222 /* NEC selection IBM kanji - Other undecided justice */
227 else if (k >= 0xf040 && k < 0xf540)
230 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
231 * 0x7e7e EUC 0xf5a1 - 0xfefe
235 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
236 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
238 else if (k >= 0xf540 && k < 0xfa40)
241 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
242 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
246 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
247 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
249 else if (k >= 0xfa40)
252 * mapping IBM kanji to X0208 and X0212
256 k2 = ibmkanji[
i].sjis;
265 *p++ = 0x80 | ((k & 0xff00) >> 8);
266 *p++ = 0x80 | (k & 0xff);
271 *p++ = 0x80 | (k >> 8);
272 *p++ = 0x80 | (k & 0xff);
281 {
/* should be ASCII */
302 mic2sjis(
const unsigned char *mic,
unsigned char *p,
int len,
bool noError)
304 const unsigned char *
start = mic;
321 (
const char *) mic,
len);
334 (
const char *) mic,
len);
342 k = (c1 << 8) | (c2 & 0xff);
347 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
350 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
351 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
365 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
366 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
373 k2 = ibmkanji[
i].euc & 0xffff;
382 k = ibmkanji[
i].sjis;
395 (
const char *) mic,
len);
409 euc_jp2mic(
const unsigned char *euc,
unsigned char *p,
int len,
bool noError)
411 const unsigned char *
start = euc;
426 (
const char *) euc,
len);
439 (
const char *) euc,
len);
447 {
/* JIS X0212 kanji? */
470 mic2euc_jp(
const unsigned char *mic,
unsigned char *p,
int len,
bool noError)
472 const unsigned char *
start = mic;
487 (
const char *) mic,
len);
500 (
const char *) mic,
len);
523 (
const char *) mic,
len);
539 const unsigned char *
start = euc;
556 (
const char *) euc,
len);
569 (
const char *) euc,
len);
578 /* JIS X0212 kanji? */
586 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
587 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
597 k2 = ibmkanji[
i].euc & 0xffff;
606 k = ibmkanji[
i].sjis;
616 /* JIS X0208 kanji? */
618 k = (c1 << 8) | (c2 & 0xff);
623 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
626 *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
627 *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
643 const unsigned char *
start = sjis;
662 (
const char *) sjis,
len);
675 (
const char *) sjis,
len);
677 if (c1 >= 0xa1 && c1 <= 0xdf)
679 /* JIS X0201 (1 byte kana) */
686 * JIS X0208, X0212, user defined extended characters
690 if (k >= 0xed40 && k < 0xf040)
692 /* NEC selection IBM kanji */
695 k2 = ibmkanji[
i].nec;
700 k = ibmkanji[
i].sjis;
701 c1 = (k >> 8) & 0xff;
710 *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
711 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
713 else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
715 /* NEC selection IBM kanji - Other undecided justice */
719 else if (k >= 0xf040 && k < 0xf540)
722 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
723 * 0x7e7e EUC 0xf5a1 - 0xfefe
726 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
727 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
729 else if (k >= 0xf540 && k < 0xfa40)
732 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
733 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
737 *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
738 *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
740 else if (k >= 0xfa40)
743 * mapping IBM kanji to X0208 and X0212
748 k2 = ibmkanji[
i].sjis;
757 *p++ = 0x80 | ((k & 0xff00) >> 8);
758 *p++ = 0x80 | (k & 0xff);
762 *p++ = 0x80 | (k >> 8);
763 *p++ = 0x80 | (k & 0xff);
#define IS_HIGHBIT_SET(ch)
static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
Datum mic_to_sjis(PG_FUNCTION_ARGS)
Datum euc_jp_to_sjis(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC_EXT(.name="euc_jp_and_sjis",.version=PG_VERSION)
static int sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)
PG_FUNCTION_INFO_V1(euc_jp_to_sjis)
Datum euc_jp_to_mic(PG_FUNCTION_ARGS)
static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
Datum sjis_to_euc_jp(PG_FUNCTION_ARGS)
Datum mic_to_euc_jp(PG_FUNCTION_ARGS)
Datum sjis_to_mic(PG_FUNCTION_ARGS)
static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
#define PG_GETARG_BOOL(n)
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
void report_invalid_encoding(int encoding, const char *mbstr, int len)
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)