git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff

Fix usage of char2wchar/wchar2char. Changes:

Mon, 2 Mar 2009 15:10:09 +0000 (15:10 +0000)

committer Teodor Sigaev <teodor@sigaev.ru>

Mon, 2 Mar 2009 15:10:09 +0000 (15:10 +0000)

- pg_wchar and wchar_t could have different sizes, so char2wchar
no longer calls pg_mb2wchar_with_len, to prevent an out-of-bounds
memory bug
- make char2wchar/wchar2char symmetric; they should no longer be
called with C-locale because mbstowcs/wcstombs often don't
work correctly with C-locale.
- Text parser uses pg_mb2wchar_with_len directly in case of
C-locale and multibyte encoding

Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and
following discussion.

Backpatch up to 8.2, when multibyte support was implemented in tsearch.

src/backend/tsearch/wparser_def.c patch | blob | blame | history

src/backend/utils/mb/mbutils.c patch | blob | blame | history

diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c

index a4143549756b5d25239c3ee83bce77136eb5bda4..8d7cc1b8d504d391314e8b8348933a2a6fc57fcf 100644 (file)

--- a/src/backend/tsearch/wparser_def.c

+++ b/src/backend/tsearch/wparser_def.c

@@ -7,7 +7,7 @@

* IDENTIFICATION

- * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $

+ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.21 2009/03/02 15:10:09 teodor Exp $

*-------------------------------------------------------------------------

@@ -240,12 +240,12 @@ typedef struct TParser

int lenstr; /* length of mbstring */

#ifdef USE_WIDE_UPPER_LOWER

wchar_t *wstr; /* wide character string */

- int lenwstr; /* length of wsting */

+ pg_wchar *pgwstr; /* wide character string for C-locale */

+ bool usewide;

#endif

/* State of parse */

int charmaxlen;

- bool usewide;

TParserPosition *state;

bool ignore;

bool wanthost;

@@ -299,13 +299,24 @@ TParserInit(char *str, int len)

if (prs->charmaxlen > 1)

{

prs->usewide = true;

- prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));

- prs->lenwstr = char2wchar(prs->wstr, prs->lenstr + 1,

- prs->str, prs->lenstr);

+ if ( lc_ctype_is_c() )

+ {

+ /*

+ * char2wchar doesn't work for C-locale and

+ * sizeof(pg_wchar) could be not equal to sizeof(wchar_t)

+ */

+ prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));

+ pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);

+ }

+ else

+ {

+ prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));

+ char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr);

+ }

}

else

-#endif

prs->usewide = false;

+#endif

prs->state = newTParserPosition(NULL);

prs->state->state = TPS_Base;

@@ -331,6 +342,8 @@ TParserClose(TParser *prs)

#ifdef USE_WIDE_UPPER_LOWER

if (prs->wstr)

pfree(prs->wstr);

+ if (prs->pgwstr)

+ pfree(prs->pgwstr);

#endif

pfree(prs);

@@ -338,10 +351,12 @@ TParserClose(TParser *prs)

* Character-type support functions, equivalent to is* macros, but

- * working with any possible encodings and locales. Note,

- * that with multibyte encoding and C-locale isw* function may fail

- * or give wrong result. Note 2: multibyte encoding and C-locale

- * often are used for Asian languages

+ * working with any possible encodings and locales. Notes:

+ * - with multibyte encoding and C-locale isw* function may fail

+ * or give wrong result.

+ * - multibyte encoding and C-locale often are used for

+ * Asian languages.

+ * - if locale is C then we use pgwstr instead of wstr

#ifdef USE_WIDE_UPPER_LOWER

@@ -352,14 +367,14 @@ p_is##type(TParser *prs) { \

Assert( prs->state ); \

if ( prs->usewide ) \

{ \

- if ( lc_ctype_is_c() ) \

- return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \

+ if ( prs->pgwstr ) \

+ return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\

return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \

} \

return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \

-} \

+} \

static int \

p_isnot##type(TParser *prs) { \

@@ -373,9 +388,9 @@ p_isalnum(TParser *prs)

if (prs->usewide)

{

- if (lc_ctype_is_c())

+ if (prs->pgwstr)

{

- unsigned int c = *(prs->wstr + prs->state->poschar);

+ unsigned int c = *(prs->pgwstr + prs->state->poschar);

* any non-ascii symbol with multibyte encoding with C-locale is

@@ -405,9 +420,9 @@ p_isalpha(TParser *prs)

if (prs->usewide)

{

- if (lc_ctype_is_c())

+ if (prs->pgwstr)

{

- unsigned int c = *(prs->wstr + prs->state->poschar);

+ unsigned int c = *(prs->pgwstr + prs->state->poschar);

* any non-ascii symbol with multibyte encoding with C-locale is

diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c

index c3cf7f5db69f31abed4d7988545f7d6e1034340a..f5ba80d101d5e6f40f4cc9ff772a0261c04f4d39 100644 (file)

--- a/src/backend/utils/mb/mbutils.c

+++ b/src/backend/utils/mb/mbutils.c

@@ -4,7 +4,7 @@

* (currently mule internal code (mic) is used)

* Tatsuo Ishii

- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.78 2009/01/22 10:09:48 mha Exp $

+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.79 2009/03/02 15:10:09 teodor Exp $

#include "postgres.h"

@@ -601,7 +601,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen)

}

else

#endif /* WIN32 */

+ {

+ Assert( !lc_ctype_is_c() );

result = wcstombs(to, from, tolen);

+ }

return result;

}

@@ -647,22 +650,12 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)

else

#endif /* WIN32 */

{

- if (lc_ctype_is_c())

- {

- /*

- * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be

- * allocated with sufficient space

- */

- result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);

- }

- else

- {

- /* mbstowcs requires ending '\0' */

- char *str = pnstrdup(from, fromlen);

+ /* mbstowcs requires ending '\0' */

+ char *str = pnstrdup(from, fromlen);

- result = mbstowcs(to, str, tolen);

- pfree(str);

- }

+ Assert( !lc_ctype_is_c() );

+ result = mbstowcs(to, str, tolen);

+ pfree(str);

}

if (result == -1)

This is the main PostgreSQL git repository.

RSS Atom