[Frame]

varlena.h File Reference

#include "nodes/pg_list.h"
#include "utils/sortsupport.h"

Include dependency graph for varlena.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct ClosestMatchState

Typedefs

typedef struct ClosestMatchState ClosestMatchState

Functions

int varstr_cmp (const char *arg1, int len1, const char *arg2, int len2, Oid collid)

void varstr_sortsupport (SortSupport ssup, Oid typid, Oid collid)

int varstr_levenshtein (const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, bool trusted)

int varstr_levenshtein_less_equal (const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)

List * textToQualifiedNameList (text *textval)

bool SplitIdentifierString (char *rawstring, char separator, List **namelist)

bool SplitDirectoriesString (char *rawstring, char separator, List **namelist)

bool SplitGUCList (char *rawstring, char separator, List **namelist)

text * replace_text_regexp (text *src_text, text *pattern_text, text *replace_text, int cflags, Oid collation, int search_start, int n)

void initClosestMatch (ClosestMatchState *state, const char *source, int max_d)

void updateClosestMatch (ClosestMatchState *state, const char *candidate)

const char * getClosestMatch (ClosestMatchState *state)

Typedef Documentation

◆ ClosestMatchState

typedef struct ClosestMatchState ClosestMatchState

Function Documentation

◆ getClosestMatch()

const char * getClosestMatch ( ClosestMatchState * state )

Definition at line 5339 of file varlena.c.

5340{

5341 Assert(state);

5342

5343 return state->match;

5344}

Assert

Assert(PointerIsAligned(start, uint64))

state

Definition: regguts.h:323

References Assert().

Referenced by dblink_fdw_validator(), file_fdw_validator(), postgres_fdw_validator(), and postgresql_fdw_validator().

◆ initClosestMatch()

void initClosestMatch ( ClosestMatchState * state,

const char * source,

int max_d

)

Definition at line 5284 of file varlena.c.

5285{

5286 Assert(state);

5287 Assert(max_d >= 0);

5288

5289 state->source = source;

5290 state->min_d = -1;

5291 state->max_d = max_d;

5292 state->match = NULL;

5293}

source

static rewind_source * source

Definition: pg_rewind.c:89

References Assert(), and source.

Referenced by dblink_fdw_validator(), file_fdw_validator(), postgres_fdw_validator(), and postgresql_fdw_validator().

◆ replace_text_regexp()

text * replace_text_regexp ( text * src_text,

text * pattern_text,

text * replace_text,

int cflags,

Oid collation,

int search_start,

int n

)

Definition at line 3302 of file varlena.c.

3306{

3307 text *ret_text;

3308 regex_t *re;

3309 int src_text_len = VARSIZE_ANY_EXHDR(src_text);

3310 int nmatches = 0;

3311 StringInfoData buf;

3312 regmatch_t pmatch[10]; /* main match, plus \1 to \9 */

3313 int nmatch = lengthof(pmatch);

3314 pg_wchar *data;

3315 size_t data_len;

3316 int data_pos;

3317 char *start_ptr;

3318 int escape_status;

3319

3320 initStringInfo(&buf);

3321

3322 /* Convert data string to wide characters. */

3323 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));

3324 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);

3325

3326 /* Check whether replace_text has escapes, especially regexp submatches. */

3327 escape_status = check_replace_text_has_escape(replace_text);

3328

3329 /* If no regexp submatches, we can use REG_NOSUB. */

3330 if (escape_status < 2)

3331 {

3332 cflags |= REG_NOSUB;

3333 /* Also tell pg_regexec we only want the whole-match location. */

3334 nmatch = 1;

3335 }

3336

3337 /* Prepare the regexp. */

3338 re = RE_compile_and_cache(pattern_text, cflags, collation);

3339

3340 /* start_ptr points to the data_pos'th character of src_text */

3341 start_ptr = (char *) VARDATA_ANY(src_text);

3342 data_pos = 0;

3343

3344 while (search_start <= data_len)

3345 {

3346 int regexec_result;

3347

3348 CHECK_FOR_INTERRUPTS();

3349

3350 regexec_result = pg_regexec(re,

3351 data,

3352 data_len,

3353 search_start,

3354 NULL, /* no details */

3355 nmatch,

3356 pmatch,

3357 0);

3358

3359 if (regexec_result == REG_NOMATCH)

3360 break;

3361

3362 if (regexec_result != REG_OKAY)

3363 {

3364 char errMsg[100];

3365

3366 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));

3367 ereport(ERROR,

3368 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),

3369 errmsg("regular expression failed: %s", errMsg)));

3370 }

3371

3372 /*

3373 * Count matches, and decide whether to replace this match.

3374 */

3375 nmatches++;

3376 if (n > 0 && nmatches != n)

3377 {

3378 /*

3379 * No, so advance search_start, but not start_ptr/data_pos. (Thus,

3380 * we treat the matched text as if it weren't matched, and copy it

3381 * to the output later.)

3382 */

3383 search_start = pmatch[0].rm_eo;

3384 if (pmatch[0].rm_so == pmatch[0].rm_eo)

3385 search_start++;

3386 continue;

3387 }

3388

3389 /*

3390 * Copy the text to the left of the match position. Note we are given

3391 * character not byte indexes.

3392 */

3393 if (pmatch[0].rm_so - data_pos > 0)

3394 {

3395 int chunk_len;

3396

3397 chunk_len = charlen_to_bytelen(start_ptr,

3398 pmatch[0].rm_so - data_pos);

3399 appendBinaryStringInfo(&buf, start_ptr, chunk_len);

3400

3401 /*

3402 * Advance start_ptr over that text, to avoid multiple rescans of

3403 * it if the replace_text contains multiple back-references.

3404 */

3405 start_ptr += chunk_len;

3406 data_pos = pmatch[0].rm_so;

3407 }

3408

3409 /*

3410 * Copy the replace_text, processing escapes if any are present.

3411 */

3412 if (escape_status > 0)

3413 appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,

3414 start_ptr, data_pos);

3415 else

3416 appendStringInfoText(&buf, replace_text);

3417

3418 /* Advance start_ptr and data_pos over the matched text. */

3419 start_ptr += charlen_to_bytelen(start_ptr,

3420 pmatch[0].rm_eo - data_pos);

3421 data_pos = pmatch[0].rm_eo;

3422

3423 /*

3424 * If we only want to replace one occurrence, we're done.

3425 */

3426 if (n > 0)

3427 break;

3428

3429 /*

3430 * Advance search position. Normally we start the next search at the

3431 * end of the previous match; but if the match was of zero length, we

3432 * have to advance by one character, or we'd just find the same match

3433 * again.

3434 */

3435 search_start = data_pos;

3436 if (pmatch[0].rm_so == pmatch[0].rm_eo)

3437 search_start++;

3438 }

3439

3440 /*

3441 * Copy the text to the right of the last match.

3442 */

3443 if (data_pos < data_len)

3444 {

3445 int chunk_len;

3446

3447 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;

3448 appendBinaryStringInfo(&buf, start_ptr, chunk_len);

3449 }

3450

3451 ret_text = cstring_to_text_with_len(buf.data, buf.len);

3452 pfree(buf.data);

3453 pfree(data);

3454

3455 return ret_text;

3456}

lengthof

#define lengthof(array)

Definition: c.h:787

errcode

int errcode(int sqlerrcode)

Definition: elog.c:854

errmsg

int errmsg(const char *fmt,...)

Definition: elog.c:1071

ERROR

#define ERROR

Definition: elog.h:39

ereport

#define ereport(elevel,...)

Definition: elog.h:150

pg_wchar

unsigned int pg_wchar

Definition: mbprint.c:31

pg_mb2wchar_with_len

int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)

Definition: mbutils.c:987

pfree

void pfree(void *pointer)

Definition: mcxt.c:1594

palloc

void * palloc(Size size)

Definition: mcxt.c:1365

CHECK_FOR_INTERRUPTS

#define CHECK_FOR_INTERRUPTS()

Definition: miscadmin.h:122

data

const void * data

Definition: pg_crc32c_sse42.c:27

buf

static char * buf

Definition: pg_test_fsync.c:72

pg_regerror

size_t pg_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)

Definition: regerror.c:60

REG_NOMATCH

#define REG_NOMATCH

Definition: regex.h:216

regmatch_t

#define regmatch_t

Definition: regex.h:246

REG_OKAY

#define REG_OKAY

Definition: regex.h:215

REG_NOSUB

#define REG_NOSUB

Definition: regex.h:185

regex_t

#define regex_t

Definition: regex.h:245

pg_regexec

int pg_regexec(regex_t *re, const chr *string, size_t len, size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], int flags)

Definition: regexec.c:185

RE_compile_and_cache

regex_t * RE_compile_and_cache(text *text_re, int cflags, Oid collation)

Definition: regexp.c:141

appendBinaryStringInfo

void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)

Definition: stringinfo.c:281

initStringInfo

void initStringInfo(StringInfo str)

Definition: stringinfo.c:97

StringInfoData

Definition: stringinfo.h:47

varlena

Definition: c.h:692

VARSIZE_ANY

static Size VARSIZE_ANY(const void *PTR)

Definition: varatt.h:460

VARSIZE_ANY_EXHDR

static Size VARSIZE_ANY_EXHDR(const void *PTR)

Definition: varatt.h:472

VARDATA_ANY

static char * VARDATA_ANY(const void *PTR)

Definition: varatt.h:486

appendStringInfoText

static void appendStringInfoText(StringInfo str, const text *t)

Definition: varlena.c:3078

check_replace_text_has_escape

static int check_replace_text_has_escape(const text *replace_text)

Definition: varlena.c:3169

cstring_to_text_with_len

text * cstring_to_text_with_len(const char *s, int len)

Definition: varlena.c:193

appendStringInfoRegexpSubstr

static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, regmatch_t *pmatch, char *start_ptr, int data_pos)

Definition: varlena.c:3202

charlen_to_bytelen

static int charlen_to_bytelen(const char *p, int n)

Definition: varlena.c:501

replace_text

Datum replace_text(PG_FUNCTION_ARGS)

Definition: varlena.c:3092

References appendBinaryStringInfo(), appendStringInfoRegexpSubstr(), appendStringInfoText(), buf, charlen_to_bytelen(), CHECK_FOR_INTERRUPTS, check_replace_text_has_escape(), cstring_to_text_with_len(), data, ereport, errcode(), errmsg(), ERROR, initStringInfo(), lengthof, palloc(), pfree(), pg_mb2wchar_with_len(), pg_regerror(), pg_regexec(), RE_compile_and_cache(), REG_NOMATCH, REG_NOSUB, REG_OKAY, regex_t, regmatch_t, replace_text(), VARDATA_ANY(), VARSIZE_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by textregexreplace(), textregexreplace_extended(), and textregexreplace_noopt().

◆ SplitDirectoriesString()

bool SplitDirectoriesString ( char * rawstring,

char separator,

List ** namelist

)

Definition at line 2871 of file varlena.c.

2873{

2874 char *nextp = rawstring;

2875 bool done = false;

2876

2877 *namelist = NIL;

2878

2879 while (scanner_isspace(*nextp))

2880 nextp++; /* skip leading whitespace */

2881

2882 if (*nextp == '\0')

2883 return true; /* allow empty string */

2884

2885 /* At the top of the loop, we are at start of a new directory. */

2886 do

2887 {

2888 char *curname;

2889 char *endp;

2890

2891 if (*nextp == '"')

2892 {

2893 /* Quoted name --- collapse quote-quote pairs */

2894 curname = nextp + 1;

2895 for (;;)

2896 {

2897 endp = strchr(nextp + 1, '"');

2898 if (endp == NULL)

2899 return false; /* mismatched quotes */

2900 if (endp[1] != '"')

2901 break; /* found end of quoted name */

2902 /* Collapse adjacent quotes into one quote, and look again */

2903 memmove(endp, endp + 1, strlen(endp));

2904 nextp = endp;

2905 }

2906 /* endp now points at the terminating quote */

2907 nextp = endp + 1;

2908 }

2909 else

2910 {

2911 /* Unquoted name --- extends to separator or end of string */

2912 curname = endp = nextp;

2913 while (*nextp && *nextp != separator)

2914 {

2915 /* trailing whitespace should not be included in name */

2916 if (!scanner_isspace(*nextp))

2917 endp = nextp + 1;

2918 nextp++;

2919 }

2920 if (curname == endp)

2921 return false; /* empty unquoted name not allowed */

2922 }

2923

2924 while (scanner_isspace(*nextp))

2925 nextp++; /* skip trailing whitespace */

2926

2927 if (*nextp == separator)

2928 {

2929 nextp++;

2930 while (scanner_isspace(*nextp))

2931 nextp++; /* skip leading whitespace for next */

2932 /* we expect another name, so done remains false */

2933 }

2934 else if (*nextp == '\0')

2935 done = true;

2936 else

2937 return false; /* invalid syntax */

2938

2939 /* Now safe to overwrite separator with a null */

2940 *endp = '\0';

2941

2942 /* Truncate path if it's overlength */

2943 if (strlen(curname) >= MAXPGPATH)

2944 curname[MAXPGPATH - 1] = '\0';

2945

2946 /*

2947 * Finished isolating current name --- add it to list

2948 */

2949 curname = pstrdup(curname);

2950 canonicalize_path(curname);

2951 *namelist = lappend(*namelist, curname);

2952

2953 /* Loop back if we didn't reach end of string */

2954 } while (!done);

2955

2956 return true;

2957}

lappend

List * lappend(List *list, void *datum)

Definition: list.c:339

pstrdup

char * pstrdup(const char *in)

Definition: mcxt.c:1759

MAXPGPATH

#define MAXPGPATH

Definition: pg_config_manual.h:95

NIL

#define NIL

Definition: pg_list.h:68

canonicalize_path

void canonicalize_path(char *path)

Definition: path.c:337

scanner_isspace

bool scanner_isspace(char ch)

Definition: scansup.c:117

separator

Definition: print.h:106

References canonicalize_path(), lappend(), MAXPGPATH, NIL, pstrdup(), and scanner_isspace().

Referenced by check_oauth_validator(), load_libraries(), and PostmasterMain().

◆ SplitGUCList()

bool SplitGUCList ( char * rawstring,

char separator,

List ** namelist

)

Definition at line 2992 of file varlena.c.

2994{

2995 char *nextp = rawstring;

2996 bool done = false;

2997

2998 *namelist = NIL;

2999

3000 while (scanner_isspace(*nextp))

3001 nextp++; /* skip leading whitespace */

3002

3003 if (*nextp == '\0')

3004 return true; /* allow empty string */

3005

3006 /* At the top of the loop, we are at start of a new identifier. */

3007 do

3008 {

3009 char *curname;

3010 char *endp;

3011

3012 if (*nextp == '"')

3013 {

3014 /* Quoted name --- collapse quote-quote pairs */

3015 curname = nextp + 1;

3016 for (;;)

3017 {

3018 endp = strchr(nextp + 1, '"');

3019 if (endp == NULL)

3020 return false; /* mismatched quotes */

3021 if (endp[1] != '"')

3022 break; /* found end of quoted name */

3023 /* Collapse adjacent quotes into one quote, and look again */

3024 memmove(endp, endp + 1, strlen(endp));

3025 nextp = endp;

3026 }

3027 /* endp now points at the terminating quote */

3028 nextp = endp + 1;

3029 }

3030 else

3031 {

3032 /* Unquoted name --- extends to separator or whitespace */

3033 curname = nextp;

3034 while (*nextp && *nextp != separator &&

3035 !scanner_isspace(*nextp))

3036 nextp++;

3037 endp = nextp;

3038 if (curname == nextp)

3039 return false; /* empty unquoted name not allowed */

3040 }

3041

3042 while (scanner_isspace(*nextp))

3043 nextp++; /* skip trailing whitespace */

3044

3045 if (*nextp == separator)

3046 {

3047 nextp++;

3048 while (scanner_isspace(*nextp))

3049 nextp++; /* skip leading whitespace for next */

3050 /* we expect another name, so done remains false */

3051 }

3052 else if (*nextp == '\0')

3053 done = true;

3054 else

3055 return false; /* invalid syntax */

3056

3057 /* Now safe to overwrite separator with a null */

3058 *endp = '\0';

3059

3060 /*

3061 * Finished isolating current name --- add it to list

3062 */

3063 *namelist = lappend(*namelist, curname);

3064

3065 /* Loop back if we didn't reach end of string */

3066 } while (!done);

3067

3068 return true;

3069}

References lappend(), NIL, and scanner_isspace().

Referenced by check_debug_io_direct(), dumpFunc(), parse_hba_auth_opt(), pg_get_functiondef(), and PostmasterMain().

◆ SplitIdentifierString()

bool SplitIdentifierString ( char * rawstring,

char separator,

List ** namelist

)

Definition at line 2744 of file varlena.c.

2746{

2747 char *nextp = rawstring;

2748 bool done = false;

2749

2750 *namelist = NIL;

2751

2752 while (scanner_isspace(*nextp))

2753 nextp++; /* skip leading whitespace */

2754

2755 if (*nextp == '\0')

2756 return true; /* allow empty string */

2757

2758 /* At the top of the loop, we are at start of a new identifier. */

2759 do

2760 {

2761 char *curname;

2762 char *endp;

2763

2764 if (*nextp == '"')

2765 {

2766 /* Quoted name --- collapse quote-quote pairs, no downcasing */

2767 curname = nextp + 1;

2768 for (;;)

2769 {

2770 endp = strchr(nextp + 1, '"');

2771 if (endp == NULL)

2772 return false; /* mismatched quotes */

2773 if (endp[1] != '"')

2774 break; /* found end of quoted name */

2775 /* Collapse adjacent quotes into one quote, and look again */

2776 memmove(endp, endp + 1, strlen(endp));

2777 nextp = endp;

2778 }

2779 /* endp now points at the terminating quote */

2780 nextp = endp + 1;

2781 }

2782 else

2783 {

2784 /* Unquoted name --- extends to separator or whitespace */

2785 char *downname;

2786 int len;

2787

2788 curname = nextp;

2789 while (*nextp && *nextp != separator &&

2790 !scanner_isspace(*nextp))

2791 nextp++;

2792 endp = nextp;

2793 if (curname == nextp)

2794 return false; /* empty unquoted name not allowed */

2795

2796 /*

2797 * Downcase the identifier, using same code as main lexer does.

2798 *

2799 * XXX because we want to overwrite the input in-place, we cannot

2800 * support a downcasing transformation that increases the string

2801 * length. This is not a problem given the current implementation

2802 * of downcase_truncate_identifier, but we'll probably have to do

2803 * something about this someday.

2804 */

2805 len = endp - curname;

2806 downname = downcase_truncate_identifier(curname, len, false);

2807 Assert(strlen(downname) <= len);

2808 strncpy(curname, downname, len); /* strncpy is required here */

2809 pfree(downname);

2810 }

2811

2812 while (scanner_isspace(*nextp))

2813 nextp++; /* skip trailing whitespace */

2814

2815 if (*nextp == separator)

2816 {

2817 nextp++;

2818 while (scanner_isspace(*nextp))

2819 nextp++; /* skip leading whitespace for next */

2820 /* we expect another name, so done remains false */

2821 }

2822 else if (*nextp == '\0')

2823 done = true;

2824 else

2825 return false; /* invalid syntax */

2826

2827 /* Now safe to overwrite separator with a null */

2828 *endp = '\0';

2829

2830 /* Truncate name if it's overlength */

2831 truncate_identifier(curname, strlen(curname), false);

2832

2833 /*

2834 * Finished isolating current name --- add it to list

2835 */

2836 *namelist = lappend(*namelist, curname);

2837

2838 /* Loop back if we didn't reach end of string */

2839 } while (!done);

2840

2841 return true;

2842}

len

const void size_t len

Definition: pg_crc32c_sse42.c:28

truncate_identifier

void truncate_identifier(char *ident, int len, bool warn)

Definition: scansup.c:93

downcase_truncate_identifier

char * downcase_truncate_identifier(const char *ident, int len, bool warn)

Definition: scansup.c:37

References Assert(), downcase_truncate_identifier(), lappend(), len, NIL, pfree(), scanner_isspace(), and truncate_identifier().

Referenced by check_createrole_self_grant(), check_datestyle(), check_log_connections(), check_log_destination(), check_restrict_nonsystem_relation_kind(), check_search_path(), check_temp_tablespaces(), check_wal_consistency_checking(), ExtractExtensionList(), parse_extension_control_file(), parse_output_parameters(), parse_publication_options(), plpgsql_extra_checks_check_hook(), PrepareTempTablespaces(), preprocessNamespacePath(), stringToQualifiedNameList(), textToQualifiedNameList(), and validate_sync_standby_slots().

◆ textToQualifiedNameList()

List * textToQualifiedNameList ( text * textval )

Definition at line 2686 of file varlena.c.

2687{

2688 char *rawname;

2689 List *result = NIL;

2690 List *namelist;

2691 ListCell *l;

2692

2693 /* Convert to C string (handles possible detoasting). */

2694 /* Note we rely on being able to modify rawname below. */

2695 rawname = text_to_cstring(textval);

2696

2697 if (!SplitIdentifierString(rawname, '.', &namelist))

2698 ereport(ERROR,

2699 (errcode(ERRCODE_INVALID_NAME),

2700 errmsg("invalid name syntax")));

2701

2702 if (namelist == NIL)

2703 ereport(ERROR,

2704 (errcode(ERRCODE_INVALID_NAME),

2705 errmsg("invalid name syntax")));

2706

2707 foreach(l, namelist)

2708 {

2709 char *curname = (char *) lfirst(l);

2710

2711 result = lappend(result, makeString(pstrdup(curname)));

2712 }

2713

2714 pfree(rawname);

2715 list_free(namelist);

2716

2717 return result;

2718}

list_free

void list_free(List *list)

Definition: list.c:1546

lfirst

#define lfirst(lc)

Definition: pg_list.h:172

List

Definition: pg_list.h:54

ListCell

Definition: pg_list.h:46

makeString

String * makeString(char *str)

Definition: value.c:63

SplitIdentifierString

bool SplitIdentifierString(char *rawstring, char separator, List **namelist)

Definition: varlena.c:2744

text_to_cstring

char * text_to_cstring(const text *t)

Definition: varlena.c:214

References ereport, errcode(), errmsg(), ERROR, lappend(), lfirst, list_free(), makeString(), NIL, pfree(), pstrdup(), SplitIdentifierString(), and text_to_cstring().

Referenced by bt_metap(), bt_multi_page_stats(), bt_page_items_internal(), bt_page_stats_internal(), convert_table_name(), currtid_byrelname(), get_raw_page_internal(), get_rel_from_relname(), nextval(), pg_get_serial_sequence(), pg_get_viewdef_name(), pg_get_viewdef_name_ext(), pg_relpages(), pg_relpages_v1_5(), pgrowlocks(), pgstatindex(), pgstatindex_v1_5(), pgstattuple(), pgstattuple_v1_5(), row_security_active_name(), text_regclass(), ts_parse_byname(), and ts_token_type_byname().

◆ updateClosestMatch()

void updateClosestMatch ( ClosestMatchState * state,

const char * candidate

)

Definition at line 5304 of file varlena.c.

5305{

5306 int dist;

5307

5308 Assert(state);

5309

5310 if (state->source == NULL || state->source[0] == '\0' ||

5311 candidate == NULL || candidate[0] == '\0')

5312 return;

5313

5314 /*

5315 * To avoid ERROR-ing, we check the lengths here instead of setting

5316 * 'trusted' to false in the call to varstr_levenshtein_less_equal().

5317 */

5318 if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||

5319 strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)

5320 return;

5321

5322 dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),

5323 candidate, strlen(candidate), 1, 1, 1,

5324 state->max_d, true);

5325 if (dist <= state->max_d &&

5326 dist <= strlen(state->source) / 2 &&

5327 (state->min_d == -1 || dist < state->min_d))

5328 {

5329 state->min_d = dist;

5330 state->match = candidate;

5331 }

5332}

MAX_LEVENSHTEIN_STRLEN

#define MAX_LEVENSHTEIN_STRLEN

Definition: levenshtein.c:26

varstr_levenshtein_less_equal

int varstr_levenshtein_less_equal(const char *source, int slen, const char *target, int tlen, int ins_c, int del_c, int sub_c, int max_d, bool trusted)

References Assert(), MAX_LEVENSHTEIN_STRLEN, and varstr_levenshtein_less_equal().

Referenced by dblink_fdw_validator(), file_fdw_validator(), postgres_fdw_validator(), and postgresql_fdw_validator().

◆ varstr_cmp()

int varstr_cmp ( const char * arg1,

int len1,

const char * arg2,

int len2,

Oid collid

)

Definition at line 1297 of file varlena.c.

1298{

1299 int result;

1300 pg_locale_t mylocale;

1301

1302 check_collation_set(collid);

1303

1304 mylocale = pg_newlocale_from_collation(collid);

1305

1306 if (mylocale->collate_is_c)

1307 {

1308 result = memcmp(arg1, arg2, Min(len1, len2));

1309 if ((result == 0) && (len1 != len2))

1310 result = (len1 < len2) ? -1 : 1;

1311 }

1312 else

1313 {

1314 /*

1315 * memcmp() can't tell us which of two unequal strings sorts first,

1316 * but it's a cheap way to tell if they're equal. Testing shows that

1317 * memcmp() followed by strcoll() is only trivially slower than

1318 * strcoll() by itself, so we don't lose much if this doesn't work out

1319 * very often, and if it does - for example, because there are many

1320 * equal strings in the input - then we win big by avoiding expensive

1321 * collation-aware comparisons.

1322 */

1323 if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)

1324 return 0;

1325

1326 result = pg_strncoll(arg1, len1, arg2, len2, mylocale);

1327

1328 /* Break tie if necessary. */

1329 if (result == 0 && mylocale->deterministic)

1330 {

1331 result = memcmp(arg1, arg2, Min(len1, len2));

1332 if ((result == 0) && (len1 != len2))

1333 result = (len1 < len2) ? -1 : 1;

1334 }

1335 }

1336

1337 return result;

1338}

Min

#define Min(x, y)

Definition: c.h:1003

collid

Oid collid

Definition: collationcmds.c:700

pg_newlocale_from_collation

pg_locale_t pg_newlocale_from_collation(Oid collid)

Definition: pg_locale.c:1166

pg_strncoll

int pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale)

Definition: pg_locale.c:1290

pg_locale_struct

Definition: pg_locale.h:150

pg_locale_struct::collate_is_c

bool collate_is_c

Definition: pg_locale.h:152

pg_locale_struct::deterministic

bool deterministic

Definition: pg_locale.h:151

check_collation_set

static void check_collation_set(Oid collid)

Definition: varlena.c:1268

References check_collation_set(), pg_locale_struct::collate_is_c, collid, pg_locale_struct::deterministic, Min, pg_newlocale_from_collation(), and pg_strncoll().

Referenced by bpchar_larger(), bpchar_smaller(), bpcharcmp(), bpchareq(), bpcharge(), bpchargt(), bpcharle(), bpcharlt(), bpcharne(), btnametextcmp(), bttextnamecmp(), citextcmp(), compareJsonbScalarValue(), gin_compare_jsonb(), make_greater_string(), namecmp(), nameeqtext(), namenetext(), spg_text_leaf_consistent(), text_cmp(), texteqname(), and textnename().

◆ varstr_levenshtein()

int varstr_levenshtein ( const char * source,

int slen,

const char * target,

int tlen,

int ins_c,

int del_c,

int sub_c,

bool trusted

)

Definition at line 73 of file levenshtein.c.

78{

79 int m,

80 n;

81 int *prev;

82 int *curr;

83 int *s_char_len = NULL;

84 int j;

85 const char *y;

86

87 /*

88 * For varstr_levenshtein_less_equal, we have real variables called

89 * start_column and stop_column; otherwise it's just short-hand for 0 and

90 * m.

91 */

92#ifdef LEVENSHTEIN_LESS_EQUAL

93 int start_column,

94 stop_column;

95

96#undef START_COLUMN

97#undef STOP_COLUMN

98#define START_COLUMN start_column

99#define STOP_COLUMN stop_column

100#else

101#undef START_COLUMN

102#undef STOP_COLUMN

103#define START_COLUMN 0

104#define STOP_COLUMN m

105#endif

106

107 /* Convert string lengths (in bytes) to lengths in characters */

108 m = pg_mbstrlen_with_len(source, slen);

109 n = pg_mbstrlen_with_len(target, tlen);

110

111 /*

112 * We can transform an empty s into t with n insertions, or a non-empty t

113 * into an empty s with m deletions.

114 */

115 if (!m)

116 return n * ins_c;

117 if (!n)

118 return m * del_c;

119

120 /*

121 * For security concerns, restrict excessive CPU+RAM usage. (This

122 * implementation uses O(m) memory and has O(mn) complexity.) If

123 * "trusted" is true, caller is responsible for not making excessive

124 * requests, typically by using a small max_d along with strings that are

125 * bounded, though not necessarily to MAX_LEVENSHTEIN_STRLEN exactly.

126 */

127 if (!trusted &&

128 (m > MAX_LEVENSHTEIN_STRLEN ||

129 n > MAX_LEVENSHTEIN_STRLEN))

130 ereport(ERROR,

131 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),

132 errmsg("levenshtein argument exceeds maximum length of %d characters",

133 MAX_LEVENSHTEIN_STRLEN)));

134

135#ifdef LEVENSHTEIN_LESS_EQUAL

136 /* Initialize start and stop columns. */

137 start_column = 0;

138 stop_column = m + 1;

139

140 /*

141 * If max_d >= 0, determine whether the bound is impossibly tight. If so,

142 * return max_d + 1 immediately. Otherwise, determine whether it's tight

143 * enough to limit the computation we must perform. If so, figure out

144 * initial stop column.

145 */

146 if (max_d >= 0)

147 {

148 int min_theo_d; /* Theoretical minimum distance. */

149 int max_theo_d; /* Theoretical maximum distance. */

150 int net_inserts = n - m;

151

152 min_theo_d = net_inserts < 0 ?

153 -net_inserts * del_c : net_inserts * ins_c;

154 if (min_theo_d > max_d)

155 return max_d + 1;

156 if (ins_c + del_c < sub_c)

157 sub_c = ins_c + del_c;

158 max_theo_d = min_theo_d + sub_c * Min(m, n);

159 if (max_d >= max_theo_d)

160 max_d = -1;

161 else if (ins_c + del_c > 0)

162 {

163 /*

164 * Figure out how much of the first row of the notional matrix we

165 * need to fill in. If the string is growing, the theoretical

166 * minimum distance already incorporates the cost of deleting the

167 * number of characters necessary to make the two strings equal in

168 * length. Each additional deletion forces another insertion, so

169 * the best-case total cost increases by ins_c + del_c. If the

170 * string is shrinking, the minimum theoretical cost assumes no

171 * excess deletions; that is, we're starting no further right than

172 * column n - m. If we do start further right, the best-case

173 * total cost increases by ins_c + del_c for each move right.

174 */

175 int slack_d = max_d - min_theo_d;

176 int best_column = net_inserts < 0 ? -net_inserts : 0;

177

178 stop_column = best_column + (slack_d / (ins_c + del_c)) + 1;

179 if (stop_column > m)

180 stop_column = m + 1;

181 }

182 }

183#endif

184

185 /*

186 * In order to avoid calling pg_mblen() repeatedly on each character in s,

187 * we cache all the lengths before starting the main loop -- but if all

188 * the characters in both strings are single byte, then we skip this and

189 * use a fast-path in the main loop. If only one string contains

190 * multi-byte characters, we still build the array, so that the fast-path

191 * needn't deal with the case where the array hasn't been initialized.

192 */

193 if (m != slen || n != tlen)

194 {

195 int i;

196 const char *cp = source;

197

198 s_char_len = (int *) palloc((m + 1) * sizeof(int));

199 for (i = 0; i < m; ++i)

200 {

201 s_char_len[i] = pg_mblen(cp);

202 cp += s_char_len[i];

203 }

204 s_char_len[i] = 0;

205 }

206

207 /* One more cell for initialization column and row. */

208 ++m;

209 ++n;

210

211 /* Previous and current rows of notional array. */

212 prev = (int *) palloc(2 * m * sizeof(int));

213 curr = prev + m;

214

215 /*

216 * To transform the first i characters of s into the first 0 characters of

217 * t, we must perform i deletions.

218 */

219 for (int i = START_COLUMN; i < STOP_COLUMN; i++)

220 prev[i] = i * del_c;

221

222 /* Loop through rows of the notional array */

223 for (y = target, j = 1; j < n; j++)

224 {

225 int *temp;

226 const char *x = source;

227 int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;

228 int i;

229

230#ifdef LEVENSHTEIN_LESS_EQUAL

231

232 /*

233 * In the best case, values percolate down the diagonal unchanged, so

234 * we must increment stop_column unless it's already on the right end

235 * of the array. The inner loop will read prev[stop_column], so we

236 * have to initialize it even though it shouldn't affect the result.

237 */

238 if (stop_column < m)

239 {

240 prev[stop_column] = max_d + 1;

241 ++stop_column;

242 }

243

244 /*

245 * The main loop fills in curr, but curr[0] needs a special case: to

246 * transform the first 0 characters of s into the first j characters

247 * of t, we must perform j insertions. However, if start_column > 0,

248 * this special case does not apply.

249 */

250 if (start_column == 0)

251 {

252 curr[0] = j * ins_c;

253 i = 1;

254 }

255 else

256 i = start_column;

257#else

258 curr[0] = j * ins_c;

259 i = 1;

260#endif

261

262 /*

263 * This inner loop is critical to performance, so we include a

264 * fast-path to handle the (fairly common) case where no multibyte

265 * characters are in the mix. The fast-path is entitled to assume

266 * that if s_char_len is not initialized then BOTH strings contain

267 * only single-byte characters.

268 */

269 if (s_char_len != NULL)

270 {

271 for (; i < STOP_COLUMN; i++)

272 {

273 int ins;

274 int del;

275 int sub;

276 int x_char_len = s_char_len[i - 1];

277

278 /*

279 * Calculate costs for insertion, deletion, and substitution.

280 *

281 * When calculating cost for substitution, we compare the last

282 * character of each possibly-multibyte character first,

283 * because that's enough to rule out most mis-matches. If we

284 * get past that test, then we compare the lengths and the

285 * remaining bytes.

286 */

287 ins = prev[i] + ins_c;

288 del = curr[i - 1] + del_c;

289 if (x[x_char_len - 1] == y[y_char_len - 1]

290 && x_char_len == y_char_len &&

291 (x_char_len == 1 || rest_of_char_same(x, y, x_char_len)))

292 sub = prev[i - 1];

293 else

294 sub = prev[i - 1] + sub_c;

295

296 /* Take the one with minimum cost. */

297 curr[i] = Min(ins, del);

298 curr[i] = Min(curr[i], sub);

299

300 /* Point to next character. */

301 x += x_char_len;

302 }

303 }

304 else

305 {

306 for (; i < STOP_COLUMN; i++)

307 {

308 int ins;

309 int del;

310 int sub;

311

312 /* Calculate costs for insertion, deletion, and substitution. */

313 ins = prev[i] + ins_c;

314 del = curr[i - 1] + del_c;

315 sub = prev[i - 1] + ((*x == *y) ? 0 : sub_c);

316

317 /* Take the one with minimum cost. */

318 curr[i] = Min(ins, del);

319 curr[i] = Min(curr[i], sub);

320

321 /* Point to next character. */

322 x++;

323 }

324 }

325

326 /* Swap current row with previous row. */

327 temp = curr;

328 curr = prev;

329 prev = temp;

330

331 /* Point to next character. */

332 y += y_char_len;

333

334#ifdef LEVENSHTEIN_LESS_EQUAL

335

336 /*

337 * This chunk of code represents a significant performance hit if used

338 * in the case where there is no max_d bound. This is probably not

339 * because the max_d >= 0 test itself is expensive, but rather because

340 * the possibility of needing to execute this code prevents tight

341 * optimization of the loop as a whole.

342 */

343 if (max_d >= 0)

344 {

345 /*

346 * The "zero point" is the column of the current row where the

347 * remaining portions of the strings are of equal length. There

348 * are (n - 1) characters in the target string, of which j have

349 * been transformed. There are (m - 1) characters in the source

350 * string, so we want to find the value for zp where (n - 1) - j =

351 * (m - 1) - zp.

352 */

353 int zp = j - (n - m);

354

355 /* Check whether the stop column can slide left. */

356 while (stop_column > 0)

357 {

358 int ii = stop_column - 1;

359 int net_inserts = ii - zp;

360

361 if (prev[ii] + (net_inserts > 0 ? net_inserts * ins_c :

362 -net_inserts * del_c) <= max_d)

363 break;

364 stop_column--;

365 }

366

367 /* Check whether the start column can slide right. */

368 while (start_column < stop_column)

369 {

370 int net_inserts = start_column - zp;

371

372 if (prev[start_column] +

373 (net_inserts > 0 ? net_inserts * ins_c :

374 -net_inserts * del_c) <= max_d)

375 break;

376

377 /*

378 * We'll never again update these values, so we must make sure

379 * there's nothing here that could confuse any future

380 * iteration of the outer loop.

381 */

382 prev[start_column] = max_d + 1;

383 curr[start_column] = max_d + 1;

384 if (start_column != 0)

385 source += (s_char_len != NULL) ? s_char_len[start_column - 1] : 1;

386 start_column++;

387 }

388

389 /* If they cross, we're going to exceed the bound. */

390 if (start_column >= stop_column)

391 return max_d + 1;

392 }

393#endif

394 }

395

396 /*

397 * Because the final value was swapped from the previous row to the

398 * current row, that's where we'll find it.

399 */

400 return prev[m - 1];

401}

y

int y

Definition: isn.c:76

x

int x

Definition: isn.c:75

j

int j

Definition: isn.c:78

i

int i

Definition: isn.c:77

START_COLUMN

#define START_COLUMN

STOP_COLUMN

#define STOP_COLUMN

pg_mbstrlen_with_len

int pg_mbstrlen_with_len(const char *mbstr, int limit)

Definition: mbutils.c:1058

pg_mblen

int pg_mblen(const char *mbstr)

Definition: mbutils.c:1024

rest_of_char_same

static bool rest_of_char_same(const char *s1, const char *s2, int len)

Definition: varlena.c:5248

References ereport, errcode(), errmsg(), ERROR, i, j, MAX_LEVENSHTEIN_STRLEN, Min, palloc(), pg_mblen(), pg_mbstrlen_with_len(), rest_of_char_same(), source, START_COLUMN, STOP_COLUMN, x, and y.

Referenced by levenshtein(), and levenshtein_with_costs().

◆ varstr_levenshtein_less_equal()

int varstr_levenshtein_less_equal ( const char * source,

int slen,

const char * target,

int tlen,

int ins_c,

int del_c,

int sub_c,

int max_d,

bool trusted

)

Referenced by levenshtein_less_equal(), levenshtein_less_equal_with_costs(), searchRangeTableForCol(), updateClosestMatch(), and updateFuzzyAttrMatchState().

◆ varstr_sortsupport()

/*
 * varstr_sortsupport
 *
 * Set up sort support for a string-like type.  (Definition at line 1615 of
 * file varlena.c.)
 *
 *	ssup	sort support state to fill in; ssup->abbreviate is read as the
 *			caller's request for abbreviated keys
 *	typid	type OID being sorted; BPCHAROID and NAMEOID get dedicated
 *			comparators, anything else gets the generic string comparators
 *	collid	collation OID; validated with check_collation_set() and then
 *			resolved with pg_newlocale_from_collation()
 *
 * ssup->comparator is always assigned.  When abbreviation is in use or the
 * collation is not C, a VarStringSortSupport is additionally allocated and
 * stored in ssup->ssup_extra.
 */
void
varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
{
	bool		want_abbrev = ssup->abbreviate;
	bool		c_collation;
	pg_locale_t loc;
	VarStringSortSupport *state;

	check_collation_set(collid);
	loc = pg_newlocale_from_collation(collid);

	/*
	 * Pick a comparator that works on two datums directly, so that each
	 * comparison skips the (potentially substantial) cost of going through
	 * the fmgr layer.
	 *
	 * With a C collation the memcmp()-based routines are used:
	 * bpcharfastcmp_c, namefastcmp_c or varstrfastcmp_c.  Otherwise the
	 * strcoll()-based varlenafastcmp_locale is used (for BpChar as well),
	 * except that type NAME gets namefastcmp_locale.
	 */
	if (loc->collate_is_c)
	{
		c_collation = true;

		if (typid == BPCHAROID)
		{
			ssup->comparator = bpcharfastcmp_c;
		}
		else if (typid == NAMEOID)
		{
			ssup->comparator = namefastcmp_c;
			/* Abbreviated keys are not supported for type NAME, for now */
			want_abbrev = false;
		}
		else
		{
			ssup->comparator = varstrfastcmp_c;
		}
	}
	else
	{
		c_collation = false;

		if (typid == NAMEOID)
		{
			ssup->comparator = namefastcmp_locale;
			/* Abbreviated keys are not supported for type NAME, for now */
			want_abbrev = false;
		}
		else
		{
			ssup->comparator = varlenafastcmp_locale;
		}

		/*
		 * Abbreviation under non-C collations appears to be broken on many
		 * common platforms; see pg_strxfrm_enabled().
		 *
		 * Broken locales are not the only concern.  Some platforms might
		 * need abbreviated keys switched off at compile time: macOS's
		 * strxfrm(), for one, is known not to concentrate much of the
		 * original string's entropy in the leading part of the transformed
		 * blob, and other supported platforms may behave likewise.  So even
		 * if the blanket disablement is ever lifted, per-platform
		 * disablement may still be wanted.
		 */
		if (!pg_strxfrm_enabled(loc))
			want_abbrev = false;
	}

	/*
	 * Plain C-collation comparison without abbreviation needs no extra
	 * state, so there is nothing more to do in that case.
	 */
	if (!want_abbrev && c_collation)
		return;

	/*
	 * Both the abbreviated-key case and the locale-aware comparison case use
	 * a VarStringSortSupport: its buffers serve as scratch space (and let
	 * callees detect that BpChar semantics are required), and abbreviation
	 * keeps further state in it.
	 */
	state = palloc(sizeof(VarStringSortSupport));
	state->buf1 = palloc(TEXTBUFLEN);
	state->buflen1 = TEXTBUFLEN;
	state->buf2 = palloc(TEXTBUFLEN);
	state->buflen2 = TEXTBUFLEN;

	/* No cached lengths yet: start from invalid values */
	state->last_len1 = -1;
	state->last_len2 = -1;

	/* Initialize the cached comparison result */
	state->last_returned = 0;

	/* The locale is only stored when the collation is not C */
	state->locale = c_collation ? NULL : loc;

	/*
	 * buf1 and buf2 may hold either original strings or strxfrm() blobs, so
	 * cache_blob records at all times which of the two they contain.
	 *
	 * Conversions and comparisons are frequently done in two separate
	 * phases, but they may also be interleaved, and caching must stay
	 * correct either way.  The comparison cache trusts the buffers only when
	 * cache_blob is false; the strxfrm() cache trusts them only when it is
	 * true.
	 *
	 * The initial value is arbitrary; true is used.
	 */
	state->cache_blob = true;
	state->collate_c = c_collation;
	state->typid = typid;
	ssup->ssup_extra = state;

	/*
	 * Arrange to use abbreviated keys if we can.  The comparator chosen
	 * above is kept as the authoritative one, which the core code may fall
	 * back to if abbreviation is aborted.
	 */
	if (want_abbrev)
	{
		state->prop_card = 0.20;
		initHyperLogLog(&state->abbr_card, 10);
		initHyperLogLog(&state->full_card, 10);

		/* Save the full comparator before replacing it */
		ssup->abbrev_full_comparator = ssup->comparator;
		ssup->comparator = ssup_datum_unsigned_cmp;
		ssup->abbrev_converter = varstr_abbrev_convert;
		ssup->abbrev_abort = varstr_abbrev_abort;
	}
}

initHyperLogLog

void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)

Definition: hyperloglog.c:66

locale

static char * locale

Definition: initdb.c:140

pg_strxfrm_enabled

bool pg_strxfrm_enabled(pg_locale_t locale)

Definition: pg_locale.c:1304

SortSupportData::abbreviate

bool abbreviate

Definition: sortsupport.h:155

SortSupportData::comparator

int(* comparator)(Datum x, Datum y, SortSupport ssup)

Definition: sortsupport.h:106

SortSupportData::abbrev_converter

Datum(* abbrev_converter)(Datum original, SortSupport ssup)

Definition: sortsupport.h:172

SortSupportData::ssup_extra

void * ssup_extra

Definition: sortsupport.h:87

SortSupportData::abbrev_full_comparator

int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)

Definition: sortsupport.h:191

SortSupportData::abbrev_abort

bool(* abbrev_abort)(int memtupcount, SortSupport ssup)

Definition: sortsupport.h:182

VarStringSortSupport

Definition: varlena.c:84

VarStringSortSupport::last_returned

int last_returned

Definition: varlena.c:92

VarStringSortSupport::typid

Oid typid

Definition: varlena.c:95

VarStringSortSupport::collate_c

bool collate_c

Definition: varlena.c:94

VarStringSortSupport::last_len2

int last_len2

Definition: varlena.c:91

VarStringSortSupport::locale

pg_locale_t locale

Definition: varlena.c:99

VarStringSortSupport::buf1

char * buf1

Definition: varlena.c:85

VarStringSortSupport::buflen2

int buflen2

Definition: varlena.c:89

VarStringSortSupport::last_len1

int last_len1

Definition: varlena.c:90

VarStringSortSupport::full_card

hyperLogLogState full_card

Definition: varlena.c:97

VarStringSortSupport::cache_blob

bool cache_blob

Definition: varlena.c:93

VarStringSortSupport::buf2

char * buf2

Definition: varlena.c:87

VarStringSortSupport::prop_card

double prop_card

Definition: varlena.c:98

VarStringSortSupport::buflen1

int buflen1

Definition: varlena.c:88

VarStringSortSupport::abbr_card

hyperLogLogState abbr_card

Definition: varlena.c:96

ssup_datum_unsigned_cmp

int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)

Definition: tuplesort.c:3133

varstr_abbrev_abort

static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup)

Definition: varlena.c:2170

varlenafastcmp_locale

static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)

Definition: varlena.c:1833

bpcharfastcmp_c

static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)

Definition: varlena.c:1788

namefastcmp_c

static int namefastcmp_c(Datum x, Datum y, SortSupport ssup)

Definition: varlena.c:1821

namefastcmp_locale

static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup)

Definition: varlena.c:1864

varstr_abbrev_convert

static Datum varstr_abbrev_convert(Datum original, SortSupport ssup)

Definition: varlena.c:1978

varstrfastcmp_c

static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)

Definition: varlena.c:1751

TEXTBUFLEN

#define TEXTBUFLEN

Definition: varlena.c:117

Referenced by bpchar_sortsupport(), btbpchar_pattern_sortsupport(), btnamesortsupport(), bttext_pattern_sortsupport(), bttextsortsupport(), and bytea_sortsupport().