1/*-------------------------------------------------------------------------
4 * main parse functions for tsearch
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10 * src/backend/tsearch/ts_parse.c
12 *-------------------------------------------------------------------------
21 #define IGNORE_LONGLEXEME 1
52 * fields to store last variant to lexize (basically, thesaurus or similar
53 * to, which wants several lexemes
77 list->tail->next = newpl;
93 if (
list->head == NULL)
183 * usual mode: dictionary wants only one word, but we should keep in
184 * mind that we should go through the whole stack
190 char *curValLemm = curVal->
lemm;
191 int curValLenLemm = curVal->
lenlemm;
197 /* skip this type of lexeme */
217 * dictionary wants next word, so setup and store current
218 * position and go to multiword mode
229 if (!res)
/* dictionary doesn't know this lexeme */
235 curValLenLemm = strlen(res->
lexeme);
248 {
/* curDictId is valid */
252 * Dictionary ld->curDictId asks us about following words
261 if (curVal->
type != 0)
263 bool dictExists =
false;
267 /* skip this type of lexeme */
273 * We should be sure that current type of lexeme is recognized
274 * by our dictionary: we just check whether it exists in the list of
277 for (
i = 0;
i < map->
len && !dictExists;
i++)
284 * Dictionary can't work with current type of lexeme,
285 * return to basic mode and redo all stored lexemes
303 /* Dictionary wants one more */
313 * Dictionary normalizes lexemes, so we remove from stack all
314 * used lexemes, return to basic mode and redo end of stack
327 /* reset to initial state */
337 * Dict doesn't want the next lexeme and didn't recognize anything, redo
338 * from ld->towork.head
350 * Parse string and lexize words.
352 * prs will be filled in.
358 lenlemm = 0;
/* silence compiler warning */
384#ifdef IGNORE_LONGLEXEME
386 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
387 errmsg(
"word is too long to be indexed"),
388 errdetail(
"Words longer than %d characters are ignored.",
393 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
394 errmsg(
"word is too long to be indexed"),
395 errdetail(
"Words longer than %d characters are ignored.",
402 while ((norms =
LexizeExec(&ldata, NULL)) != NULL)
406 prs->
pos++;
/* set pos */
438/* Add a word to prs->words[] */
456 * Add pos and matching-query-item data to the just-added word.
457 * Here, buf/buflen represent a processed lexeme, not raw token text.
459 * If the query contains more than one matching item, we replicate
460 * the last-added word so that each item can be pointed to. The
461 * duplicate entries are marked with repeated = 1.
478 for (
i = 0;
i < query->
size;
i++)
512 while (ptr && ptr->
lexeme)
520 tmplexs = lexs->
next;
543 lenlemm = 0;
/* silence compiler warning */
570#ifdef IGNORE_LONGLEXEME
572 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
573 errmsg(
"word is too long to be indexed"),
574 errdetail(
"Words longer than %d characters are ignored.",
579 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
580 errmsg(
"word is too long to be indexed"),
581 errdetail(
"Words longer than %d characters are ignored.",
590 if ((norms =
LexizeExec(&ldata, &lexs)) != NULL)
604 * Generate the headline, as a text object, from HeadlineParsedText.
612 int numfragments = 0;
624 int dist = ptr - ((
char *) out);
628 ptr = ((
char *) out) + dist;
636 /* start of a new fragment */
639 /* add a fragment delimiter if this is after the first one */
640 if (numfragments > 1)
658 memcpy(ptr, wrd->
word, wrd->
len);
int errdetail(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define FunctionCall4(flinfo, arg1, arg2, arg3, arg4)
#define FunctionCall1(flinfo, arg1)
#define FunctionCall2(flinfo, arg1, arg2)
#define FunctionCall3(flinfo, arg1, arg2, arg3)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
static Datum Int32GetDatum(int32 X)
static int32 DatumGetInt32(Datum X)
static void word(struct vars *v, int dir, struct state *lp, struct state *rp)
HeadlineWordEntry * words
TSConfigCacheEntry * lookup_ts_config_cache(Oid cfgId)
TSParserCacheEntry * lookup_ts_parser_cache(Oid prsId)
TSDictionaryCacheEntry * lookup_ts_dictionary_cache(Oid dictId)
static void hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
static void setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
text * generateHeadline(HeadlineParsedText *prs)
struct ListParsedLex ListParsedLex
static void RemoveHead(LexizeData *ld)
static void setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
static void hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
static TSLexeme * LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
static void LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
static void moveToWaste(LexizeData *ld, ParsedLex *stop)
struct ParsedLex ParsedLex
static ParsedLex * LPLRemoveHead(ListParsedLex *list)
static void addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
static void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
static void LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
static void SET_VARSIZE(void *PTR, Size len)