1/*-------------------------------------------------------------------------
4 * I/O functions for tsvector
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10 * src/backend/utils/adt/tsvector.c
12 *-------------------------------------------------------------------------
22#include "utils/fmgrprotos.h"
30 int poslen;
/* number of elements in pos */
34/* Compare two WordEntryPos values for qsort */
45 * Removes duplicate pos entries. If there are two entries with the same pos but
46 * different weight, the higher weight is retained, so we can't use
83 * Compare two WordEntry structs for qsort_arg. This can also be used on
84 * WordEntryIN structs, since those have WordEntry as their first field.
91 char *BufferStr = (
char *)
arg;
94 &BufferStr[
b->pos],
b->len,
99 * Sort an array of WordEntryIN, remove duplicates.
100 * *outbuflen receives the amount of space needed for strings and positions.
123 /* done accumulating data into *res, count space needed */
139 /* append ptr's positions to res's positions */
151 /* just give ptr's positions to pos */
160 /* count space needed for last item */
178 Node *escontext = fcinfo->context;
182 int arrlen;
/* allocated size of arr */
195 * Tokens are appended to tmpbuf, cur is a pointer to the end of used
200 int buflen = 256;
/* allocated size of tmpbuf */
212 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
213 errmsg(
"word is too long (%ld bytes, max %ld bytes)",
219 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
220 errmsg(
"string is too long for tsvector (%ld bytes, max %ld bytes)",
224 * Enlarge buffers if needed
232 while ((
cur -
tmpbuf) + toklen >= buflen)
262 /* Did gettoken_tsvector fail? */
273 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
274 errmsg(
"string is too long for tsvector (%d bytes, max %d bytes)", buflen,
MAXSTRPOS)));
288 if (arr[
i].entry.haspos)
290 /* This should be unreachable because of MAXNUMPOS restrictions */
291 if (arr[
i].poslen > 0xFFFF)
294 /* Copy number of positions */
300 memcpy(strbuf + stroff, arr[
i].pos, arr[
i].poslen *
sizeof(
WordEntryPos));
308 Assert((strbuf + stroff - (
char *) in) == totallen);
326 lenbuf = out->
size * 2
/* '' */ + out->
size - 1
/* space */ + 2
/* \0 */ ;
327 for (
i = 0;
i < out->
size;
i++)
331 lenbuf += 1
/* : */ + 7
/* int2 + , + weight */ *
POSDATALEN(out, &(ptr[
i]));
334 curout = outbuf = (
char *)
palloc(lenbuf);
335 for (
i = 0;
i < out->
size;
i++)
337 curbegin = curin =
STRPTR(out) + ptr->
pos;
341 while (curin - curbegin < ptr->
len)
347 else if (
t_iseq(curin,
'\\'))
351 *curout++ = *curin++;
395 * Binary Input / Output functions. The binary format is as follows:
397 * uint32 number of lexemes
400 * lexeme text in client encoding, null-terminated
401 * uint16 number of positions
403 * uint16 WordEntryPos
418 for (
i = 0;
i < vec->
size;
i++)
423 * the strings in the TSVector array are not null-terminated, so we
424 * have to send the null-terminator separately
436 for (
j = 0;
j < npos;
j++)
452 int datalen;
/* number of bytes used in the variable size
453 * area after fixed size TSVector header and
456 Size len;
/* allocated size of vec */
457 bool needSort =
false;
465 len = hdrlen * 2;
/* times two to make room for lexemes */
467 vec->
size = nentries;
470 for (
i = 0;
i < nentries;
i++)
481 lex_len = strlen(lexeme);
483 elog(
ERROR,
"invalid tsvector: lexeme too long");
486 elog(
ERROR,
"invalid tsvector: maximum total lexeme length exceeded");
489 elog(
ERROR,
"unexpected number of tsvector positions");
492 * Looks valid. Fill the WordEntry struct, and copy lexeme.
494 * But make sure the buffer is large enough first.
496 while (hdrlen +
SHORTALIGN(datalen + lex_len) +
507 memcpy(
STRPTR(vec) + datalen, lexeme, lex_len);
516 /* Receive positions */
523 * Pad to 2-byte alignment if necessary. Though we used palloc0
524 * for the initial allocation, subsequent repalloc'd memory areas
525 * are not initialized to zero.
529 *(
STRPTR(vec) + datalen) =
'\0';
536 for (
j = 0;
j < npos;
j++)
540 elog(
ERROR,
"position information is misordered");
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereturn(context, dummy_value,...)
#define PG_FREE_IF_COPY(ptr, n)
#define PG_RETURN_BYTEA_P(x)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_CSTRING(x)
#define PG_GETARG_CSTRING(n)
Assert(PointerIsAligned(start, uint64))
#define CALCDATASIZE(x, lenstr)
static int pg_cmp_s32(int32 a, int32 b)
int pg_database_encoding_max_length(void)
int pg_mblen(const char *mbstr)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
void * palloc0(Size size)
#define SOFT_ERROR_OCCURRED(escontext)
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
#define qsort(a, b, c, d)
struct StringInfoData * StringInfo
WordEntry entries[FLEXIBLE_ARRAY_MEMBER]
#define PG_GETARG_TSVECTOR(n)
#define PG_RETURN_TSVECTOR(x)
#define WEP_SETWEIGHT(x, v)
Datum tsvectorout(PG_FUNCTION_ARGS)
static int uniquePos(WordEntryPos *a, int l)
Datum tsvectorrecv(PG_FUNCTION_ARGS)
static int compareentry(const void *va, const void *vb, void *arg)
Datum tsvectorin(PG_FUNCTION_ARGS)
int compareWordEntryPos(const void *a, const void *b)
Datum tsvectorsend(PG_FUNCTION_ARGS)
static int uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
void close_tsvector_parser(TSVectorParseState state)
bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
TSVectorParseState init_tsvector_parser(char *input, int flags, Node *escontext)
static void SET_VARSIZE(void *PTR, Size len)
static StringInfoData tmpbuf