1/*-------------------------------------------------------------------------
4 * GiST support for network types.
6 * The key thing to understand about this code is the definition of the
7 * "union" of a set of INET/CIDR values. It works like this:
8 * 1. If the values are not all of the same IP address family, the "union"
9 * is a dummy value with family number zero, minbits zero, commonbits zero,
10 * address all zeroes. Otherwise:
11 * 2. The union has the common IP address family number.
12 * 3. The union's minbits value is the smallest netmask length ("ip_bits")
13 * of all the input values.
14 * 4. Let C be the number of leading address bits that are in common among
15 * all the input values (C ranges from 0 to ip_maxbits for the family).
16 * 5. The union's commonbits value is C.
17 * 6. The union's address value is the same as the common prefix for its
18 * first C bits, and is zeroes to the right of that. The physical width
19 * of the address value is ip_maxbits for the address family.
21 * In a leaf index entry (representing a single key), commonbits is equal to
22 * ip_maxbits for the address family, minbits is the same as the represented
23 * value's ip_bits, and the address is equal to the represented address.
24 * Although it may appear that we're wasting a byte by storing the union
25 * format and not just the represented INET/CIDR value in leaf keys, the
26 * extra byte is actually "free" because of alignment considerations.
28 * Note that this design tracks minbits and commonbits independently; in any
29 * given union value, either might be smaller than the other. This does not
30 * help us much when descending the tree, because of the way inet comparison
31 * is defined: at non-leaf nodes we can't compare more than minbits bits
32 * even if we know them. However, it greatly improves the quality of split
33 * decisions. Preliminary testing suggests that searches are as much as
34 * twice as fast as for a simpler design in which a single field doubles as
35 * the common prefix length and the minimum ip_bits value.
37 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
38 * Portions Copyright (c) 1994, Regents of the University of California
42 * src/backend/utils/adt/network_gist.c
44 *-------------------------------------------------------------------------
52#include "utils/fmgrprotos.h"
57 * Operator strategy numbers used in the GiST inet_ops opclass
59 #define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
60 #define INETSTRAT_EQ RTEqualStrategyNumber
61 #define INETSTRAT_NE RTNotEqualStrategyNumber
62 #define INETSTRAT_LT RTLessStrategyNumber
63 #define INETSTRAT_LE RTLessEqualStrategyNumber
64 #define INETSTRAT_GT RTGreaterStrategyNumber
65 #define INETSTRAT_GE RTGreaterEqualStrategyNumber
66 #define INETSTRAT_SUB RTSubStrategyNumber
67 #define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
68 #define INETSTRAT_SUP RTSuperStrategyNumber
69 #define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
73 * Representation of a GiST INET/CIDR index key. This is not identical to
74 * INET/CIDR because we need to keep track of the length of the common address
75 * prefix as well as the minimum netmask length. However, as long as it
76 * follows varlena header rules, the core GiST code won't know the difference.
77 * For simplicity we always use 1-byte-header varlena format.
82 unsigned char family;
/* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
83 unsigned char minbits;
/* minimum number of bits in netmask */
84 unsigned char commonbits;
/* number of common prefix bits in addresses */
85 unsigned char ipaddr[16];
/* up to 128 bits of common address */
88 #define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
89 #define InetKeyPGetDatum(X) PointerGetDatum(X)
92 * Access macros; not really exciting, but we use these for notational
93 * consistency with access to INET/CIDR values. Note that family-zero values
94 * are stored with 4 bytes of address, not 16.
96 #define gk_ip_family(gkptr) ((gkptr)->family)
97 #define gk_ip_minbits(gkptr) ((gkptr)->minbits)
98 #define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
99 #define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
100 #define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
102/* These require that the family field has been set: */
103 #define gk_ip_addrsize(gkptr) \
104 (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
105 #define gk_ip_maxbits(gkptr) \
106 ip_family_maxbits(gk_ip_family(gkptr))
107 #define SET_GK_VARSIZE(dst) \
108 SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
112 * The GiST query consistency check
121 /* Oid subtype = PG_GETARG_OID(3); */
127 /* All operators served by this function are exact. */
131 * Check 0: different families
133 * If key represents multiple address families, its children could match
134 * anything. This can only happen on an inner index page.
143 * Check 1: different families
145 * Matching families do not help any of the strategies.
166 /* For all other cases, we can be sure there is no match */
171 * Check 2: network bit count
173 * Network bit count (ip_bits) helps to check leaves for sub network and
174 * sup network operators. At non-leaf nodes, we know every child value
175 * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
203 * Check 3: common network bits
205 * Compare available common prefix bits to the query, but not beyond
206 * either the query's netmask or the minimum netmask among the represented
207 * values. If these bits don't match the query, we have our answer (and
208 * may or may not need to descend, depending on the operator). If they do
209 * match, and we are not at a leaf, we descend in all cases.
211 * Note this is the final check for operators that only consider the
212 * network part of the address.
258 * Remaining checks are only for leaves and basic comparison strategies.
259 * See network_cmp_internal() in network.c for the implementation we need
260 * to match. Note that in a leaf key, commonbits should equal the address
261 * length, so we compared the whole network parts above.
266 * Check 4: network bit count
268 * Next step is to compare netmask widths.
300 * Check 5: whole address
302 * Netmask bit counts are the same, so check all the address bits.
327 elog(
ERROR,
"unknown strategy for inet GiST");
332 * Calculate parameters of the union of some GistInetKeys.
334 * Examine the keys in elements m..n inclusive of the GISTENTRY array,
335 * and compute these output parameters:
336 * *minfamily_p = minimum IP address family number
337 * *maxfamily_p = maximum IP address family number
338 * *minbits_p = minimum netmask width
339 * *commonbits_p = number of leading bits in common among the addresses
341 * minbits and commonbits are forced to zero if there's more than one
360 /* Must be at least one key. */
363 /* Initialize variables using the first key. */
370 /* Scan remaining keys. */
371 for (
i = m + 1;
i <= n;
i++)
375 /* Determine range of family numbers */
381 /* Find minimum minbits */
385 /* Find minimum number of bits in common */
392 /* Force minbits/commonbits to zero if more than one family. */
393 if (minfamily != maxfamily)
394 minbits = commonbits = 0;
396 *minfamily_p = minfamily;
397 *maxfamily_p = maxfamily;
398 *minbits_p = minbits;
399 *commonbits_p = commonbits;
403 * Same as above, but the GISTENTRY elements to examine are those with
404 * indices listed in the offsets[] array.
422 /* Must be at least one key. */
425 /* Initialize variables using the first key. */
432 /* Scan remaining keys. */
433 for (
i = 1;
i < noffsets;
i++)
437 /* Determine range of family numbers */
443 /* Find minimum minbits */
447 /* Find minimum number of bits in common */
454 /* Force minbits/commonbits to zero if more than one family. */
455 if (minfamily != maxfamily)
456 minbits = commonbits = 0;
458 *minfamily_p = minfamily;
459 *maxfamily_p = maxfamily;
460 *minbits_p = minbits;
461 *commonbits_p = commonbits;
465 * Construct a GistInetKey representing a union value.
467 * Inputs are the family/minbits/commonbits values to use, plus a pointer to
468 * the address field of one of the union inputs. (Since we're going to copy
469 * just the bits-in-common, it doesn't matter which one.)
477 /* Make sure any unused bits are zeroed. */
484 /* Clone appropriate bytes of the address. */
486 memcpy(
gk_ip_addr(result), addr, (commonbits + 7) / 8);
488 /* Clean any unwanted bits in the last partial byte. */
489 if (commonbits % 8 != 0)
490 gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
492 /* Set varlena header correctly. */
500 * The GiST union function
502 * See comments at head of file for the definition of the union.
517 /* Determine parameters of the union. */
519 &minfamily, &maxfamily,
520 &minbits, &commonbits);
522 /* If more than one family, emit family number zero. */
523 if (minfamily != maxfamily)
526 /* Initialize address using the first key. */
530 /* Construct the union value. */
537 * The GiST compress function
539 * Convert an inet value to GistInetKey.
580 * We do not need a decompress function, because the other GiST inet
581 * support functions work with the GistInetKey representation.
585 * The GiST fetch function
587 * Reconstruct the original inet datum from a GistInetKey.
612 * The GiST page split penalty function
614 * Charge a large penalty if address family doesn't match, or a somewhat
615 * smaller one if the new value would degrade the union's minbits
616 * (minimum netmask width). Otherwise, penalty is inverse of the
617 * new number of common address bits.
637 *penalty = 1.0f / commonbits;
651 * The GiST PickSplit method
653 * There are two ways to split. First one is to split by address families,
654 * if there are multiple families appearing in the input.
656 * The second and more common way is to split by addresses. To achieve this,
657 * determine the number of leading bits shared by all the keys, then split on
658 * the next bit. (We don't currently consider the netmask widths while doing
659 * this; should we?) If we fail to get a nontrivial split that way, split
682 maxoff = entryvec->
n - 1;
694 /* Determine parameters of the union of all the inputs. */
696 &minfamily, &maxfamily,
697 &minbits, &commonbits);
699 if (minfamily != maxfamily)
701 /* Multiple families, so split by family. */
705 * If there's more than 2 families, all but maxfamily go into the
706 * left union. This could only happen if the inputs include some
707 * IPv4, some IPv6, and some already-multiple-family unions.
719 * Split on the next bit after the common bits. If that yields a
720 * trivial split, try the next bit position to the right. Repeat till
721 * success; or if we run out of bits, do an arbitrary 50-50 split.
725 while (commonbits < maxbits)
727 /* Split using the commonbits'th bit position. */
728 int bitbyte = commonbits / 8;
729 int bitmask = 0x80 >> (commonbits % 8);
737 if ((addr[bitbyte] & bitmask) == 0)
748 if (commonbits >= maxbits)
750 /* Failed ... do a 50-50 split. */
765 * Compute the union value for each side from scratch. In most cases we
766 * could approximate the union values with what we already know, but this
767 * ensures that each side has minbits and commonbits set as high as
771 &minfamily, &maxfamily,
772 &minbits, &commonbits);
773 if (minfamily != maxfamily)
781 &minfamily, &maxfamily,
782 &minbits, &commonbits);
783 if (minfamily != maxfamily)
794 * The GiST equality function
#define PG_GETARG_POINTER(n)
#define PG_GETARG_DATUM(n)
#define PG_GETARG_UINT16(n)
#define PG_RETURN_POINTER(x)
#define PG_RETURN_BOOL(x)
#define gistentryinit(e, k, r, pg, o, l)
Assert(PointerIsAligned(start, uint64))
void * palloc0(Size size)
int bitncommon(const unsigned char *l, const unsigned char *r, int n)
int bitncmp(const unsigned char *l, const unsigned char *r, int n)
#define gk_ip_commonbits(gkptr)
static void calc_inet_union_params(GISTENTRY *ent, int m, int n, int *minfamily_p, int *maxfamily_p, int *minbits_p, int *commonbits_p)
Datum inet_gist_fetch(PG_FUNCTION_ARGS)
#define DatumGetInetKeyP(X)
static void calc_inet_union_params_indexed(GISTENTRY *ent, OffsetNumber *offsets, int noffsets, int *minfamily_p, int *maxfamily_p, int *minbits_p, int *commonbits_p)
#define gk_ip_maxbits(gkptr)
Datum inet_gist_compress(PG_FUNCTION_ARGS)
#define gk_ip_family(gkptr)
#define gk_ip_addr(gkptr)
#define gk_ip_addrsize(gkptr)
struct GistInetKey GistInetKey
static GistInetKey * build_inet_union_key(int family, int minbits, int commonbits, unsigned char *addr)
#define SET_GK_VARSIZE(dst)
Datum inet_gist_consistent(PG_FUNCTION_ARGS)
#define ip_family_maxbits(fam)
Datum inet_gist_union(PG_FUNCTION_ARGS)
Datum inet_gist_picksplit(PG_FUNCTION_ARGS)
Datum inet_gist_penalty(PG_FUNCTION_ARGS)
Datum inet_gist_same(PG_FUNCTION_ARGS)
#define INETSTRAT_OVERLAPS
#define gk_ip_minbits(gkptr)
#define OffsetNumberNext(offsetNumber)
#define FirstOffsetNumber
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]
static Datum InetPGetDatum(const inet *X)
static inet * DatumGetInetPP(Datum X)
#define SET_INET_VARSIZE(dst)
#define PG_GETARG_INET_PP(n)
#define ip_family(inetptr)
#define ip_addrsize(inetptr)