[フレーム]

mcv.c

1/*-------------------------------------------------------------------------

2 *

3 * mcv.c

4 * POSTGRES multivariate MCV lists

5 *

6 *

9 *

10 * IDENTIFICATION

11 * src/backend/statistics/mcv.c

12 *

13 *-------------------------------------------------------------------------

14 */

15#include "postgres.h"

16

17#include <math.h>

18

19#include "access/htup_details.h"

20#include "catalog/pg_statistic_ext.h"

21#include "catalog/pg_statistic_ext_data.h"

22#include "fmgr.h"

23#include "funcapi.h"

24#include "nodes/nodeFuncs.h"

25#include "statistics/extended_stats_internal.h"

26#include "statistics/statistics.h"

27#include "utils/array.h"

28#include "utils/builtins.h"

29#include "utils/fmgrprotos.h"

30#include "utils/lsyscache.h"

31#include "utils/selfuncs.h"

32#include "utils/syscache.h"

33#include "utils/typcache.h"

34

35/*

36 * Computes size of a serialized MCV item, depending on the number of

37 * dimensions (columns) the statistic is defined on. The datum values are

38 * stored in a separate array (deduplicated, to minimize the size), and

39 * so the serialized items only store uint16 indexes into that array.

40 *

41 * Each serialized item stores (in this order):

42 *

43 * - indexes to values (ndim * sizeof(uint16))

44 * - null flags (ndim * sizeof(bool))

45 * - frequency (sizeof(double))

46 * - base_frequency (sizeof(double))

47 *

48 * There is no alignment padding within an MCV item.

49 * So in total each MCV item requires this many bytes:

50 *

51 * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)

52 */

53 #define ITEM_SIZE(ndims) \

54 ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))

55

56/*

57 * Used to compute size of serialized MCV list representation.

58 */

59 #define MinSizeOfMCVList \

60 (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))

61

62/*

63 * Size of the serialized MCV list, excluding the space needed for

64 * deduplicated per-dimension values. The macro is meant to be used

65 * when it's not yet safe to access the serialized info about amount

66 * of data for each column.

67 */

68 #define SizeOfMCVList(ndims,nitems) \

69 ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \

70 ((ndims) * sizeof(DimensionInfo)) + \

71 ((nitems) * ITEM_SIZE(ndims)))

72

73static MultiSortSupport build_mss(StatsBuildData *data);

74

75static SortItem *build_distinct_groups(int numrows, SortItem *items,

76 MultiSortSupport mss, int *ndistinct);

77

78static SortItem **build_column_frequencies(SortItem *groups, int ngroups,

79 MultiSortSupport mss, int *ncounts);

80

81static int count_distinct_groups(int numrows, SortItem *items,

82 MultiSortSupport mss);

83

84/*

85 * Compute new value for bitmap item, considering whether it's used for

86 * clauses connected by AND/OR.

87 */

88 #define RESULT_MERGE(value, is_or, match) \

89 ((is_or) ? ((value) || (match)) : ((value) && (match)))

90

91/*

92 * When processing a list of clauses, the bitmap item may get set to a value

93 * such that additional clauses can't change it. For example, when processing

94 * a list of clauses connected to AND, as soon as the item gets set to 'false'

95 * then it'll remain like that. Similarly clauses connected by OR and 'true'.

96 *

97 * Returns true when the value in the bitmap can't change no matter how the

98 * remaining clauses are evaluated.

99 */

100 #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))

101

102/*

103 * get_mincount_for_mcv_list

104 * Determine the minimum number of times a value needs to appear in

105 * the sample for it to be included in the MCV list.

106 *

107 * We want to keep only values that appear sufficiently often in the

108 * sample that it is reasonable to extrapolate their sample frequencies to

109 * the entire table. We do this by placing an upper bound on the relative

110 * standard error of the sample frequency, so that any estimates the

111 * planner generates from the MCV statistics can be expected to be

112 * reasonably accurate.

113 *

114 * Since we are sampling without replacement, the sample frequency of a

115 * particular value is described by a hypergeometric distribution. A

116 * common rule of thumb when estimating errors in this situation is to

117 * require at least 10 instances of the value in the sample, in which case

118 * the distribution can be approximated by a normal distribution, and

119 * standard error analysis techniques can be applied. Given a sample size

120 * of n, a population size of N, and a sample frequency of p=cnt/n, the

121 * standard error of the proportion p is given by

122 * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))

123 * where the second term is the finite population correction. To get

124 * reasonably accurate planner estimates, we impose an upper bound on the

125 * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative

126 * error bound is fairly arbitrary, but has been found empirically to work

127 * well. Rearranging this formula gives a lower bound on the number of

128 * instances of the value seen:

129 * cnt > n*(N-n) / (N-n+0.04*n*(N-1))

130 * This bound is at most 25, and approaches 0 as n approaches 0 or N. The

131 * case where n approaches 0 cannot happen in practice, since the sample

132 * size is at least 300. The case where n approaches N corresponds to

133 * sampling the whole table, in which case it is reasonable to keep

134 * the whole MCV list (have no lower bound), so it makes sense to apply

135 * this formula for all inputs, even though the above derivation is

136 * technically only valid when the right hand side is at least around 10.

137 *

138 * An alternative way to look at this formula is as follows -- assume that

139 * the number of instances of the value seen scales up to the entire

140 * table, so that the population count is K=N*cnt/n. Then the distribution

141 * in the sample is a hypergeometric distribution parameterised by N, n

142 * and K, and the bound above is mathematically equivalent to demanding

143 * that the standard deviation of that distribution is less than 20% of

144 * its mean. Thus the relative errors in any planner estimates produced

145 * from the MCV statistics are likely to be not too large.

146 */

147static double

148 get_mincount_for_mcv_list(int samplerows, double totalrows)

149{

150 double n = samplerows;

151 double N = totalrows;

152 double numer,

153 denom;

154

155 numer = n * (N - n);

156 denom = N - n + 0.04 * n * (N - 1);

157

158 /* Guard against division by zero (possible if n = N = 1) */

159 if (denom == 0.0)

160 return 0.0;

161

162 return numer / denom;

163}

164

165/*

166 * Builds MCV list from the set of sampled rows.

167 *

168 * The algorithm is quite simple:

169 *

170 * (1) sort the data (default collation, '<' for the data type)

171 *

172 * (2) count distinct groups, decide how many to keep

173 *

174 * (3) build the MCV list using the threshold determined in (2)

175 *

176 * (4) remove rows represented by the MCV from the sample

177 *

178 */

179MCVList *

180 statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)

181{

182 int i,

183 numattrs,

184 numrows,

185 ngroups,

186 nitems;

187 double mincount;

188 SortItem *items;

189 SortItem *groups;

190 MCVList *mcvlist = NULL;

191 MultiSortSupport mss;

192

193 /* comparator for all the columns */

194 mss = build_mss(data);

195

196 /* sort the rows */

197 items = build_sorted_items(data, &nitems, mss,

198 data->nattnums, data->attnums);

199

200 if (!items)

201 return NULL;

202

203 /* for convenience */

204 numattrs = data->nattnums;

205 numrows = data->numrows;

206

207 /* transform the sorted rows into groups (sorted by frequency) */

208 groups = build_distinct_groups(nitems, items, mss, &ngroups);

209

210 /*

211 * The maximum number of MCV items to store, based on the statistics

212 * target we computed for the statistics object (from the target set for

213 * the object itself, attributes and the system default). In any case, we

214 * can't keep more groups than we have available.

215 */

216 nitems = stattarget;

217 if (nitems > ngroups)

218 nitems = ngroups;

219

220 /*

221 * Decide how many items to keep in the MCV list. We can't use the same

222 * algorithm as per-column MCV lists, because that only considers the

223 * actual group frequency - but we're primarily interested in how the

224 * actual frequency differs from the base frequency (product of simple

225 * per-column frequencies, as if the columns were independent).

226 *

227 * Using the same algorithm might exclude items that are close to the

228 * "average" frequency of the sample. But that does not say whether the

229 * observed frequency is close to the base frequency or not. We also need

230 * to consider unexpectedly uncommon items (again, compared to the base

231 * frequency), and the single-column algorithm does not have to.

232 *

233 * We simply decide how many items to keep by computing the minimum count

234 * using get_mincount_for_mcv_list() and then keep all items that seem to

235 * be more common than that.

236 */

237 mincount = get_mincount_for_mcv_list(numrows, totalrows);

238

239 /*

240 * Walk the groups until we find the first group with a count below the

241 * mincount threshold (the index of that group is the number of groups we

242 * want to keep).

243 */

244 for (i = 0; i < nitems; i++)

245 {

246 if (groups[i].count < mincount)

247 {

248 nitems = i;

249 break;

250 }

251 }

252

253 /*

254 * At this point, we know the number of items for the MCV list. There

255 * might be none (for uniform distribution with many groups), and in that

256 * case, there will be no MCV list. Otherwise, construct the MCV list.

257 */

258 if (nitems > 0)

259 {

260 int j;

261 SortItem key;

262 MultiSortSupport tmp;

263

264 /* frequencies for values in each attribute */

265 SortItem **freqs;

266 int *nfreqs;

267

268 /* used to search values */

269 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)

270 + sizeof(SortSupportData));

271

272 /* compute frequencies for values in each column */

273 nfreqs = (int *) palloc0(sizeof(int) * numattrs);

274 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);

275

276 /*

277 * Allocate the MCV list structure, set the global parameters.

278 */

279 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +

280 sizeof(MCVItem) * nitems);

281

282 mcvlist->magic = STATS_MCV_MAGIC;

283 mcvlist->type = STATS_MCV_TYPE_BASIC;

284 mcvlist->ndimensions = numattrs;

285 mcvlist->nitems = nitems;

286

287 /* store info about data type OIDs */

288 for (i = 0; i < numattrs; i++)

289 mcvlist->types[i] = data->stats[i]->attrtypid;

290

291 /* Copy the first chunk of groups into the result. */

292 for (i = 0; i < nitems; i++)

293 {

294 /* just point to the proper place in the list */

295 MCVItem *item = &mcvlist->items[i];

296

297 item->values = (Datum *) palloc(sizeof(Datum) * numattrs);

298 item->isnull = (bool *) palloc(sizeof(bool) * numattrs);

299

300 /* copy values for the group */

301 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);

302 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);

303

304 /* groups should be sorted by frequency in descending order */

305 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));

306

307 /* group frequency */

308 item->frequency = (double) groups[i].count / numrows;

309

310 /* base frequency, if the attributes were independent */

311 item->base_frequency = 1.0;

312 for (j = 0; j < numattrs; j++)

313 {

314 SortItem *freq;

315

316 /* single dimension */

317 tmp->ndims = 1;

318 tmp->ssup[0] = mss->ssup[j];

319

320 /* fill search key */

321 key.values = &groups[i].values[j];

322 key.isnull = &groups[i].isnull[j];

323

324 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],

325 sizeof(SortItem),

326 multi_sort_compare, tmp);

327

328 item->base_frequency *= ((double) freq->count) / numrows;

329 }

330 }

331

332 pfree(nfreqs);

333 pfree(freqs);

334 }

335

336 pfree(items);

337 pfree(groups);

338

339 return mcvlist;

340}

341

342/*

343 * build_mss

344 * Build a MultiSortSupport for the given StatsBuildData.

345 */

346static MultiSortSupport

347 build_mss(StatsBuildData *data)

348{

349 int i;

350 int numattrs = data->nattnums;

351

352 /* Sort by multiple columns (using array of SortSupport) */

353 MultiSortSupport mss = multi_sort_init(numattrs);

354

355 /* prepare the sort functions for all the attributes */

356 for (i = 0; i < numattrs; i++)

357 {

358 VacAttrStats *colstat = data->stats[i];

359 TypeCacheEntry *type;

360

361 type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);

362 if (type->lt_opr == InvalidOid) /* shouldn't happen */

363 elog(ERROR, "cache lookup failed for ordering operator for type %u",

364 colstat->attrtypid);

365

366 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);

367 }

368

369 return mss;

370}

371

372/*

373 * count_distinct_groups

374 * Count distinct combinations of SortItems in the array.

375 *

376 * The array is assumed to be sorted according to the MultiSortSupport.

377 */

378static int

379 count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)

380{

381 int i;

382 int ndistinct;

383

384 ndistinct = 1;

385 for (i = 1; i < numrows; i++)

386 {

387 /* make sure the array really is sorted */

388 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);

389

390 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)

391 ndistinct += 1;

392 }

393

394 return ndistinct;

395}

396

397/*

398 * compare_sort_item_count

399 * Comparator for sorting items by count (frequencies) in descending

400 * order.

401 */

402static int

403 compare_sort_item_count(const void *a, const void *b, void *arg)

404{

405 SortItem *ia = (SortItem *) a;

406 SortItem *ib = (SortItem *) b;

407

408 if (ia->count == ib->count)

409 return 0;

410 else if (ia->count > ib->count)

411 return -1;

412

413 return 1;

414}

415

416/*

417 * build_distinct_groups

418 * Build an array of SortItems for distinct groups and counts matching

419 * items.

420 *

421 * The 'items' array is assumed to be sorted.

422 */

423static SortItem *

424 build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,

425 int *ndistinct)

426{

427 int i,

428 j;

429 int ngroups = count_distinct_groups(numrows, items, mss);

430

431 SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));

432

433 j = 0;

434 groups[0] = items[0];

435 groups[0].count = 1;

436

437 for (i = 1; i < numrows; i++)

438 {

439 /* Assume sorted in ascending order. */

440 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);

441

442 /* New distinct group detected. */

443 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)

444 {

445 groups[++j] = items[i];

446 groups[j].count = 0;

447 }

448

449 groups[j].count++;

450 }

451

452 /* ensure we filled the expected number of distinct groups */

453 Assert(j + 1 == ngroups);

454

455 /* Sort the distinct groups by frequency (in descending order). */

456 qsort_interruptible(groups, ngroups, sizeof(SortItem),

457 compare_sort_item_count, NULL);

458

459 *ndistinct = ngroups;

460 return groups;

461}

462

463/* compare sort items (single dimension) */

464static int

465 sort_item_compare(const void *a, const void *b, void *arg)

466{

467 SortSupport ssup = (SortSupport) arg;

468 SortItem *ia = (SortItem *) a;

469 SortItem *ib = (SortItem *) b;

470

471 return ApplySortComparator(ia->values[0], ia->isnull[0],

472 ib->values[0], ib->isnull[0],

473 ssup);

474}

475

476/*

477 * build_column_frequencies

478 * Compute frequencies of values in each column.

479 *

480 * This returns an array of SortItems for each attribute the MCV is built

481 * on, with a frequency (number of occurrences) for each value. This is

482 * then used to compute "base" frequency of MCV items.

483 *

484 * All the memory is allocated in a single chunk, so that a single pfree

485 * is enough to release it. We do not allocate space for values/isnull

486 * arrays in the SortItems, because we can simply point into the input

487 * groups directly.

488 */

489static SortItem **

490 build_column_frequencies(SortItem *groups, int ngroups,

491 MultiSortSupport mss, int *ncounts)

492{

493 int i,

494 dim;

495 SortItem **result;

496 char *ptr;

497

498 Assert(groups);

499 Assert(ncounts);

500

501 /* allocate arrays for all columns as a single chunk */

502 ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +

503 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));

504

505 /* initial array of pointers */

506 result = (SortItem **) ptr;

507 ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);

508

509 for (dim = 0; dim < mss->ndims; dim++)

510 {

511 SortSupport ssup = &mss->ssup[dim];

512

513 /* array of values for a single column */

514 result[dim] = (SortItem *) ptr;

515 ptr += MAXALIGN(sizeof(SortItem) * ngroups);

516

517 /* extract data for the dimension */

518 for (i = 0; i < ngroups; i++)

519 {

520 /* point into the input groups */

521 result[dim][i].values = &groups[i].values[dim];

522 result[dim][i].isnull = &groups[i].isnull[dim];

523 result[dim][i].count = groups[i].count;

524 }

525

526 /* sort the values, deduplicate */

527 qsort_interruptible(result[dim], ngroups, sizeof(SortItem),

528 sort_item_compare, ssup);

529

530 /*

531 * Identify distinct values, compute frequency (there might be

532 * multiple MCV items containing this value, so we need to sum counts

533 * from all of them.

534 */

535 ncounts[dim] = 1;

536 for (i = 1; i < ngroups; i++)

537 {

538 if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)

539 {

540 result[dim][ncounts[dim] - 1].count += result[dim][i].count;

541 continue;

542 }

543

544 result[dim][ncounts[dim]] = result[dim][i];

545

546 ncounts[dim]++;

547 }

548 }

549

550 return result;

551}

552

553/*

554 * statext_mcv_load

555 * Load the MCV list for the indicated pg_statistic_ext_data tuple.

556 */

557MCVList *

558 statext_mcv_load(Oid mvoid, bool inh)

559{

560 MCVList *result;

561 bool isnull;

562 Datum mcvlist;

563 HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID,

564 ObjectIdGetDatum(mvoid), BoolGetDatum(inh));

565

566 if (!HeapTupleIsValid(htup))

567 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);

568

569 mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,

570 Anum_pg_statistic_ext_data_stxdmcv, &isnull);

571

572 if (isnull)

573 elog(ERROR,

574 "requested statistics kind \"%c\" is not yet built for statistics object %u",

575 STATS_EXT_MCV, mvoid);

576

577 result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));

578

579 ReleaseSysCache(htup);

580

581 return result;

582}

583

584

585/*

586 * statext_mcv_serialize

587 * Serialize MCV list into a pg_mcv_list value.

588 *

589 * The MCV items may include values of various data types, and it's reasonable

590 * to expect redundancy (values for a given attribute, repeated for multiple

591 * MCV list items). So we deduplicate the values into arrays, and then replace

592 * the values by indexes into those arrays.

593 *

594 * The overall structure of the serialized representation looks like this:

595 *

596 * +---------------+----------------+---------------------+-------+

598 * +---------------+----------------+---------------------+-------+

599 *

600 * Where dimension info stores information about the type of the K-th

601 * attribute (e.g. typlen, typbyval and length of deduplicated values).

602 * Deduplicated values store deduplicated values for each attribute. And

603 * items store the actual MCV list items, with values replaced by indexes into

604 * the arrays.

605 *

606 * When serializing the items, we use uint16 indexes. The number of MCV items

607 * is limited by the statistics target (which is capped to 10k at the moment).

608 * We might increase this to 65k and still fit into uint16, so there's a bit of

609 * slack. Furthermore, this limit is on the number of distinct values per column,

610 * and we usually have few of those (and various combinations of them for the

611 * those MCV list). So uint16 seems fine for now.

612 *

613 * We don't really expect the serialization to save as much space as for

614 * histograms, as we are not doing any bucket splits (which is the source

615 * of high redundancy in histograms).

616 *

617 * TODO: Consider packing boolean flags (NULL) for each item into a single char

618 * (or a longer type) instead of using an array of bool items.

619 */

620bytea *

621 statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)

622{

623 int i;

624 int dim;

625 int ndims = mcvlist->ndimensions;

626

627 SortSupport ssup;

628 DimensionInfo *info;

629

630 Size total_length;

631

632 /* serialized items (indexes into arrays, etc.) */

633 bytea *raw;

634 char *ptr;

635 char *endptr PG_USED_FOR_ASSERTS_ONLY;

636

637 /* values per dimension (and number of non-NULL values) */

638 Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);

639 int *counts = (int *) palloc0(sizeof(int) * ndims);

640

641 /*

642 * We'll include some rudimentary information about the attribute types

643 * (length, by-val flag), so that we don't have to look them up while

644 * deserializing the MCV list (we already have the type OID in the

645 * header). This is safe because when changing the type of the attribute

646 * the statistics gets dropped automatically. We need to store the info

647 * about the arrays of deduplicated values anyway.

648 */

649 info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);

650

651 /* sort support data for all attributes included in the MCV list */

652 ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);

653

654 /* collect and deduplicate values for each dimension (attribute) */

655 for (dim = 0; dim < ndims; dim++)

656 {

657 int ndistinct;

658 TypeCacheEntry *typentry;

659

660 /*

661 * Lookup the LT operator (can't get it from stats extra_data, as we

662 * don't know how to interpret that - scalar vs. array etc.).

663 */

664 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);

665

666 /* copy important info about the data type (length, by-value) */

667 info[dim].typlen = stats[dim]->attrtype->typlen;

668 info[dim].typbyval = stats[dim]->attrtype->typbyval;

669

670 /* allocate space for values in the attribute and collect them */

671 values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);

672

673 for (i = 0; i < mcvlist->nitems; i++)

674 {

675 /* skip NULL values - we don't need to deduplicate those */

676 if (mcvlist->items[i].isnull[dim])

677 continue;

678

679 /* append the value at the end */

680 values[dim][counts[dim]] = mcvlist->items[i].values[dim];

681 counts[dim] += 1;

682 }

683

684 /* if there are just NULL values in this dimension, we're done */

685 if (counts[dim] == 0)

686 continue;

687

688 /* sort and deduplicate the data */

689 ssup[dim].ssup_cxt = CurrentMemoryContext;

690 ssup[dim].ssup_collation = stats[dim]->attrcollid;

691 ssup[dim].ssup_nulls_first = false;

692

693 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);

694

695 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),

696 compare_scalars_simple, &ssup[dim]);

697

698 /*

699 * Walk through the array and eliminate duplicate values, but keep the

700 * ordering (so that we can do a binary search later). We know there's

701 * at least one item as (counts[dim] != 0), so we can skip the first

702 * element.

703 */

704 ndistinct = 1; /* number of distinct values */

705 for (i = 1; i < counts[dim]; i++)

706 {

707 /* expect sorted array */

708 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);

709

710 /* if the value is the same as the previous one, we can skip it */

711 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))

712 continue;

713

714 values[dim][ndistinct] = values[dim][i];

715 ndistinct += 1;

716 }

717

718 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */

719 Assert(ndistinct <= PG_UINT16_MAX);

720

721 /*

722 * Store additional info about the attribute - number of deduplicated

723 * values, and also size of the serialized data. For fixed-length data

724 * types this is trivial to compute, for varwidth types we need to

725 * actually walk the array and sum the sizes.

726 */

727 info[dim].nvalues = ndistinct;

728

729 if (info[dim].typbyval) /* by-value data types */

730 {

731 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;

732

733 /*

734 * We copy the data into the MCV item during deserialization, so

735 * we don't need to allocate any extra space.

736 */

737 info[dim].nbytes_aligned = 0;

738 }

739 else if (info[dim].typlen > 0) /* fixed-length by-ref */

740 {

741 /*

742 * We don't care about alignment in the serialized data, so we

743 * pack the data as much as possible. But we also track how much

744 * data will be needed after deserialization, and in that case we

745 * need to account for alignment of each item.

746 *

747 * Note: As the items are fixed-length, we could easily compute

748 * this during deserialization, but we do it here anyway.

749 */

750 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;

751 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);

752 }

753 else if (info[dim].typlen == -1) /* varlena */

754 {

755 info[dim].nbytes = 0;

756 info[dim].nbytes_aligned = 0;

757 for (i = 0; i < info[dim].nvalues; i++)

758 {

759 Size len;

760

761 /*

762 * For varlena values, we detoast the values and store the

763 * length and data separately. We don't bother with alignment

764 * here, which means that during deserialization we need to

765 * copy the fields and only access the copies.

766 */

767 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));

768

769 /* serialized length (uint32 length + data) */

770 len = VARSIZE_ANY_EXHDR(DatumGetPointer(values[dim][i]));

771 info[dim].nbytes += sizeof(uint32); /* length */

772 info[dim].nbytes += len; /* value (no header) */

773

774 /*

775 * During deserialization we'll build regular varlena values

776 * with full headers, and we need to align them properly.

777 */

778 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);

779 }

780 }

781 else if (info[dim].typlen == -2) /* cstring */

782 {

783 info[dim].nbytes = 0;

784 info[dim].nbytes_aligned = 0;

785 for (i = 0; i < info[dim].nvalues; i++)

786 {

787 Size len;

788

789 /*

790 * cstring is handled similar to varlena - first we store the

791 * length as uint32 and then the data. We don't care about

792 * alignment, which means that during deserialization we need

793 * to copy the fields and only access the copies.

794 */

795

796 /* c-strings include terminator, so +1 byte */

797 len = strlen(DatumGetCString(values[dim][i])) + 1;

798 info[dim].nbytes += sizeof(uint32); /* length */

799 info[dim].nbytes += len; /* value */

800

801 /* space needed for properly aligned deserialized copies */

802 info[dim].nbytes_aligned += MAXALIGN(len);

803 }

804 }

805

806 /* we know (count>0) so there must be some data */

807 Assert(info[dim].nbytes > 0);

808 }

809

810 /*

811 * Now we can finally compute how much space we'll actually need for the

812 * whole serialized MCV list (varlena header, MCV header, dimension info

813 * for each attribute, deduplicated values and items).

814 */

815 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */

816 + sizeof(AttrNumber) /* ndimensions */

817 + (ndims * sizeof(Oid)); /* attribute types */

818

819 /* dimension info */

820 total_length += ndims * sizeof(DimensionInfo);

821

822 /* add space for the arrays of deduplicated values */

823 for (i = 0; i < ndims; i++)

824 total_length += info[i].nbytes;

825

826 /*

827 * And finally account for the items (those are fixed-length, thanks to

828 * replacing values with uint16 indexes into the deduplicated arrays).

829 */

830 total_length += mcvlist->nitems * ITEM_SIZE(dim);

831

832 /*

833 * Allocate space for the whole serialized MCV list (we'll skip bytes, so

834 * we set them to zero to make the result more compressible).

835 */

836 raw = (bytea *) palloc0(VARHDRSZ + total_length);

837 SET_VARSIZE(raw, VARHDRSZ + total_length);

838

839 ptr = VARDATA(raw);

840 endptr = ptr + total_length;

841

842 /* copy the MCV list header fields, one by one */

843 memcpy(ptr, &mcvlist->magic, sizeof(uint32));

844 ptr += sizeof(uint32);

845

846 memcpy(ptr, &mcvlist->type, sizeof(uint32));

847 ptr += sizeof(uint32);

848

849 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));

850 ptr += sizeof(uint32);

851

852 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));

853 ptr += sizeof(AttrNumber);

854

855 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);

856 ptr += (sizeof(Oid) * ndims);

857

858 /* store information about the attributes (data amounts, ...) */

859 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);

860 ptr += sizeof(DimensionInfo) * ndims;

861

862 /* Copy the deduplicated values for all attributes to the output. */

863 for (dim = 0; dim < ndims; dim++)

864 {

865 /* remember the starting point for Asserts later */

866 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;

867

868 for (i = 0; i < info[dim].nvalues; i++)

869 {

870 Datum value = values[dim][i];

871

872 if (info[dim].typbyval) /* passed by value */

873 {

874 Datum tmp;

875

876 /*

877 * For byval types, we need to copy just the significant bytes

878 * - we can't use memcpy directly, as that assumes

879 * little-endian behavior. store_att_byval does almost what

880 * we need, but it requires a properly aligned buffer - the

881 * output buffer does not guarantee that. So we simply use a

882 * local Datum variable (which guarantees proper alignment),

883 * and then copy the value from it.

884 */

885 store_att_byval(&tmp, value, info[dim].typlen);

886

887 memcpy(ptr, &tmp, info[dim].typlen);

888 ptr += info[dim].typlen;

889 }

890 else if (info[dim].typlen > 0) /* passed by reference */

891 {

892 /* no special alignment needed, treated as char array */

893 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);

894 ptr += info[dim].typlen;

895 }

896 else if (info[dim].typlen == -1) /* varlena */

897 {

898 uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));

899

900 /* copy the length */

901 memcpy(ptr, &len, sizeof(uint32));

902 ptr += sizeof(uint32);

903

904 /* data from the varlena value (without the header) */

905 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);

906 ptr += len;

907 }

908 else if (info[dim].typlen == -2) /* cstring */

909 {

910 uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;

911

912 /* copy the length */

913 memcpy(ptr, &len, sizeof(uint32));

914 ptr += sizeof(uint32);

915

916 /* value */

917 memcpy(ptr, DatumGetCString(value), len);

918 ptr += len;

919 }

920

921 /* no underflows or overflows */

922 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));

923 }

924

925 /* we should get exactly nbytes of data for this dimension */

926 Assert((ptr - start) == info[dim].nbytes);

927 }

928

929 /* Serialize the items, with uint16 indexes instead of the values. */

930 for (i = 0; i < mcvlist->nitems; i++)

931 {

932 MCVItem *mcvitem = &mcvlist->items[i];

933

934 /* don't write beyond the allocated space */

935 Assert(ptr <= (endptr - ITEM_SIZE(dim)));

936

937 /* copy NULL and frequency flags into the serialized MCV */

938 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);

939 ptr += sizeof(bool) * ndims;

940

941 memcpy(ptr, &mcvitem->frequency, sizeof(double));

942 ptr += sizeof(double);

943

944 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));

945 ptr += sizeof(double);

946

947 /* store the indexes last */

948 for (dim = 0; dim < ndims; dim++)

949 {

950 uint16 index = 0;

951 Datum *value;

952

953 /* do the lookup only for non-NULL values */

954 if (!mcvitem->isnull[dim])

955 {

956 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],

957 info[dim].nvalues, sizeof(Datum),

958 compare_scalars_simple, &ssup[dim]);

959

960 Assert(value != NULL); /* serialization or deduplication

961 * error */

962

963 /* compute index within the deduplicated array */

964 index = (uint16) (value - values[dim]);

965

966 /* check the index is within expected bounds */

967 Assert(index < info[dim].nvalues);

968 }

969

970 /* copy the index into the serialized MCV */

971 memcpy(ptr, &index, sizeof(uint16));

972 ptr += sizeof(uint16);

973 }

974

975 /* make sure we don't overflow the allocated value */

976 Assert(ptr <= endptr);

977 }

978

979 /* at this point we expect to match the total_length exactly */

980 Assert(ptr == endptr);

981

982 pfree(values);

983 pfree(counts);

984

985 return raw;

986}

987

988/*

989 * statext_mcv_deserialize

990 * Reads serialized MCV list into MCVList structure.

991 *

992 * All the memory needed by the MCV list is allocated as a single chunk, so

993 * it's possible to simply pfree() it at once.

994 */

995MCVList *

996 statext_mcv_deserialize(bytea *data)

997{

998 int dim,

999 i;

1000 Size expected_size;

1001 MCVList *mcvlist;

1002 char *raw;

1003 char *ptr;

1004 char *endptr PG_USED_FOR_ASSERTS_ONLY;

1005

1006 int ndims,

1007 nitems;

1008 DimensionInfo *info = NULL;

1009

1010 /* local allocation buffer (used only for deserialization) */

1011 Datum **map = NULL;

1012

1013 /* MCV list */

1014 Size mcvlen;

1015

1016 /* buffer used for the result */

1017 Size datalen;

1018 char *dataptr;

1019 char *valuesptr;

1020 char *isnullptr;

1021

1022 if (data == NULL)

1023 return NULL;

1024

1025 /*

1026 * We can't possibly deserialize a MCV list if there's not even a complete

1027 * header. We need an explicit formula here, because we serialize the

1028 * header fields one by one, so we need to ignore struct alignment.

1029 */

1030 if (VARSIZE_ANY(data) < MinSizeOfMCVList)

1031 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",

1032 VARSIZE_ANY(data), MinSizeOfMCVList);

1033

1034 /* read the MCV list header */

1035 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));

1036

1037 /* pointer to the data part (skip the varlena header) */

1038 raw = (char *) data;

1039 ptr = VARDATA_ANY(raw);

1040 endptr = (char *) raw + VARSIZE_ANY(data);

1041

1042 /* get the header and perform further sanity checks */

1043 memcpy(&mcvlist->magic, ptr, sizeof(uint32));

1044 ptr += sizeof(uint32);

1045

1046 memcpy(&mcvlist->type, ptr, sizeof(uint32));

1047 ptr += sizeof(uint32);

1048

1049 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));

1050 ptr += sizeof(uint32);

1051

1052 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));

1053 ptr += sizeof(AttrNumber);

1054

1055 if (mcvlist->magic != STATS_MCV_MAGIC)

1056 elog(ERROR, "invalid MCV magic %u (expected %u)",

1057 mcvlist->magic, STATS_MCV_MAGIC);

1058

1059 if (mcvlist->type != STATS_MCV_TYPE_BASIC)

1060 elog(ERROR, "invalid MCV type %u (expected %u)",

1061 mcvlist->type, STATS_MCV_TYPE_BASIC);

1062

1063 if (mcvlist->ndimensions == 0)

1064 elog(ERROR, "invalid zero-length dimension array in MCVList");

1065 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||

1066 (mcvlist->ndimensions < 0))

1067 elog(ERROR, "invalid length (%d) dimension array in MCVList",

1068 mcvlist->ndimensions);

1069

1070 if (mcvlist->nitems == 0)

1071 elog(ERROR, "invalid zero-length item array in MCVList");

1072 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)

1073 elog(ERROR, "invalid length (%u) item array in MCVList",

1074 mcvlist->nitems);

1075

1076 nitems = mcvlist->nitems;

1077 ndims = mcvlist->ndimensions;

1078

1079 /*

1080 * Check amount of data including DimensionInfo for all dimensions and

1081 * also the serialized items (including uint16 indexes). Also, walk

1082 * through the dimension information and add it to the sum.

1083 */

1084 expected_size = SizeOfMCVList(ndims, nitems);

1085

1086 /*

1087 * Check that we have at least the dimension and info records, along with

1088 * the items. We don't know the size of the serialized values yet. We need

1089 * to do this check first, before accessing the dimension info.

1090 */

1091 if (VARSIZE_ANY(data) < expected_size)

1092 elog(ERROR, "invalid MCV size %zu (expected %zu)",

1093 VARSIZE_ANY(data), expected_size);

1094

1095 /* Now copy the array of type Oids. */

1096 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);

1097 ptr += (sizeof(Oid) * ndims);

1098

1099 /* Now it's safe to access the dimension info. */

1100 info = palloc(ndims * sizeof(DimensionInfo));

1101

1102 memcpy(info, ptr, ndims * sizeof(DimensionInfo));

1103 ptr += (ndims * sizeof(DimensionInfo));

1104

1105 /* account for the value arrays */

1106 for (dim = 0; dim < ndims; dim++)

1107 {

1108 /*

1109 * XXX I wonder if we can/should rely on asserts here. Maybe those

1110 * checks should be done every time?

1111 */

1112 Assert(info[dim].nvalues >= 0);

1113 Assert(info[dim].nbytes >= 0);

1114

1115 expected_size += info[dim].nbytes;

1116 }

1117

1118 /*

1119 * Now we know the total expected MCV size, including all the pieces

1120 * (header, dimension info. items and deduplicated data). So do the final

1121 * check on size.

1122 */

1123 if (VARSIZE_ANY(data) != expected_size)

1124 elog(ERROR, "invalid MCV size %zu (expected %zu)",

1125 VARSIZE_ANY(data), expected_size);

1126

1127 /*

1128 * We need an array of Datum values for each dimension, so that we can

1129 * easily translate the uint16 indexes later. We also need a top-level

1130 * array of pointers to those per-dimension arrays.

1131 *

1132 * While allocating the arrays for dimensions, compute how much space we

1133 * need for a copy of the by-ref data, as we can't simply point to the

1134 * original values (it might go away).

1135 */

1136 datalen = 0; /* space for by-ref data */

1137 map = (Datum **) palloc(ndims * sizeof(Datum *));

1138

1139 for (dim = 0; dim < ndims; dim++)

1140 {

1141 map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);

1142

1143 /* space needed for a copy of data for by-ref types */

1144 datalen += info[dim].nbytes_aligned;

1145 }

1146

1147 /*

1148 * Now resize the MCV list so that the allocation includes all the data.

1149 *

1150 * Allocate space for a copy of the data, as we can't simply reference the

1151 * serialized data - it's not aligned properly, and it may disappear while

1152 * we're still using the MCV list, e.g. due to catcache release.

1153 *

1154 * We do care about alignment here, because we will allocate all the

1155 * pieces at once, but then use pointers to different parts.

1156 */

1157 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));

1158

1159 /* arrays of values and isnull flags for all MCV items */

1160 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);

1161 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);

1162

1163 /* we don't quite need to align this, but it makes some asserts easier */

1164 mcvlen += MAXALIGN(datalen);

1165

1166 /* now resize the deserialized MCV list, and compute pointers to parts */

1167 mcvlist = repalloc(mcvlist, mcvlen);

1168

1169 /* pointer to the beginning of values/isnull arrays */

1170 valuesptr = (char *) mcvlist

1171 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));

1172

1173 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));

1174

1175 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));

1176

1177 /*

1178 * Build mapping (index => value) for translating the serialized data into

1179 * the in-memory representation.

1180 */

1181 for (dim = 0; dim < ndims; dim++)

1182 {

1183 /* remember start position in the input array */

1184 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;

1185

1186 if (info[dim].typbyval)

1187 {

1188 /* for by-val types we simply copy data into the mapping */

1189 for (i = 0; i < info[dim].nvalues; i++)

1190 {

1191 Datum v = 0;

1192

1193 memcpy(&v, ptr, info[dim].typlen);

1194 ptr += info[dim].typlen;

1195

1196 map[dim][i] = fetch_att(&v, true, info[dim].typlen);

1197

1198 /* no under/overflow of input array */

1199 Assert(ptr <= (start + info[dim].nbytes));

1200 }

1201 }

1202 else

1203 {

1204 /* for by-ref types we need to also make a copy of the data */

1205

1206 /* passed by reference, but fixed length (name, tid, ...) */

1207 if (info[dim].typlen > 0)

1208 {

1209 for (i = 0; i < info[dim].nvalues; i++)

1210 {

1211 memcpy(dataptr, ptr, info[dim].typlen);

1212 ptr += info[dim].typlen;

1213

1214 /* just point into the array */

1215 map[dim][i] = PointerGetDatum(dataptr);

1216 dataptr += MAXALIGN(info[dim].typlen);

1217 }

1218 }

1219 else if (info[dim].typlen == -1)

1220 {

1221 /* varlena */

1222 for (i = 0; i < info[dim].nvalues; i++)

1223 {

1224 uint32 len;

1225

1226 /* read the uint32 length */

1227 memcpy(&len, ptr, sizeof(uint32));

1228 ptr += sizeof(uint32);

1229

1230 /* the length is data-only */

1231 SET_VARSIZE(dataptr, len + VARHDRSZ);

1232 memcpy(VARDATA(dataptr), ptr, len);

1233 ptr += len;

1234

1235 /* just point into the array */

1236 map[dim][i] = PointerGetDatum(dataptr);

1237

1238 /* skip to place of the next deserialized value */

1239 dataptr += MAXALIGN(len + VARHDRSZ);

1240 }

1241 }

1242 else if (info[dim].typlen == -2)

1243 {

1244 /* cstring */

1245 for (i = 0; i < info[dim].nvalues; i++)

1246 {

1247 uint32 len;

1248

1249 memcpy(&len, ptr, sizeof(uint32));

1250 ptr += sizeof(uint32);

1251

1252 memcpy(dataptr, ptr, len);

1253 ptr += len;

1254

1255 /* just point into the array */

1256 map[dim][i] = PointerGetDatum(dataptr);

1257 dataptr += MAXALIGN(len);

1258 }

1259 }

1260

1261 /* no under/overflow of input array */

1262 Assert(ptr <= (start + info[dim].nbytes));

1263

1264 /* no overflow of the output mcv value */

1265 Assert(dataptr <= ((char *) mcvlist + mcvlen));

1266 }

1267

1268 /* check we consumed input data for this dimension exactly */

1269 Assert(ptr == (start + info[dim].nbytes));

1270 }

1271

1272 /* we should have also filled the MCV list exactly */

1273 Assert(dataptr == ((char *) mcvlist + mcvlen));

1274

1275 /* deserialize the MCV items and translate the indexes to Datums */

1276 for (i = 0; i < nitems; i++)

1277 {

1278 MCVItem *item = &mcvlist->items[i];

1279

1280 item->values = (Datum *) valuesptr;

1281 valuesptr += MAXALIGN(sizeof(Datum) * ndims);

1282

1283 item->isnull = (bool *) isnullptr;

1284 isnullptr += MAXALIGN(sizeof(bool) * ndims);

1285

1286 memcpy(item->isnull, ptr, sizeof(bool) * ndims);

1287 ptr += sizeof(bool) * ndims;

1288

1289 memcpy(&item->frequency, ptr, sizeof(double));

1290 ptr += sizeof(double);

1291

1292 memcpy(&item->base_frequency, ptr, sizeof(double));

1293 ptr += sizeof(double);

1294

1295 /* finally translate the indexes (for non-NULL only) */

1296 for (dim = 0; dim < ndims; dim++)

1297 {

1298 uint16 index;

1299

1300 memcpy(&index, ptr, sizeof(uint16));

1301 ptr += sizeof(uint16);

1302

1303 if (item->isnull[dim])

1304 continue;

1305

1306 item->values[dim] = map[dim][index];

1307 }

1308

1309 /* check we're not overflowing the input */

1310 Assert(ptr <= endptr);

1311 }

1312

1313 /* check that we processed all the data */

1314 Assert(ptr == endptr);

1315

1316 /* release the buffers used for mapping */

1317 for (dim = 0; dim < ndims; dim++)

1318 pfree(map[dim]);

1319

1320 pfree(map);

1321

1322 return mcvlist;

1323}

1324

1325/*

1326 * SRF with details about buckets of a histogram:

1327 *

1328 * - item ID (0...nitems)

1329 * - values (string array)

1330 * - nulls only (boolean array)

1331 * - frequency (double precision)

1332 * - base_frequency (double precision)

1333 *

1334 * The input is the OID of the statistics, and there are no rows returned if

1335 * the statistics contains no histogram.

1336 */

1337Datum

1338 pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)

1339{

1340 FuncCallContext *funcctx;

1341

1342 /* stuff done only on the first call of the function */

1343 if (SRF_IS_FIRSTCALL())

1344 {

1345 MemoryContext oldcontext;

1346 MCVList *mcvlist;

1347 TupleDesc tupdesc;

1348

1349 /* create a function context for cross-call persistence */

1350 funcctx = SRF_FIRSTCALL_INIT();

1351

1352 /* switch to memory context appropriate for multiple function calls */

1353 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

1354

1355 mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));

1356

1357 funcctx->user_fctx = mcvlist;

1358

1359 /* total number of tuples to be returned */

1360 funcctx->max_calls = 0;

1361 if (funcctx->user_fctx != NULL)

1362 funcctx->max_calls = mcvlist->nitems;

1363

1364 /* Build a tuple descriptor for our result type */

1365 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)

1366 ereport(ERROR,

1367 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

1368 errmsg("function returning record called in context "

1369 "that cannot accept type record")));

1370 tupdesc = BlessTupleDesc(tupdesc);

1371

1372 /*

1373 * generate attribute metadata needed later to produce tuples from raw

1374 * C strings

1375 */

1376 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);

1377

1378 MemoryContextSwitchTo(oldcontext);

1379 }

1380

1381 /* stuff done on every call of the function */

1382 funcctx = SRF_PERCALL_SETUP();

1383

1384 if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more

1385 * left to send */

1386 {

1387 Datum values[5];

1388 bool nulls[5];

1389 HeapTuple tuple;

1390 Datum result;

1391 ArrayBuildState *astate_values = NULL;

1392 ArrayBuildState *astate_nulls = NULL;

1393

1394 int i;

1395 MCVList *mcvlist;

1396 MCVItem *item;

1397

1398 mcvlist = (MCVList *) funcctx->user_fctx;

1399

1400 Assert(funcctx->call_cntr < mcvlist->nitems);

1401

1402 item = &mcvlist->items[funcctx->call_cntr];

1403

1404 for (i = 0; i < mcvlist->ndimensions; i++)

1405 {

1406

1407 astate_nulls = accumArrayResult(astate_nulls,

1408 BoolGetDatum(item->isnull[i]),

1409 false,

1410 BOOLOID,

1411 CurrentMemoryContext);

1412

1413 if (!item->isnull[i])

1414 {

1415 bool isvarlena;

1416 Oid outfunc;

1417 FmgrInfo fmgrinfo;

1418 Datum val;

1419 text *txt;

1420

1421 /* lookup output func for the type */

1422 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);

1423 fmgr_info(outfunc, &fmgrinfo);

1424

1425 val = FunctionCall1(&fmgrinfo, item->values[i]);

1426 txt = cstring_to_text(DatumGetPointer(val));

1427

1428 astate_values = accumArrayResult(astate_values,

1429 PointerGetDatum(txt),

1430 false,

1431 TEXTOID,

1432 CurrentMemoryContext);

1433 }

1434 else

1435 astate_values = accumArrayResult(astate_values,

1436 (Datum) 0,

1437 true,

1438 TEXTOID,

1439 CurrentMemoryContext);

1440 }

1441

1442 values[0] = Int32GetDatum(funcctx->call_cntr);

1443 values[1] = makeArrayResult(astate_values, CurrentMemoryContext);

1444 values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);

1445 values[3] = Float8GetDatum(item->frequency);

1446 values[4] = Float8GetDatum(item->base_frequency);

1447

1448 /* no NULLs in the tuple */

1449 memset(nulls, 0, sizeof(nulls));

1450

1451 /* build a tuple */

1452 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);

1453

1454 /* make the tuple into a datum */

1455 result = HeapTupleGetDatum(tuple);

1456

1457 SRF_RETURN_NEXT(funcctx, result);

1458 }

1459 else /* do when there is no more left */

1460 {

1461 SRF_RETURN_DONE(funcctx);

1462 }

1463}

1464

1465/*

1466 * pg_mcv_list_in - input routine for type pg_mcv_list.

1467 *

1468 * pg_mcv_list is real enough to be a table column, but it has no operations

1469 * of its own, and disallows input too

1470 */

1471Datum

1472 pg_mcv_list_in(PG_FUNCTION_ARGS)

1473{

1474 /*

1475 * pg_mcv_list stores the data in binary form and parsing text input is

1476 * not needed, so disallow this.

1477 */

1478 ereport(ERROR,

1479 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

1480 errmsg("cannot accept a value of type %s", "pg_mcv_list")));

1481

1482 PG_RETURN_VOID(); /* keep compiler quiet */

1483}

1484

1485

1486/*

1487 * pg_mcv_list_out - output routine for type pg_mcv_list.

1488 *

1489 * MCV lists are serialized into a bytea value, so we simply call byteaout()

1490 * to serialize the value into text. But it'd be nice to serialize that into

1491 * a meaningful representation (e.g. for inspection by people).

1492 *

1493 * XXX This should probably return something meaningful, similar to what

1494 * pg_dependencies_out does. Not sure how to deal with the deduplicated

1495 * values, though - do we want to expand that or not?

1496 */

1497Datum

1498 pg_mcv_list_out(PG_FUNCTION_ARGS)

1499{

1500 return byteaout(fcinfo);

1501}

1502

1503/*

1504 * pg_mcv_list_recv - binary input routine for type pg_mcv_list.

1505 */

1506Datum

1507 pg_mcv_list_recv(PG_FUNCTION_ARGS)

1508{

1509 ereport(ERROR,

1510 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),

1511 errmsg("cannot accept a value of type %s", "pg_mcv_list")));

1512

1513 PG_RETURN_VOID(); /* keep compiler quiet */

1514}

1515

1516/*

1517 * pg_mcv_list_send - binary output routine for type pg_mcv_list.

1518 *

1519 * MCV lists are serialized in a bytea value (although the type is named

1520 * differently), so let's just send that.

1521 */

1522Datum

1523 pg_mcv_list_send(PG_FUNCTION_ARGS)

1524{

1525 return byteasend(fcinfo);

1526}

1527

1528/*

1529 * match the attribute/expression to a dimension of the statistic

1530 *

1531 * Returns the zero-based index of the matching statistics dimension.

1532 * Optionally determines the collation.

1533 */

1534static int

1535 mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)

1536{

1537 int idx;

1538

1539 if (IsA(expr, Var))

1540 {

1541 /* simple Var, so just lookup using varattno */

1542 Var *var = (Var *) expr;

1543

1544 if (collid)

1545 *collid = var->varcollid;

1546

1547 idx = bms_member_index(keys, var->varattno);

1548

1549 if (idx < 0)

1550 elog(ERROR, "variable not found in statistics object");

1551 }

1552 else

1553 {

1554 /* expression - lookup in stats expressions */

1555 ListCell *lc;

1556

1557 if (collid)

1558 *collid = exprCollation(expr);

1559

1560 /* expressions are stored after the simple columns */

1561 idx = bms_num_members(keys);

1562 foreach(lc, exprs)

1563 {

1564 Node *stat_expr = (Node *) lfirst(lc);

1565

1566 if (equal(expr, stat_expr))

1567 break;

1568

1569 idx++;

1570 }

1571

1572 if (lc == NULL)

1573 elog(ERROR, "expression not found in statistics object");

1574 }

1575

1576 return idx;

1577}

1578

1579/*

1580 * mcv_get_match_bitmap

1581 * Evaluate clauses using the MCV list, and update the match bitmap.

1582 *

1583 * A match bitmap keeps match/mismatch status for each MCV item, and we

1584 * update it based on additional clauses. We also use it to skip items

1585 * that can't possibly match (e.g. item marked as "mismatch" can't change

1586 * to "match" when evaluating AND clause list).

1587 *

1588 * The function also returns a flag indicating whether there was an

1589 * equality condition for all attributes, the minimum frequency in the MCV

1590 * list, and a total MCV frequency (sum of frequencies for all items).

1591 *

1592 * XXX Currently the match bitmap uses a bool for each MCV item, which is

1593 * somewhat wasteful as we could do with just a single bit, thus reducing

1594 * the size to ~1/8. It would also allow us to combine bitmaps simply using

1595 * & and |, which should be faster than min/max. The bitmaps are fairly

1596 * small, though (thanks to the cap on the MCV list size).

1597 */

1598static bool *

1599 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,

1600 Bitmapset *keys, List *exprs,

1601 MCVList *mcvlist, bool is_or)

1602{

1603 ListCell *l;

1604 bool *matches;

1605

1606 /* The bitmap may be partially built. */

1607 Assert(clauses != NIL);

1608 Assert(mcvlist != NULL);

1609 Assert(mcvlist->nitems > 0);

1610 Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);

1611

1612 matches = palloc(sizeof(bool) * mcvlist->nitems);

1613 memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);

1614

1615 /*

1616 * Loop through the list of clauses, and for each of them evaluate all the

1617 * MCV items not yet eliminated by the preceding clauses.

1618 */

1619 foreach(l, clauses)

1620 {

1621 Node *clause = (Node *) lfirst(l);

1622

1623 /* if it's a RestrictInfo, then extract the clause */

1624 if (IsA(clause, RestrictInfo))

1625 clause = (Node *) ((RestrictInfo *) clause)->clause;

1626

1627 /*

1628 * Handle the various types of clauses - OpClause, NullTest and

1629 * AND/OR/NOT

1630 */

1631 if (is_opclause(clause))

1632 {

1633 OpExpr *expr = (OpExpr *) clause;

1634 FmgrInfo opproc;

1635

1636 /* valid only after examine_opclause_args returns true */

1637 Node *clause_expr;

1638 Const *cst;

1639 bool expronleft;

1640 int idx;

1641 Oid collid;

1642

1643 fmgr_info(get_opcode(expr->opno), &opproc);

1644

1645 /* extract the var/expr and const from the expression */

1646 if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))

1647 elog(ERROR, "incompatible clause");

1648

1649 /* match the attribute/expression to a dimension of the statistic */

1650 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);

1651

1652 /*

1653 * Walk through the MCV items and evaluate the current clause. We

1654 * can skip items that were already ruled out, and terminate if

1655 * there are no remaining MCV items that might possibly match.

1656 */

1657 for (int i = 0; i < mcvlist->nitems; i++)

1658 {

1659 bool match = true;

1660 MCVItem *item = &mcvlist->items[i];

1661

1662 Assert(idx >= 0);

1663

1664 /*

1665 * When the MCV item or the Const value is NULL we can treat

1666 * this as a mismatch. We must not call the operator because

1667 * of strictness.

1668 */

1669 if (item->isnull[idx] || cst->constisnull)

1670 {

1671 matches[i] = RESULT_MERGE(matches[i], is_or, false);

1672 continue;

1673 }

1674

1675 /*

1676 * Skip MCV items that can't change result in the bitmap. Once

1677 * the value gets false for AND-lists, or true for OR-lists,

1678 * we don't need to look at more clauses.

1679 */

1680 if (RESULT_IS_FINAL(matches[i], is_or))

1681 continue;

1682

1683 /*

1684 * First check whether the constant is below the lower

1685 * boundary (in that case we can skip the bucket, because

1686 * there's no overlap).

1687 *

1688 * We don't store collations used to build the statistics, but

1689 * we can use the collation for the attribute itself, as

1690 * stored in varcollid. We do reset the statistics after a

1691 * type change (including collation change), so this is OK.

1692 * For expressions, we use the collation extracted from the

1693 * expression itself.

1694 */

1695 if (expronleft)

1696 match = DatumGetBool(FunctionCall2Coll(&opproc,

1697 collid,

1698 item->values[idx],

1699 cst->constvalue));

1700 else

1701 match = DatumGetBool(FunctionCall2Coll(&opproc,

1702 collid,

1703 cst->constvalue,

1704 item->values[idx]));

1705

1706 /* update the match bitmap with the result */

1707 matches[i] = RESULT_MERGE(matches[i], is_or, match);

1708 }

1709 }

1710 else if (IsA(clause, ScalarArrayOpExpr))

1711 {

1712 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;

1713 FmgrInfo opproc;

1714

1715 /* valid only after examine_opclause_args returns true */

1716 Node *clause_expr;

1717 Const *cst;

1718 bool expronleft;

1719 Oid collid;

1720 int idx;

1721

1722 /* array evaluation */

1723 ArrayType *arrayval;

1724 int16 elmlen;

1725 bool elmbyval;

1726 char elmalign;

1727 int num_elems;

1728 Datum *elem_values;

1729 bool *elem_nulls;

1730

1731 fmgr_info(get_opcode(expr->opno), &opproc);

1732

1733 /* extract the var/expr and const from the expression */

1734 if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))

1735 elog(ERROR, "incompatible clause");

1736

1737 /* We expect Var on left */

1738 if (!expronleft)

1739 elog(ERROR, "incompatible clause");

1740

1741 /*

1742 * Deconstruct the array constant, unless it's NULL (we'll cover

1743 * that case below)

1744 */

1745 if (!cst->constisnull)

1746 {

1747 arrayval = DatumGetArrayTypeP(cst->constvalue);

1748 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),

1749 &elmlen, &elmbyval, &elmalign);

1750 deconstruct_array(arrayval,

1751 ARR_ELEMTYPE(arrayval),

1752 elmlen, elmbyval, elmalign,

1753 &elem_values, &elem_nulls, &num_elems);

1754 }

1755

1756 /* match the attribute/expression to a dimension of the statistic */

1757 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);

1758

1759 /*

1760 * Walk through the MCV items and evaluate the current clause. We

1761 * can skip items that were already ruled out, and terminate if

1762 * there are no remaining MCV items that might possibly match.

1763 */

1764 for (int i = 0; i < mcvlist->nitems; i++)

1765 {

1766 int j;

1767 bool match = !expr->useOr;

1768 MCVItem *item = &mcvlist->items[i];

1769

1770 /*

1771 * When the MCV item or the Const value is NULL we can treat

1772 * this as a mismatch. We must not call the operator because

1773 * of strictness.

1774 */

1775 if (item->isnull[idx] || cst->constisnull)

1776 {

1777 matches[i] = RESULT_MERGE(matches[i], is_or, false);

1778 continue;

1779 }

1780

1781 /*

1782 * Skip MCV items that can't change result in the bitmap. Once

1783 * the value gets false for AND-lists, or true for OR-lists,

1784 * we don't need to look at more clauses.

1785 */

1786 if (RESULT_IS_FINAL(matches[i], is_or))

1787 continue;

1788

1789 for (j = 0; j < num_elems; j++)

1790 {

1791 Datum elem_value = elem_values[j];

1792 bool elem_isnull = elem_nulls[j];

1793 bool elem_match;

1794

1795 /* NULL values always evaluate as not matching. */

1796 if (elem_isnull)

1797 {

1798 match = RESULT_MERGE(match, expr->useOr, false);

1799 continue;

1800 }

1801

1802 /*

1803 * Stop evaluating the array elements once we reach a

1804 * matching value that can't change - ALL() is the same as

1805 * AND-list, ANY() is the same as OR-list.

1806 */

1807 if (RESULT_IS_FINAL(match, expr->useOr))

1808 break;

1809

1810 elem_match = DatumGetBool(FunctionCall2Coll(&opproc,

1811 collid,

1812 item->values[idx],

1813 elem_value));

1814

1815 match = RESULT_MERGE(match, expr->useOr, elem_match);

1816 }

1817

1818 /* update the match bitmap with the result */

1819 matches[i] = RESULT_MERGE(matches[i], is_or, match);

1820 }

1821 }

1822 else if (IsA(clause, NullTest))

1823 {

1824 NullTest *expr = (NullTest *) clause;

1825 Node *clause_expr = (Node *) (expr->arg);

1826

1827 /* match the attribute/expression to a dimension of the statistic */

1828 int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);

1829

1830 /*

1831 * Walk through the MCV items and evaluate the current clause. We

1832 * can skip items that were already ruled out, and terminate if

1833 * there are no remaining MCV items that might possibly match.

1834 */

1835 for (int i = 0; i < mcvlist->nitems; i++)

1836 {

1837 bool match = false; /* assume mismatch */

1838 MCVItem *item = &mcvlist->items[i];

1839

1840 /* if the clause mismatches the MCV item, update the bitmap */

1841 switch (expr->nulltesttype)

1842 {

1843 case IS_NULL:

1844 match = (item->isnull[idx]) ? true : match;

1845 break;

1846

1847 case IS_NOT_NULL:

1848 match = (!item->isnull[idx]) ? true : match;

1849 break;

1850 }

1851

1852 /* now, update the match bitmap, depending on OR/AND type */

1853 matches[i] = RESULT_MERGE(matches[i], is_or, match);

1854 }

1855 }

1856 else if (is_orclause(clause) || is_andclause(clause))

1857 {

1858 /* AND/OR clause, with all subclauses being compatible */

1859

1860 int i;

1861 BoolExpr *bool_clause = ((BoolExpr *) clause);

1862 List *bool_clauses = bool_clause->args;

1863

1864 /* match/mismatch bitmap for each MCV item */

1865 bool *bool_matches = NULL;

1866

1867 Assert(bool_clauses != NIL);

1868 Assert(list_length(bool_clauses) >= 2);

1869

1870 /* build the match bitmap for the OR-clauses */

1871 bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,

1872 mcvlist, is_orclause(clause));

1873

1874 /*

1875 * Merge the bitmap produced by mcv_get_match_bitmap into the

1876 * current one. We need to consider if we're evaluating AND or OR

1877 * condition when merging the results.

1878 */

1879 for (i = 0; i < mcvlist->nitems; i++)

1880 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);

1881

1882 pfree(bool_matches);

1883 }

1884 else if (is_notclause(clause))

1885 {

1886 /* NOT clause, with all subclauses compatible */

1887

1888 int i;

1889 BoolExpr *not_clause = ((BoolExpr *) clause);

1890 List *not_args = not_clause->args;

1891

1892 /* match/mismatch bitmap for each MCV item */

1893 bool *not_matches = NULL;

1894

1895 Assert(not_args != NIL);

1896 Assert(list_length(not_args) == 1);

1897

1898 /* build the match bitmap for the NOT-clause */

1899 not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,

1900 mcvlist, false);

1901

1902 /*

1903 * Merge the bitmap produced by mcv_get_match_bitmap into the

1904 * current one. We're handling a NOT clause, so invert the result

1905 * before merging it into the global bitmap.

1906 */

1907 for (i = 0; i < mcvlist->nitems; i++)

1908 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);

1909

1910 pfree(not_matches);

1911 }

1912 else if (IsA(clause, Var))

1913 {

1914 /* Var (has to be a boolean Var, possibly from below NOT) */

1915

1916 Var *var = (Var *) (clause);

1917

1918 /* match the attribute to a dimension of the statistic */

1919 int idx = bms_member_index(keys, var->varattno);

1920

1921 Assert(var->vartype == BOOLOID);

1922

1923 /*

1924 * Walk through the MCV items and evaluate the current clause. We

1925 * can skip items that were already ruled out, and terminate if

1926 * there are no remaining MCV items that might possibly match.

1927 */

1928 for (int i = 0; i < mcvlist->nitems; i++)

1929 {

1930 MCVItem *item = &mcvlist->items[i];

1931 bool match = false;

1932

1933 /* if the item is NULL, it's a mismatch */

1934 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))

1935 match = true;

1936

1937 /* update the result bitmap */

1938 matches[i] = RESULT_MERGE(matches[i], is_or, match);

1939 }

1940 }

1941 else

1942 {

1943 /* Otherwise, it must be a bare boolean-returning expression */

1944 int idx;

1945

1946 /* match the expression to a dimension of the statistic */

1947 idx = mcv_match_expression(clause, keys, exprs, NULL);

1948

1949 /*

1950 * Walk through the MCV items and evaluate the current clause. We

1951 * can skip items that were already ruled out, and terminate if

1952 * there are no remaining MCV items that might possibly match.

1953 */

1954 for (int i = 0; i < mcvlist->nitems; i++)

1955 {

1956 bool match;

1957 MCVItem *item = &mcvlist->items[i];

1958

1959 /* "match" just means it's bool TRUE */

1960 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);

1961

1962 /* now, update the match bitmap, depending on OR/AND type */

1963 matches[i] = RESULT_MERGE(matches[i], is_or, match);

1964 }

1965 }

1966 }

1967

1968 return matches;

1969}

1970

1971

1972/*

1973 * mcv_combine_selectivities

1974 * Combine per-column and multi-column MCV selectivity estimates.

1975 *

1976 * simple_sel is a "simple" selectivity estimate (produced without using any

1977 * extended statistics, essentially assuming independence of columns/clauses).

1978 *

1979 * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of

1980 * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then

1981 * essentially interpreted as a correction to be added to simple_sel, as

1982 * described below.

1983 *

1984 * mcv_totalsel is the sum of the frequencies of all MCV items (not just the

1985 * matching ones). This is used as an upper bound on the portion of the

1986 * selectivity estimates not covered by the MCV statistics.

1987 *

1988 * Note: While simple and base selectivities are defined in a quite similar

1989 * way, the values are computed differently and are not therefore equal. The

1990 * simple selectivity is computed as a product of per-clause estimates, while

1991 * the base selectivity is computed by adding up base frequencies of matching

1992 * items of the multi-column MCV list. So the values may differ for two main

1993 * reasons - (a) the MCV list may not cover 100% of the data and (b) some of

1994 * the MCV items did not match the estimated clauses.

1995 *

1996 * As both (a) and (b) reduce the base selectivity value, it generally holds

1997 * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the

1998 * values may be equal.

1999 *

2000 * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not

2001 * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a

2002 * correction for the part covered by the MCV list. Those two statements are

2003 * actually equivalent.

2004 */

2005Selectivity

2006 mcv_combine_selectivities(Selectivity simple_sel,

2007 Selectivity mcv_sel,

2008 Selectivity mcv_basesel,

2009 Selectivity mcv_totalsel)

2010{

2011 Selectivity other_sel;

2012 Selectivity sel;

2013

2014 /* estimated selectivity of values not covered by MCV matches */

2015 other_sel = simple_sel - mcv_basesel;

2016 CLAMP_PROBABILITY(other_sel);

2017

2018 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */

2019 if (other_sel > 1.0 - mcv_totalsel)

2020 other_sel = 1.0 - mcv_totalsel;

2021

2022 /* overall selectivity is the sum of the MCV and non-MCV parts */

2023 sel = mcv_sel + other_sel;

2024 CLAMP_PROBABILITY(sel);

2025

2026 return sel;

2027}

2028

2029

2030/*

2031 * mcv_clauselist_selectivity

2032 * Use MCV statistics to estimate the selectivity of an implicitly-ANDed

2033 * list of clauses.

2034 *

2035 * This determines which MCV items match every clause in the list and returns

2036 * the sum of the frequencies of those items.

2037 *

2038 * In addition, it returns the sum of the base frequencies of each of those

2039 * items (that is the sum of the selectivities that each item would have if

2040 * the columns were independent of one another), and the total selectivity of

2041 * all the MCV items (not just the matching ones). These are expected to be

2042 * used together with a "simple" selectivity estimate (one based only on

2043 * per-column statistics) to produce an overall selectivity estimate that

2044 * makes use of both per-column and multi-column statistics --- see

2045 * mcv_combine_selectivities().

2046 */

2047Selectivity

2048 mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,

2049 List *clauses, int varRelid,

2050 JoinType jointype, SpecialJoinInfo *sjinfo,

2051 RelOptInfo *rel,

2052 Selectivity *basesel, Selectivity *totalsel)

2053{

2054 int i;

2055 MCVList *mcv;

2056 Selectivity s = 0.0;

2057 RangeTblEntry *rte = root->simple_rte_array[rel->relid];

2058

2059 /* match/mismatch bitmap for each MCV item */

2060 bool *matches = NULL;

2061

2062 /* load the MCV list stored in the statistics object */

2063 mcv = statext_mcv_load(stat->statOid, rte->inh);

2064

2065 /* build a match bitmap for the clauses */

2066 matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,

2067 mcv, false);

2068

2069 /* sum frequencies for all the matching MCV items */

2070 *basesel = 0.0;

2071 *totalsel = 0.0;

2072 for (i = 0; i < mcv->nitems; i++)

2073 {

2074 *totalsel += mcv->items[i].frequency;

2075

2076 if (matches[i] != false)

2077 {

2078 *basesel += mcv->items[i].base_frequency;

2079 s += mcv->items[i].frequency;

2080 }

2081 }

2082

2083 return s;

2084}

2085

2086

2087/*

2088 * mcv_clause_selectivity_or

2089 * Use MCV statistics to estimate the selectivity of a clause that

2090 * appears in an ORed list of clauses.

2091 *

2092 * As with mcv_clauselist_selectivity() this determines which MCV items match

2093 * the clause and returns both the sum of the frequencies and the sum of the

2094 * base frequencies of those items, as well as the sum of the frequencies of

2095 * all MCV items (not just the matching ones) so that this information can be

2096 * used by mcv_combine_selectivities() to produce a selectivity estimate that

2097 * makes use of both per-column and multi-column statistics.

2098 *

2099 * Additionally, we return information to help compute the overall selectivity

2100 * of the ORed list of clauses assumed to contain this clause. This function

2101 * is intended to be called for each clause in the ORed list of clauses,

2102 * allowing the overall selectivity to be computed using the following

2103 * algorithm:

2104 *

2105 * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity

2106 * of the first n clauses in the list. Then the combined selectivity taking

2107 * into account the next clause C[n+1] can be written as

2108 *

2109 * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])

2110 *

2111 * The final term above represents the overlap between the clauses examined so

2112 * far and the (n+1)'th clause. To estimate its selectivity, we track the

2113 * match bitmap for the ORed list of clauses examined so far and examine its

2114 * intersection with the match bitmap for the (n+1)'th clause.

2115 *

2116 * We then also return the sums of the MCV item frequencies and base

2117 * frequencies for the match bitmap intersection corresponding to the overlap

2118 * term above, so that they can be combined with a simple selectivity estimate

2119 * for that term.

2120 *

2121 * The parameter "or_matches" is an in/out parameter tracking the match bitmap

2122 * for the clauses examined so far. The caller is expected to set it to NULL

2123 * the first time it calls this function.

2124 */

2125Selectivity

2126 mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,

2127 MCVList *mcv, Node *clause, bool **or_matches,

2128 Selectivity *basesel, Selectivity *overlap_mcvsel,

2129 Selectivity *overlap_basesel, Selectivity *totalsel)

2130{

2131 Selectivity s = 0.0;

2132 bool *new_matches;

2133 int i;

2134

2135 /* build the OR-matches bitmap, if not built already */

2136 if (*or_matches == NULL)

2137 *or_matches = palloc0(sizeof(bool) * mcv->nitems);

2138

2139 /* build the match bitmap for the new clause */

2140 new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,

2141 stat->exprs, mcv, false);

2142

2143 /*

2144 * Sum the frequencies for all the MCV items matching this clause and also

2145 * those matching the overlap between this clause and any of the preceding

2146 * clauses as described above.

2147 */

2148 *basesel = 0.0;

2149 *overlap_mcvsel = 0.0;

2150 *overlap_basesel = 0.0;

2151 *totalsel = 0.0;

2152 for (i = 0; i < mcv->nitems; i++)

2153 {

2154 *totalsel += mcv->items[i].frequency;

2155

2156 if (new_matches[i])

2157 {

2158 s += mcv->items[i].frequency;

2159 *basesel += mcv->items[i].base_frequency;

2160

2161 if ((*or_matches)[i])

2162 {

2163 *overlap_mcvsel += mcv->items[i].frequency;

2164 *overlap_basesel += mcv->items[i].base_frequency;

2165 }

2166 }

2167

2168 /* update the OR-matches bitmap for the next clause */

2169 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];

2170 }

2171

2172 pfree(new_matches);

2173

2174 return s;

2175}

idx

Datum idx(PG_FUNCTION_ARGS)

Definition: _int_op.c:262

array.h

DatumGetArrayTypeP

#define DatumGetArrayTypeP(X)

Definition: array.h:261

ARR_ELEMTYPE

#define ARR_ELEMTYPE(a)

Definition: array.h:292

accumArrayResult

ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)

Definition: arrayfuncs.c:5350

deconstruct_array

void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)

Definition: arrayfuncs.c:3631

makeArrayResult

Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)

Definition: arrayfuncs.c:5420

AttrNumber

int16 AttrNumber

Definition: attnum.h:21

bms_num_members

int bms_num_members(const Bitmapset *a)

Definition: bitmapset.c:751

bms_member_index

int bms_member_index(Bitmapset *a, int x)

Definition: bitmapset.c:539

values

static Datum values[MAXATTR]

Definition: bootstrap.c:153

builtins.h

byteaout

Datum byteaout(PG_FUNCTION_ARGS)

Definition: bytea.c:261

byteasend

Datum byteasend(PG_FUNCTION_ARGS)

Definition: bytea.c:363

MAXALIGN

#define MAXALIGN(LEN)

Definition: c.h:810

PG_USED_FOR_ASSERTS_ONLY

#define PG_USED_FOR_ASSERTS_ONLY

Definition: c.h:223

VARHDRSZ

#define VARHDRSZ

Definition: c.h:697

int16

int16_t int16

Definition: c.h:533

uint16

uint16_t uint16

Definition: c.h:537

uint32

uint32_t uint32

Definition: c.h:538

PG_UINT16_MAX

#define PG_UINT16_MAX

Definition: c.h:592

Size

size_t Size

Definition: c.h:610

collid

Oid collid

Definition: collationcmds.c:700

errcode

int errcode(int sqlerrcode)

Definition: elog.c:854

errmsg

int errmsg(const char *fmt,...)

Definition: elog.c:1071

ERROR

#define ERROR

Definition: elog.h:39

elog

#define elog(elevel,...)

Definition: elog.h:226

ereport

#define ereport(elevel,...)

Definition: elog.h:150

equal

bool equal(const void *a, const void *b)

Definition: equalfuncs.c:223

BlessTupleDesc

TupleDesc BlessTupleDesc(TupleDesc tupdesc)

Definition: execTuples.c:2260

TupleDescGetAttInMetadata

AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)

Definition: execTuples.c:2275

compare_scalars_simple

int compare_scalars_simple(const void *a, const void *b, void *arg)

Definition: extended_stats.c:916

compare_datums_simple

int compare_datums_simple(Datum a, Datum b, SortSupport ssup)

Definition: extended_stats.c:924

build_sorted_items

SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)

Definition: extended_stats.c:983

multi_sort_compare

int multi_sort_compare(const void *a, const void *b, void *arg)

Definition: extended_stats.c:862

multi_sort_init

MultiSortSupport multi_sort_init(int ndims)

Definition: extended_stats.c:829

multi_sort_add_dimension

void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)

Definition: extended_stats.c:848

examine_opclause_args

bool examine_opclause_args(List *args, Node **exprp, Const **cstp, bool *expronleftp)

Definition: extended_stats.c:2033

extended_stats_internal.h

MultiSortSupport

MultiSortSupportData * MultiSortSupport

Definition: extended_stats_internal.h:51

DimensionInfo

struct DimensionInfo DimensionInfo

FunctionCall2Coll

Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)

Definition: fmgr.c:1149

fmgr_info

void fmgr_info(Oid functionId, FmgrInfo *finfo)

Definition: fmgr.c:127

fmgr.h

PG_RETURN_VOID

#define PG_RETURN_VOID()

Definition: fmgr.h:349

PG_DETOAST_DATUM

#define PG_DETOAST_DATUM(datum)

Definition: fmgr.h:240

FunctionCall1

#define FunctionCall1(flinfo, arg1)

Definition: fmgr.h:700

PG_GETARG_BYTEA_P

#define PG_GETARG_BYTEA_P(n)

Definition: fmgr.h:335

PG_FUNCTION_ARGS

#define PG_FUNCTION_ARGS

Definition: fmgr.h:193

DatumGetByteaP

#define DatumGetByteaP(X)

Definition: fmgr.h:331

get_call_result_type

TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)

Definition: funcapi.c:276

funcapi.h

SRF_IS_FIRSTCALL

#define SRF_IS_FIRSTCALL()

Definition: funcapi.h:304

SRF_PERCALL_SETUP

#define SRF_PERCALL_SETUP()

Definition: funcapi.h:308

TYPEFUNC_COMPOSITE

@ TYPEFUNC_COMPOSITE

Definition: funcapi.h:149

SRF_RETURN_NEXT

#define SRF_RETURN_NEXT(_funcctx, _result)

Definition: funcapi.h:310

SRF_FIRSTCALL_INIT

#define SRF_FIRSTCALL_INIT()

Definition: funcapi.h:306

HeapTupleGetDatum

static Datum HeapTupleGetDatum(const HeapTupleData *tuple)

Definition: funcapi.h:230

SRF_RETURN_DONE

#define SRF_RETURN_DONE(_funcctx)

Definition: funcapi.h:328

Assert

Assert(PointerIsAligned(start, uint64))

start

return str start

Definition: hashfn_unstable.h:282

for

for(;;)

Definition: hashfn_unstable.h:265

heap_form_tuple

HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)

Definition: heaptuple.c:1117

HeapTupleIsValid

#define HeapTupleIsValid(tuple)

Definition: htup.h:78

htup_details.h

nitems

#define nitems(x)

Definition: indent.h:31

value

static struct @169 value

val

long val

Definition: informix.c:689

b

int b

Definition: isn.c:74

a

int a

Definition: isn.c:73

j

int j

Definition: isn.c:78

i

int i

Definition: isn.c:77

getTypeOutputInfo

void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)

Definition: lsyscache.c:3074

get_typlenbyvalalign

void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)

Definition: lsyscache.c:2438

get_opcode

RegProcedure get_opcode(Oid opno)

Definition: lsyscache.c:1452

lsyscache.h

pg_stats_ext_mcvlist_items

Datum pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)

Definition: mcv.c:1338

pg_mcv_list_in

Datum pg_mcv_list_in(PG_FUNCTION_ARGS)

Definition: mcv.c:1472

ITEM_SIZE

#define ITEM_SIZE(ndims)

Definition: mcv.c:53

statext_mcv_deserialize

MCVList * statext_mcv_deserialize(bytea *data)

Definition: mcv.c:996

mcv_clauselist_selectivity

Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)

Definition: mcv.c:2048

build_mss

static MultiSortSupport build_mss(StatsBuildData *data)

Definition: mcv.c:347

compare_sort_item_count

static int compare_sort_item_count(const void *a, const void *b, void *arg)

Definition: mcv.c:403

statext_mcv_load

MCVList * statext_mcv_load(Oid mvoid, bool inh)

Definition: mcv.c:558

pg_mcv_list_out

Datum pg_mcv_list_out(PG_FUNCTION_ARGS)

Definition: mcv.c:1498

count_distinct_groups

static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)

Definition: mcv.c:379

MinSizeOfMCVList

#define MinSizeOfMCVList

Definition: mcv.c:59

pg_mcv_list_send

Datum pg_mcv_list_send(PG_FUNCTION_ARGS)

Definition: mcv.c:1523

SizeOfMCVList

#define SizeOfMCVList(ndims, nitems)

Definition: mcv.c:68

mcv_get_match_bitmap

static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)

Definition: mcv.c:1599

RESULT_MERGE

#define RESULT_MERGE(value, is_or, match)

Definition: mcv.c:88

get_mincount_for_mcv_list

static double get_mincount_for_mcv_list(int samplerows, double totalrows)

Definition: mcv.c:148

mcv_combine_selectivities

Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)

Definition: mcv.c:2006

mcv_clause_selectivity_or

Selectivity mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)

Definition: mcv.c:2126

RESULT_IS_FINAL

#define RESULT_IS_FINAL(value, is_or)

Definition: mcv.c:100

mcv_match_expression

static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)

Definition: mcv.c:1535

statext_mcv_build

MCVList * statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)

Definition: mcv.c:180

pg_mcv_list_recv

Datum pg_mcv_list_recv(PG_FUNCTION_ARGS)

Definition: mcv.c:1507

sort_item_compare

static int sort_item_compare(const void *a, const void *b, void *arg)

Definition: mcv.c:465

build_column_frequencies

static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)

Definition: mcv.c:490

statext_mcv_serialize

bytea * statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)

Definition: mcv.c:621

build_distinct_groups

static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)

Definition: mcv.c:424

repalloc

void * repalloc(void *pointer, Size size)

Definition: mcxt.c:1610

pfree

void pfree(void *pointer)

Definition: mcxt.c:1594

palloc0

void * palloc0(Size size)

Definition: mcxt.c:1395

palloc

void * palloc(Size size)

Definition: mcxt.c:1365

CurrentMemoryContext

MemoryContext CurrentMemoryContext

Definition: mcxt.c:160

sort-test.key

key

Definition: sort-test.py:19

exprCollation

Oid exprCollation(const Node *expr)

Definition: nodeFuncs.c:821

nodeFuncs.h

is_andclause

static bool is_andclause(const void *clause)

Definition: nodeFuncs.h:107

is_orclause

static bool is_orclause(const void *clause)

Definition: nodeFuncs.h:116

is_opclause

static bool is_opclause(const void *clause)

Definition: nodeFuncs.h:76

is_notclause

static bool is_notclause(const void *clause)

Definition: nodeFuncs.h:125

IsA

#define IsA(nodeptr, _type_)

Definition: nodes.h:164

Selectivity

double Selectivity

Definition: nodes.h:260

JoinType

Definition: nodes.h:298

MemoryContextSwitchTo

static MemoryContext MemoryContextSwitchTo(MemoryContext context)

Definition: palloc.h:124

arg

void * arg

Definition: pg_backup_utils.c:29

len

const void size_t len

Definition: pg_crc32c_sse42.c:28

data

const void * data

Definition: pg_crc32c_sse42.c:27

lfirst

#define lfirst(lc)

Definition: pg_list.h:172

list_length

static int list_length(const List *l)

Definition: pg_list.h:152

NIL

#define NIL

Definition: pg_list.h:68

list_make1

#define list_make1(x1)

Definition: pg_list.h:212

pg_statistic_ext.h

pg_statistic_ext_data.h

bsearch_arg

void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)

Definition: bsearch_arg.c:55

qsort_interruptible

void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)

postgres.h

DatumGetBool

static bool DatumGetBool(Datum X)

Definition: postgres.h:100

PointerGetDatum

static Datum PointerGetDatum(const void *X)

Definition: postgres.h:332

BoolGetDatum

static Datum BoolGetDatum(bool X)

Definition: postgres.h:112

ObjectIdGetDatum

static Datum ObjectIdGetDatum(Oid X)

Definition: postgres.h:262

DatumGetCString

static char * DatumGetCString(Datum X)

Definition: postgres.h:345

Datum

uint64_t Datum

Definition: postgres.h:70

DatumGetPointer

static Pointer DatumGetPointer(Datum X)

Definition: postgres.h:322

Float8GetDatum

static Datum Float8GetDatum(float8 X)

Definition: postgres.h:492

Int32GetDatum

static Datum Int32GetDatum(int32 X)

Definition: postgres.h:222

InvalidOid

#define InvalidOid

Definition: postgres_ext.h:37

Oid

unsigned int Oid

Definition: postgres_ext.h:32

IS_NULL

@ IS_NULL

Definition: primnodes.h:1976

IS_NOT_NULL

@ IS_NOT_NULL

Definition: primnodes.h:1976

root

tree ctl root

Definition: radixtree.h:1857

selfuncs.h

CLAMP_PROBABILITY

#define CLAMP_PROBABILITY(p)

Definition: selfuncs.h:63

PrepareSortSupportFromOrderingOp

void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)

Definition: sortsupport.c:134

SortSupportData

struct SortSupportData SortSupportData

SortSupport

struct SortSupportData * SortSupport

Definition: sortsupport.h:58

ApplySortComparator

static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)

Definition: sortsupport.h:200

statistics.h

STATS_MCV_TYPE_BASIC

#define STATS_MCV_TYPE_BASIC

Definition: statistics.h:67

STATS_MCV_MAGIC

#define STATS_MCV_MAGIC

Definition: statistics.h:66

STATS_MCVLIST_MAX_ITEMS

#define STATS_MCVLIST_MAX_ITEMS

Definition: statistics.h:70

STATS_MAX_DIMENSIONS

#define STATS_MAX_DIMENSIONS

Definition: statistics.h:19

MCVItem

struct MCVItem MCVItem

ArrayBuildState

Definition: array.h:188

ArrayType

Definition: array.h:93

AttInMetadata::tupdesc

TupleDesc tupdesc

Definition: funcapi.h:38

Bitmapset

Definition: bitmapset.h:50

BoolExpr

Definition: primnodes.h:967

BoolExpr::args

List * args

Definition: primnodes.h:972

Const

Definition: primnodes.h:324

DimensionInfo

Definition: extended_stats_internal.h:35

DimensionInfo::typlen

int typlen

Definition: extended_stats_internal.h:39

DimensionInfo::nbytes_aligned

int nbytes_aligned

Definition: extended_stats_internal.h:38

DimensionInfo::nbytes

int nbytes

Definition: extended_stats_internal.h:37

DimensionInfo::nvalues

int nvalues

Definition: extended_stats_internal.h:36

DimensionInfo::typbyval

bool typbyval

Definition: extended_stats_internal.h:40

FmgrInfo

Definition: fmgr.h:57

FuncCallContext

Definition: funcapi.h:58

FuncCallContext::user_fctx

void * user_fctx

Definition: funcapi.h:82

FuncCallContext::max_calls

uint64 max_calls

Definition: funcapi.h:74

FuncCallContext::call_cntr

uint64 call_cntr

Definition: funcapi.h:65

FuncCallContext::attinmeta

AttInMetadata * attinmeta

Definition: funcapi.h:91

FuncCallContext::multi_call_memory_ctx

MemoryContext multi_call_memory_ctx

Definition: funcapi.h:101

HeapTupleData

Definition: htup.h:63

List

Definition: pg_list.h:54

MCVItem

Definition: statistics.h:79

MCVItem::isnull

bool * isnull

Definition: statistics.h:82

MCVItem::frequency

double frequency

Definition: statistics.h:80

MCVItem::base_frequency

double base_frequency

Definition: statistics.h:81

MCVItem::values

Datum * values

Definition: statistics.h:83

MCVList

Definition: statistics.h:88

MCVList::type

uint32 type

Definition: statistics.h:90

MCVList::magic

uint32 magic

Definition: statistics.h:89

MCVList::nitems

uint32 nitems

Definition: statistics.h:91

MCVList::items

MCVItem items[FLEXIBLE_ARRAY_MEMBER]

Definition: statistics.h:94

MCVList::ndimensions

AttrNumber ndimensions

Definition: statistics.h:92

MCVList::types

Oid types[STATS_MAX_DIMENSIONS]

Definition: statistics.h:93

MemoryContextData

Definition: memnodes.h:118

MultiSortSupportData

Definition: extended_stats_internal.h:45

MultiSortSupportData::ssup

SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]

Definition: extended_stats_internal.h:48

MultiSortSupportData::ndims

int ndims

Definition: extended_stats_internal.h:46

Node

Definition: nodes.h:135

NullTest

Definition: primnodes.h:1980

NullTest::nulltesttype

NullTestType nulltesttype

Definition: primnodes.h:1983

NullTest::arg

Expr * arg

Definition: primnodes.h:1982

OpExpr

Definition: primnodes.h:846

OpExpr::opno

Oid opno

Definition: primnodes.h:850

OpExpr::args

List * args

Definition: primnodes.h:868

PlannerInfo

Definition: pathnodes.h:214

RangeTblEntry

Definition: parsenodes.h:1058

RangeTblEntry::inh

bool inh

Definition: parsenodes.h:1121

RelOptInfo

Definition: pathnodes.h:897

RelOptInfo::relid

Index relid

Definition: pathnodes.h:954

RestrictInfo

Definition: pathnodes.h:2698

ScalarArrayOpExpr

Definition: primnodes.h:926

ScalarArrayOpExpr::useOr

bool useOr

Definition: primnodes.h:942

ScalarArrayOpExpr::opno

Oid opno

Definition: primnodes.h:930

ScalarArrayOpExpr::args

List * args

Definition: primnodes.h:948

SortItem

Definition: extended_stats_internal.h:54

SortItem::values

Datum * values

Definition: extended_stats_internal.h:55

SortItem::isnull

bool * isnull

Definition: extended_stats_internal.h:56

SortItem::count

int count

Definition: extended_stats_internal.h:57

SortSupportData

Definition: sortsupport.h:61

SortSupportData::ssup_nulls_first

bool ssup_nulls_first

Definition: sortsupport.h:75

SortSupportData::ssup_collation

Oid ssup_collation

Definition: sortsupport.h:67

SortSupportData::ssup_cxt

MemoryContext ssup_cxt

Definition: sortsupport.h:66

SpecialJoinInfo

Definition: pathnodes.h:3025

StatisticExtInfo

Definition: pathnodes.h:1324

StatsBuildData

Definition: extended_stats_internal.h:62

TupleDescData

Definition: tupdesc.h:136

TypeCacheEntry

Definition: typcache.h:32

TypeCacheEntry::lt_opr

Oid lt_opr

Definition: typcache.h:63

VacAttrStats

Definition: vacuum.h:117

VacAttrStats::attrtype

Form_pg_type attrtype

Definition: vacuum.h:128

VacAttrStats::attrtypid

Oid attrtypid

Definition: vacuum.h:126

VacAttrStats::attrcollid

Oid attrcollid

Definition: vacuum.h:129

Var

Definition: primnodes.h:262

Var::varattno

AttrNumber varattno

Definition: primnodes.h:274

index

Definition: type.h:96

stat

Definition: win32_port.h:255

varlena

Definition: c.h:692

ReleaseSysCache

void ReleaseSysCache(HeapTuple tuple)

Definition: syscache.c:264

SysCacheGetAttr

Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)

Definition: syscache.c:595

SearchSysCache2

HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)

Definition: syscache.c:230

syscache.h

items

static ItemArray items

Definition: test_tidstore.c:48

fetch_att

static Datum fetch_att(const void *T, bool attbyval, int attlen)

Definition: tupmacs.h:50

store_att_byval

static void store_att_byval(void *T, Datum newdatum, int attlen)

Definition: tupmacs.h:206

lookup_type_cache

TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)

Definition: typcache.c:386

typcache.h

TYPECACHE_LT_OPR

#define TYPECACHE_LT_OPR

Definition: typcache.h:139

ListCell

Definition: pg_list.h:46

VARSIZE_ANY

static Size VARSIZE_ANY(const void *PTR)

Definition: varatt.h:460

VARSIZE_ANY_EXHDR

static Size VARSIZE_ANY_EXHDR(const void *PTR)

Definition: varatt.h:472

VARDATA

static char * VARDATA(const void *PTR)

Definition: varatt.h:305

VARDATA_ANY

static char * VARDATA_ANY(const void *PTR)

Definition: varatt.h:486

SET_VARSIZE

static void SET_VARSIZE(void *PTR, Size len)

Definition: varatt.h:432

cstring_to_text

text * cstring_to_text(const char *s)

Definition: varlena.c:181

type

const char * type

Definition: wait_event_funcs.c:27

PostgreSQL Source Code: src/backend/statistics/mcv.c Source File