[フレーム]

copyfromparse.c

Go to the documentation of this file.

1/*-------------------------------------------------------------------------

2 *

3 * copyfromparse.c

4 * Parse CSV/text/binary format for COPY FROM.

5 *

6 * This file contains routines to parse the text, CSV and binary input

7 * formats. The main entry point is NextCopyFrom(), which parses the

8 * next input line and returns it as Datums.

9 *

10 * In text/CSV mode, the parsing happens in multiple stages:

11 *

12 * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf

13 *                1.          2.            3.           4.

14 *

15 * 1. CopyLoadRawBuf() reads raw data from the input file or client, and

16 * places it into 'raw_buf'.

17 *

18 * 2. CopyConvertBuf() calls the encoding conversion function to convert

19 * the data in 'raw_buf' from client to server encoding, placing the

20 * converted result in 'input_buf'.

21 *

22 * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.

23 * It is responsible for finding the next newline marker, taking quote and

24 * escape characters into account according to the COPY options. The line

25 * is copied into 'line_buf', with quotes and escape characters still

26 * intact.

27 *

28 * 4. CopyReadAttributesText/CSV() function takes the input line from

29 * 'line_buf', and splits it into fields, unescaping the data as required.

30 * The fields are stored in 'attribute_buf', and 'raw_fields' array holds

31 * pointers to each field.

32 *

33 * If encoding conversion is not required, a shortcut is taken in step 2 to

34 * avoid copying the data unnecessarily. The 'input_buf' pointer is set to

35 * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data

36 * directly into 'input_buf'. CopyConvertBuf() then merely validates that

37 * the data is valid in the current encoding.

38 *

39 * In binary mode, the pipeline is much simpler. Input is loaded into

40 * 'raw_buf', and encoding conversion is done in the datatype-specific

41 * receive functions, if required. 'input_buf' and 'line_buf' are not used,

42 * but 'attribute_buf' is used as a temporary buffer to hold one attribute's

43 * data when it's passed the receive function.

44 *

45 * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also

46 * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf'

47 * and 'attribute_buf' are expanded on demand, to hold the longest line

48 * encountered so far.

49 *

52 *

53 *

54 * IDENTIFICATION

55 * src/backend/commands/copyfromparse.c

56 *

57 *-------------------------------------------------------------------------

58 */

59#include "postgres.h"

60

61#include <ctype.h>

62#include <unistd.h>

63#include <sys/stat.h>

64

65#include "commands/copyapi.h"

66#include "commands/copyfrom_internal.h"

67#include "commands/progress.h"

68#include "executor/executor.h"

69#include "libpq/libpq.h"

70#include "libpq/pqformat.h"

71#include "mb/pg_wchar.h"

72#include "miscadmin.h"

73#include "pgstat.h"

74#include "port/pg_bswap.h"

75#include "utils/builtins.h"

76#include "utils/rel.h"

77

/* Nonzero when c is one of the octal digit characters '0' through '7'. */
#define ISOCTAL(c)	((c) >= '0' && (c) <= '7')

/* Numeric value of the digit character c (its distance from '0'). */
#define OCTVALUE(c)	((c) - '0')

80

81/*

82 * These macros centralize code used to process line_buf and input_buf buffers.

83 * They are macros because they often do continue/break control and to avoid

84 * function call overhead in tight COPY loops.

85 *

86 * We must use "if (1)" because the usual "do {...} while(0)" wrapper would

87 * prevent the continue/break processing from working. We end the "if (1)"

88 * with "else ((void) 0)" to ensure the "if" does not unintentionally match

89 * any "else" in the calling code, and to avoid any compiler warnings about

90 * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.

91 */

92

/*
 * Look-ahead guard for the line-reading loop.
 *
 * If no more than 'extralen' bytes remain past input_buf_ptr and the input
 * is not yet at EOF, rewind input_buf_ptr to prev_raw_ptr (so the character
 * fetched at the top of the loop stays in the buffer, even with more than
 * one byte of read-ahead), ask for more data, and 'continue' the enclosing
 * loop.
 *
 * Expects input_buf_ptr, copy_buf_len, prev_raw_ptr, hit_eof and need_data
 * to be in scope at the point of use.
 */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
	if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
	{ \
		need_data = true; \
		input_buf_ptr = prev_raw_ptr;	/* put the fetched char back */ \
		continue; \
	} \
} else ((void) 0)

107

/*
 * End-of-input guard for the line-reading loop.
 *
 * If no more than 'extralen' bytes remain past input_buf_ptr and the input
 * has hit EOF, set result = true and 'break' out of the enclosing loop.
 * When 'extralen' is nonzero the rest of the buffer is consumed first, so a
 * partial sequence (e.g. a backslash just before EOF) is treated as data.
 *
 * Expects input_buf_ptr, copy_buf_len, hit_eof and result to be in scope at
 * the point of use.
 */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
	if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
	{ \
		if ((extralen) != 0) \
			input_buf_ptr = copy_buf_len;	/* swallow the partial character */ \
		result = true; \
		break; \
	} \
} else ((void) 0)

121

/*
 * Flush the already-scanned part of input_buf into line_buf.
 *
 * Bytes between cstate->input_buf_index and input_buf_ptr are appended to
 * cstate->line_buf and input_buf_index is advanced to input_buf_ptr.  This
 * must be done before refilling, to be sure there is some room in input_buf.
 * Nothing happens when there are no pending bytes.
 *
 * Expects cstate and input_buf_ptr to be in scope at the point of use.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
	if (cstate->input_buf_index < input_buf_ptr) \
	{ \
		appendBinaryStringInfo(&cstate->line_buf, \
							   cstate->input_buf + cstate->input_buf_index, \
							   input_buf_ptr - cstate->input_buf_index); \
		cstate->input_buf_index = input_buf_ptr; \
	} \
} else ((void) 0)

137

/*
 * Signature expected at the start of binary-format COPY data; it is compared
 * byte-for-byte (all 11 bytes) by ReceiveCopyBinaryHeader().
 *
 * The bytes are "PGCOPY", newline, 0xFF (octal \377), carriage return,
 * newline, NUL.  The array is sized to exactly 11 so the explicit trailing
 * \0 is the last element and no implicit string terminator is stored.
 *
 * NOTE: there's a copy of this in copyto.c
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";

140

141

142/* non-export function prototypes */

143static bool CopyReadLine(CopyFromState cstate, bool is_csv);

144static bool CopyReadLineText(CopyFromState cstate, bool is_csv);

145static int CopyReadAttributesText(CopyFromState cstate);

146static int CopyReadAttributesCSV(CopyFromState cstate);

147static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,

148 Oid typioparam, int32 typmod,

149 bool *isnull);

150static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,

151 ExprContext *econtext,

152 Datum *values,

153 bool *nulls,

154 bool is_csv);

155static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate,

156 char ***fields,

157 int *nfields,

158 bool is_csv);

159

160

161/* Low-level communications functions */

162static int CopyGetData(CopyFromState cstate, void *databuf,

163 int minread, int maxread);

164static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);

165static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);

166static void CopyLoadInputBuf(CopyFromState cstate);

167static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);

168

169void

170 ReceiveCopyBegin(CopyFromState cstate)

171{

172 StringInfoData buf;

173 int natts = list_length(cstate->attnumlist);

174 int16 format = (cstate->opts.binary ? 1 : 0);

175 int i;

176

177 pq_beginmessage(&buf, PqMsg_CopyInResponse);

178 pq_sendbyte(&buf, format); /* overall format */

179 pq_sendint16(&buf, natts);

180 for (i = 0; i < natts; i++)

181 pq_sendint16(&buf, format); /* per-column formats */

182 pq_endmessage(&buf);

183 cstate->copy_src = COPY_FRONTEND;

184 cstate->fe_msgbuf = makeStringInfo();

185 /* We *must* flush here to ensure FE knows it can send. */

186 pq_flush();

187}

188

189void

190 ReceiveCopyBinaryHeader(CopyFromState cstate)

191{

192 char readSig[11];

193 int32 tmp;

194

195 /* Signature */

196 if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||

197 memcmp(readSig, BinarySignature, 11) != 0)

198 ereport(ERROR,

199 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

200 errmsg("COPY file signature not recognized")));

201 /* Flags field */

202 if (!CopyGetInt32(cstate, &tmp))

203 ereport(ERROR,

204 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

205 errmsg("invalid COPY file header (missing flags)")));

206 if ((tmp & (1 << 16)) != 0)

207 ereport(ERROR,

208 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

209 errmsg("invalid COPY file header (WITH OIDS)")));

210 tmp &= ~(1 << 16);

211 if ((tmp >> 16) != 0)

212 ereport(ERROR,

213 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

214 errmsg("unrecognized critical flags in COPY file header")));

215 /* Header extension length */

216 if (!CopyGetInt32(cstate, &tmp) ||

217 tmp < 0)

218 ereport(ERROR,

219 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

220 errmsg("invalid COPY file header (missing length)")));

221 /* Skip extension header, if present */

222 while (tmp-- > 0)

223 {

224 if (CopyReadBinaryData(cstate, readSig, 1) != 1)

225 ereport(ERROR,

226 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

227 errmsg("invalid COPY file header (wrong length)")));

228 }

229}

230

231/*

232 * CopyGetData reads data from the source (file or frontend)

233 *

234 * We attempt to read at least minread, and at most maxread, bytes from

235 * the source. The actual number of bytes read is returned; if this is

236 * less than minread, EOF was detected.

237 *

238 * Note: when copying from the frontend, we expect a proper EOF mark per

239 * protocol; if the frontend simply drops the connection, we raise error.

240 * It seems unwise to allow the COPY IN to complete normally in that case.

241 *

242 * NB: no data conversion is applied here.

243 */

244static int

245 CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)

246{

247 int bytesread = 0;

248

249 switch (cstate->copy_src)

250 {

251 case COPY_FILE:

252 bytesread = fread(databuf, 1, maxread, cstate->copy_file);

253 if (ferror(cstate->copy_file))

254 ereport(ERROR,

255 (errcode_for_file_access(),

256 errmsg("could not read from COPY file: %m")));

257 if (bytesread == 0)

258 cstate->raw_reached_eof = true;

259 break;

260 case COPY_FRONTEND:

261 while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)

262 {

263 int avail;

264

265 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)

266 {

267 /* Try to receive another message */

268 int mtype;

269 int maxmsglen;

270

271 readmessage:

272 HOLD_CANCEL_INTERRUPTS();

273 pq_startmsgread();

274 mtype = pq_getbyte();

275 if (mtype == EOF)

276 ereport(ERROR,

277 (errcode(ERRCODE_CONNECTION_FAILURE),

278 errmsg("unexpected EOF on client connection with an open transaction")));

279 /* Validate message type and set packet size limit */

280 switch (mtype)

281 {

282 case PqMsg_CopyData:

283 maxmsglen = PQ_LARGE_MESSAGE_LIMIT;

284 break;

285 case PqMsg_CopyDone:

286 case PqMsg_CopyFail:

287 case PqMsg_Flush:

288 case PqMsg_Sync:

289 maxmsglen = PQ_SMALL_MESSAGE_LIMIT;

290 break;

291 default:

292 ereport(ERROR,

293 (errcode(ERRCODE_PROTOCOL_VIOLATION),

294 errmsg("unexpected message type 0x%02X during COPY from stdin",

295 mtype)));

296 maxmsglen = 0; /* keep compiler quiet */

297 break;

298 }

299 /* Now collect the message body */

300 if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))

301 ereport(ERROR,

302 (errcode(ERRCODE_CONNECTION_FAILURE),

303 errmsg("unexpected EOF on client connection with an open transaction")));

304 RESUME_CANCEL_INTERRUPTS();

305 /* ... and process it */

306 switch (mtype)

307 {

308 case PqMsg_CopyData:

309 break;

310 case PqMsg_CopyDone:

311 /* COPY IN correctly terminated by frontend */

312 cstate->raw_reached_eof = true;

313 return bytesread;

314 case PqMsg_CopyFail:

315 ereport(ERROR,

316 (errcode(ERRCODE_QUERY_CANCELED),

317 errmsg("COPY from stdin failed: %s",

318 pq_getmsgstring(cstate->fe_msgbuf))));

319 break;

320 case PqMsg_Flush:

321 case PqMsg_Sync:

322

323 /*

324 * Ignore Flush/Sync for the convenience of client

325 * libraries (such as libpq) that may send those

326 * without noticing that the command they just

327 * sent was COPY.

328 */

329 goto readmessage;

330 default:

331 Assert(false); /* NOT REACHED */

332 }

333 }

334 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;

335 if (avail > maxread)

336 avail = maxread;

337 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);

338 databuf = (void *) ((char *) databuf + avail);

339 maxread -= avail;

340 bytesread += avail;

341 }

342 break;

343 case COPY_CALLBACK:

344 bytesread = cstate->data_source_cb(databuf, minread, maxread);

345 break;

346 }

347

348 return bytesread;

349}

350

351

352/*

353 * These functions do apply some data conversion

354 */

355

356/*

357 * CopyGetInt32 reads an int32 that appears in network byte order

358 *

359 * Returns true if OK, false if EOF

360 */

361static inline bool

362 CopyGetInt32(CopyFromState cstate, int32 *val)

363{

364 uint32 buf;

365

366 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))

367 {

368 *val = 0; /* suppress compiler warning */

369 return false;

370 }

371 *val = (int32) pg_ntoh32(buf);

372 return true;

373}

374

375/*

376 * CopyGetInt16 reads an int16 that appears in network byte order

377 */

378static inline bool

379 CopyGetInt16(CopyFromState cstate, int16 *val)

380{

381 uint16 buf;

382

383 if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))

384 {

385 *val = 0; /* suppress compiler warning */

386 return false;

387 }

388 *val = (int16) pg_ntoh16(buf);

389 return true;

390}

391

392

393/*

394 * Perform encoding conversion on data in 'raw_buf', writing the converted

395 * data into 'input_buf'.

396 *

397 * On entry, there must be some data to convert in 'raw_buf'.

398 */

399static void

400 CopyConvertBuf(CopyFromState cstate)

401{

402 /*

403 * If the file and server encoding are the same, no encoding conversion is

404 * required. However, we still need to verify that the input is valid for

405 * the encoding.

406 */

407 if (!cstate->need_transcoding)

408 {

409 /*

410 * When conversion is not required, input_buf and raw_buf are the

411 * same. raw_buf_len is the total number of bytes in the buffer, and

412 * input_buf_len tracks how many of those bytes have already been

413 * verified.

414 */

415 int preverifiedlen = cstate->input_buf_len;

416 int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;

417 int nverified;

418

419 if (unverifiedlen == 0)

420 {

421 /*

422 * If no more raw data is coming, report the EOF to the caller.

423 */

424 if (cstate->raw_reached_eof)

425 cstate->input_reached_eof = true;

426 return;

427 }

428

429 /*

430 * Verify the new data, including any residual unverified bytes from

431 * previous round.

432 */

433 nverified = pg_encoding_verifymbstr(cstate->file_encoding,

434 cstate->raw_buf + preverifiedlen,

435 unverifiedlen);

436 if (nverified == 0)

437 {

438 /*

439 * Could not verify anything.

440 *

441 * If there is no more raw input data coming, it means that there

442 * was an incomplete multi-byte sequence at the end. Also, if

443 * there's "enough" input left, we should be able to verify at

444 * least one character, and a failure to do so means that we've

445 * hit an invalid byte sequence.

446 */

447 if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))

448 cstate->input_reached_error = true;

449 return;

450 }

451 cstate->input_buf_len += nverified;

452 }

453 else

454 {

455 /*

456 * Encoding conversion is needed.

457 */

458 int nbytes;

459 unsigned char *src;

460 int srclen;

461 unsigned char *dst;

462 int dstlen;

463 int convertedlen;

464

465 if (RAW_BUF_BYTES(cstate) == 0)

466 {

467 /*

468 * If no more raw data is coming, report the EOF to the caller.

469 */

470 if (cstate->raw_reached_eof)

471 cstate->input_reached_eof = true;

472 return;

473 }

474

475 /*

476 * First, copy down any unprocessed data.

477 */

478 nbytes = INPUT_BUF_BYTES(cstate);

479 if (nbytes > 0 && cstate->input_buf_index > 0)

480 memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,

481 nbytes);

482 cstate->input_buf_index = 0;

483 cstate->input_buf_len = nbytes;

484 cstate->input_buf[nbytes] = '0円';

485

486 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;

487 srclen = cstate->raw_buf_len - cstate->raw_buf_index;

488 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;

489 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

490

491 /*

492 * Do the conversion. This might stop short, if there is an invalid

493 * byte sequence in the input. We'll convert as much as we can in

494 * that case.

495 *

496 * Note: Even if we hit an invalid byte sequence, we don't report the

497 * error until all the valid bytes have been consumed. The input

498 * might contain an end-of-input marker (\.), and we don't want to

499 * report an error if the invalid byte sequence is after the

500 * end-of-input marker. We might unnecessarily convert some data

501 * after the end-of-input marker as long as it's valid for the

502 * encoding, but that's harmless.

503 */

504 convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,

505 cstate->file_encoding,

506 GetDatabaseEncoding(),

507 src, srclen,

508 dst, dstlen,

509 true);

510 if (convertedlen == 0)

511 {

512 /*

513 * Could not convert anything. If there is no more raw input data

514 * coming, it means that there was an incomplete multi-byte

515 * sequence at the end. Also, if there is plenty of input left,

516 * we should be able to convert at least one character, so a

517 * failure to do so must mean that we've hit a byte sequence

518 * that's invalid.

519 */

520 if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)

521 cstate->input_reached_error = true;

522 return;

523 }

524 cstate->raw_buf_index += convertedlen;

525 cstate->input_buf_len += strlen((char *) dst);

526 }

527}

528

529/*

530 * Report an encoding or conversion error.

531 */

532static void

533 CopyConversionError(CopyFromState cstate)

534{

535 Assert(cstate->raw_buf_len > 0);

536 Assert(cstate->input_reached_error);

537

538 if (!cstate->need_transcoding)

539 {

540 /*

541 * Everything up to input_buf_len was successfully verified, and

542 * input_buf_len points to the invalid or incomplete character.

543 */

544 report_invalid_encoding(cstate->file_encoding,

545 cstate->raw_buf + cstate->input_buf_len,

546 cstate->raw_buf_len - cstate->input_buf_len);

547 }

548 else

549 {

550 /*

551 * raw_buf_index points to the invalid or untranslatable character. We

552 * let the conversion routine report the error, because it can provide

553 * a more specific error message than we could here. An earlier call

554 * to the conversion routine in CopyConvertBuf() detected that there

555 * is an error, now we call the conversion routine again with

556 * noError=false, to have it throw the error.

557 */

558 unsigned char *src;

559 int srclen;

560 unsigned char *dst;

561 int dstlen;

562

563 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;

564 srclen = cstate->raw_buf_len - cstate->raw_buf_index;

565 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;

566 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

567

568 (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,

569 cstate->file_encoding,

570 GetDatabaseEncoding(),

571 src, srclen,

572 dst, dstlen,

573 false);

574

575 /*

576 * The conversion routine should have reported an error, so this

577 * should not be reached.

578 */

579 elog(ERROR, "encoding conversion failed without error");

580 }

581}

582

583/*

584 * Load more data from data source to raw_buf.

585 *

586 * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the

587 * beginning of the buffer, and we load new data after that.

588 */

589static void

590 CopyLoadRawBuf(CopyFromState cstate)

591{

592 int nbytes;

593 int inbytes;

594

595 /*

596 * In text mode, if encoding conversion is not required, raw_buf and

597 * input_buf point to the same buffer. Their len/index better agree, too.

598 */

599 if (cstate->raw_buf == cstate->input_buf)

600 {

601 Assert(!cstate->need_transcoding);

602 Assert(cstate->raw_buf_index == cstate->input_buf_index);

603 Assert(cstate->input_buf_len <= cstate->raw_buf_len);

604 }

605

606 /*

607 * Copy down the unprocessed data if any.

608 */

609 nbytes = RAW_BUF_BYTES(cstate);

610 if (nbytes > 0 && cstate->raw_buf_index > 0)

611 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,

612 nbytes);

613 cstate->raw_buf_len -= cstate->raw_buf_index;

614 cstate->raw_buf_index = 0;

615

616 /*

617 * If raw_buf and input_buf are in fact the same buffer, adjust the

618 * input_buf variables, too.

619 */

620 if (cstate->raw_buf == cstate->input_buf)

621 {

622 cstate->input_buf_len -= cstate->input_buf_index;

623 cstate->input_buf_index = 0;

624 }

625

626 /* Load more data */

627 inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,

628 1, RAW_BUF_SIZE - cstate->raw_buf_len);

629 nbytes += inbytes;

630 cstate->raw_buf[nbytes] = '0円';

631 cstate->raw_buf_len = nbytes;

632

633 cstate->bytes_processed += inbytes;

634 pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);

635

636 if (inbytes == 0)

637 cstate->raw_reached_eof = true;

638}

639

640/*

641 * CopyLoadInputBuf loads some more data into input_buf

642 *

643 * On return, at least one more input character is loaded into

644 * input_buf, or input_reached_eof is set.

645 *

646 * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start

647 * of the buffer and then we load more data after that.

648 */

649static void

650 CopyLoadInputBuf(CopyFromState cstate)

651{

652 int nbytes = INPUT_BUF_BYTES(cstate);

653

654 /*

655 * The caller has updated input_buf_index to indicate how much of the

656 * input has been consumed and isn't needed anymore. If input_buf is the

657 * same physical area as raw_buf, update raw_buf_index accordingly.

658 */

659 if (cstate->raw_buf == cstate->input_buf)

660 {

661 Assert(!cstate->need_transcoding);

662 Assert(cstate->input_buf_index >= cstate->raw_buf_index);

663 cstate->raw_buf_index = cstate->input_buf_index;

664 }

665

666 for (;;)

667 {

668 /* If we now have some unconverted data, try to convert it */

669 CopyConvertBuf(cstate);

670

671 /* If we now have some more input bytes ready, return them */

672 if (INPUT_BUF_BYTES(cstate) > nbytes)

673 return;

674

675 /*

676 * If we reached an invalid byte sequence, or we're at an incomplete

677 * multi-byte character but there is no more raw input data, report

678 * conversion error.

679 */

680 if (cstate->input_reached_error)

681 CopyConversionError(cstate);

682

683 /* no more input, and everything has been converted */

684 if (cstate->input_reached_eof)

685 break;

686

687 /* Try to load more raw data */

688 Assert(!cstate->raw_reached_eof);

689 CopyLoadRawBuf(cstate);

690 }

691}

692

693/*

694 * CopyReadBinaryData

695 *

696 * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf

697 * and writes them to 'dest'. Returns the number of bytes read (which

698 * would be less than 'nbytes' only if we reach EOF).

699 */

700static int

701 CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)

702{

703 int copied_bytes = 0;

704

705 if (RAW_BUF_BYTES(cstate) >= nbytes)

706 {

707 /* Enough bytes are present in the buffer. */

708 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);

709 cstate->raw_buf_index += nbytes;

710 copied_bytes = nbytes;

711 }

712 else

713 {

714 /*

715 * Not enough bytes in the buffer, so must read from the file. Need

716 * to loop since 'nbytes' could be larger than the buffer size.

717 */

718 do

719 {

720 int copy_bytes;

721

722 /* Load more data if buffer is empty. */

723 if (RAW_BUF_BYTES(cstate) == 0)

724 {

725 CopyLoadRawBuf(cstate);

726 if (cstate->raw_reached_eof)

727 break; /* EOF */

728 }

729

730 /* Transfer some bytes. */

731 copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));

732 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);

733 cstate->raw_buf_index += copy_bytes;

734 dest += copy_bytes;

735 copied_bytes += copy_bytes;

736 } while (copied_bytes < nbytes);

737 }

738

739 return copied_bytes;

740}

741

742/*

743 * This function is exposed for use by extensions that read raw fields in the

744 * next line. See NextCopyFromRawFieldsInternal() for details.

745 */

746bool

747 NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)

748{

749 return NextCopyFromRawFieldsInternal(cstate, fields, nfields,

750 cstate->opts.csv_mode);

751}

752

753/*

754 * Workhorse for NextCopyFromRawFields().

755 *

756 * Read raw fields in the next line for COPY FROM in text or csv mode. Return

757 * false if no more lines.

758 *

759 * An internal temporary buffer is returned via 'fields'. It is valid until

760 * the next call of the function. Since the function returns all raw fields

761 * in the input file, 'nfields' could be different from the number of columns

762 * in the relation.

763 *

764 * NOTE: force_not_null option are not applied to the returned fields.

765 *

766 * We use pg_attribute_always_inline to reduce function call overhead

767 * and to help compilers to optimize away the 'is_csv' condition when called

768 * by internal functions such as CopyFromTextLikeOneRow().

769 */

770static pg_attribute_always_inline bool

771 NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)

772{

773 int fldct;

774 bool done = false;

775

776 /* only available for text or csv input */

777 Assert(!cstate->opts.binary);

778

779 /* on input check that the header line is correct if needed */

780 if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)

781 {

782 ListCell *cur;

783 TupleDesc tupDesc;

784 int lines_to_skip = cstate->opts.header_line;

785

786 /* If set to "match", one header line is skipped */

787 if (cstate->opts.header_line == COPY_HEADER_MATCH)

788 lines_to_skip = 1;

789

790 tupDesc = RelationGetDescr(cstate->rel);

791

792 for (int i = 0; i < lines_to_skip; i++)

793 {

794 cstate->cur_lineno++;

795 if ((done = CopyReadLine(cstate, is_csv)))

796 break;

797 }

798

799 if (cstate->opts.header_line == COPY_HEADER_MATCH)

800 {

801 int fldnum;

802

803 if (is_csv)

804 fldct = CopyReadAttributesCSV(cstate);

805 else

806 fldct = CopyReadAttributesText(cstate);

807

808 if (fldct != list_length(cstate->attnumlist))

809 ereport(ERROR,

810 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

811 errmsg("wrong number of fields in header line: got %d, expected %d",

812 fldct, list_length(cstate->attnumlist))));

813

814 fldnum = 0;

815 foreach(cur, cstate->attnumlist)

816 {

817 int attnum = lfirst_int(cur);

818 char *colName;

819 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);

820

821 Assert(fldnum < cstate->max_fields);

822

823 colName = cstate->raw_fields[fldnum++];

824 if (colName == NULL)

825 ereport(ERROR,

826 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

827 errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",

828 fldnum, cstate->opts.null_print, NameStr(attr->attname))));

829

830 if (namestrcmp(&attr->attname, colName) != 0)

831 {

832 ereport(ERROR,

833 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

834 errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",

835 fldnum, colName, NameStr(attr->attname))));

836 }

837 }

838 }

839

840 if (done)

841 return false;

842 }

843

844 cstate->cur_lineno++;

845

846 /* Actually read the line into memory here */

847 done = CopyReadLine(cstate, is_csv);

848

849 /*

850 * EOF at start of line means we're done. If we see EOF after some

851 * characters, we act as though it was newline followed by EOF, ie,

852 * process the line and then exit loop on next iteration.

853 */

854 if (done && cstate->line_buf.len == 0)

855 return false;

856

857 /* Parse the line into de-escaped field values */

858 if (is_csv)

859 fldct = CopyReadAttributesCSV(cstate);

860 else

861 fldct = CopyReadAttributesText(cstate);

862

863 *fields = cstate->raw_fields;

864 *nfields = fldct;

865 return true;

866}

867

868/*

869 * Read next tuple from file for COPY FROM. Return false if no more tuples.

870 *

871 * 'econtext' is used to evaluate default expression for each column that is

872 * either not read from the file or is using the DEFAULT option of COPY FROM.

873 * It can be NULL when no default values are used, i.e. when all columns are

874 * read from the file, and DEFAULT option is unset.

875 *

876 * 'values' and 'nulls' arrays must be the same length as columns of the

877 * relation passed to BeginCopyFrom. This function fills the arrays.

878 */

879bool

880 NextCopyFrom(CopyFromState cstate, ExprContext *econtext,

881 Datum *values, bool *nulls)

882{

883 TupleDesc tupDesc;

884 AttrNumber num_phys_attrs,

885 num_defaults = cstate->num_defaults;

886 int i;

887 int *defmap = cstate->defmap;

888 ExprState **defexprs = cstate->defexprs;

889

890 tupDesc = RelationGetDescr(cstate->rel);

891 num_phys_attrs = tupDesc->natts;

892

893 /* Initialize all values for row to NULL */

894 MemSet(values, 0, num_phys_attrs * sizeof(Datum));

895 MemSet(nulls, true, num_phys_attrs * sizeof(bool));

896 MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));

897

898 /* Get one row from source */

899 if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls))

900 return false;

901

902 /*

903 * Now compute and insert any defaults available for the columns not

904 * provided by the input data. Anything not processed here or above will

905 * remain NULL.

906 */

907 for (i = 0; i < num_defaults; i++)

908 {

909 /*

910 * The caller must supply econtext and have switched into the

911 * per-tuple memory context in it.

912 */

913 Assert(econtext != NULL);

914 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);

915

916 values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,

917 &nulls[defmap[i]]);

918 }

919

920 return true;

921}

922

923/* Implementation of the per-row callback for text format */

924bool

925 CopyFromTextOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,

926 bool *nulls)

927{

928 return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, false);

929}

930

931/* Implementation of the per-row callback for CSV format */

932bool

933 CopyFromCSVOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,

934 bool *nulls)

935{

936 return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, true);

937}

938

939/*

940 * Workhorse for CopyFromTextOneRow() and CopyFromCSVOneRow().

941 *

942 * We use pg_attribute_always_inline to reduce function call overhead

943 * and to help compilers to optimize away the 'is_csv' condition.

944 */

945static pg_attribute_always_inline bool

946 CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,

947 Datum *values, bool *nulls, bool is_csv)

948{

949 TupleDesc tupDesc;

950 AttrNumber attr_count;

951 FmgrInfo *in_functions = cstate->in_functions;

952 Oid *typioparams = cstate->typioparams;

953 ExprState **defexprs = cstate->defexprs;

954 char **field_strings;

955 ListCell *cur;

956 int fldct;

957 int fieldno;

958 char *string;

959

960 tupDesc = RelationGetDescr(cstate->rel);

961 attr_count = list_length(cstate->attnumlist);

962

963 /* read raw fields in the next line */

964 if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))

965 return false;

966

967 /* check for overflowing fields */

968 if (attr_count > 0 && fldct > attr_count)

969 ereport(ERROR,

970 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

971 errmsg("extra data after last expected column")));

972

973 fieldno = 0;

974

975 /* Loop to read the user attributes on the line. */

976 foreach(cur, cstate->attnumlist)

977 {

978 int attnum = lfirst_int(cur);

979 int m = attnum - 1;

980 Form_pg_attribute att = TupleDescAttr(tupDesc, m);

981

982 if (fieldno >= fldct)

983 ereport(ERROR,

984 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

985 errmsg("missing data for column \"%s\"",

986 NameStr(att->attname))));

987 string = field_strings[fieldno++];

988

989 if (cstate->convert_select_flags &&

990 !cstate->convert_select_flags[m])

991 {

992 /* ignore input field, leaving column as NULL */

993 continue;

994 }

995

996 if (is_csv)

997 {

998 if (string == NULL &&

999 cstate->opts.force_notnull_flags[m])

1000 {

1001 /*

1002 * FORCE_NOT_NULL option is set and column is NULL - convert

1003 * it to the NULL string.

1004 */

1005 string = cstate->opts.null_print;

1006 }

1007 else if (string != NULL && cstate->opts.force_null_flags[m]

1008 && strcmp(string, cstate->opts.null_print) == 0)

1009 {

1010 /*

1011 * FORCE_NULL option is set and column matches the NULL

1012 * string. It must have been quoted, or otherwise the string

1013 * would already have been set to NULL. Convert it to NULL as

1014 * specified.

1015 */

1016 string = NULL;

1017 }

1018 }

1019

1020 cstate->cur_attname = NameStr(att->attname);

1021 cstate->cur_attval = string;

1022

1023 if (string != NULL)

1024 nulls[m] = false;

1025

1026 if (cstate->defaults[m])

1027 {

1028 /* We must have switched into the per-tuple memory context */

1029 Assert(econtext != NULL);

1030 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);

1031

1032 values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);

1033 }

1034

1035 /*

1036 * If ON_ERROR is specified with IGNORE, skip rows with soft errors

1037 */

1038 else if (!InputFunctionCallSafe(&in_functions[m],

1039 string,

1040 typioparams[m],

1041 att->atttypmod,

1042 (Node *) cstate->escontext,

1043 &values[m]))

1044 {

1045 Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);

1046

1047 cstate->num_errors++;

1048

1049 if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)

1050 {

1051 /*

1052 * Since we emit line number and column info in the below

1053 * notice message, we suppress error context information other

1054 * than the relation name.

1055 */

1056 Assert(!cstate->relname_only);

1057 cstate->relname_only = true;

1058

1059 if (cstate->cur_attval)

1060 {

1061 char *attval;

1062

1063 attval = CopyLimitPrintoutLength(cstate->cur_attval);

1064 ereport(NOTICE,

1065 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",

1066 cstate->cur_lineno,

1067 cstate->cur_attname,

1068 attval));

1069 pfree(attval);

1070 }

1071 else

1072 ereport(NOTICE,

1073 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",

1074 cstate->cur_lineno,

1075 cstate->cur_attname));

1076

1077 /* reset relname_only */

1078 cstate->relname_only = false;

1079 }

1080

1081 return true;

1082 }

1083

1084 cstate->cur_attname = NULL;

1085 cstate->cur_attval = NULL;

1086 }

1087

1088 Assert(fieldno == attr_count);

1089

1090 return true;

1091}

1092

1093/* Implementation of the per-row callback for binary format */

1094bool

1095 CopyFromBinaryOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,

1096 bool *nulls)

1097{

1098 TupleDesc tupDesc;

1099 AttrNumber attr_count;

1100 FmgrInfo *in_functions = cstate->in_functions;

1101 Oid *typioparams = cstate->typioparams;

1102 int16 fld_count;

1103 ListCell *cur;

1104

1105 tupDesc = RelationGetDescr(cstate->rel);

1106 attr_count = list_length(cstate->attnumlist);

1107

1108 cstate->cur_lineno++;

1109

1110 if (!CopyGetInt16(cstate, &fld_count))

1111 {

1112 /* EOF detected (end of file, or protocol-level EOF) */

1113 return false;

1114 }

1115

1116 if (fld_count == -1)

1117 {

1118 /*

1119 * Received EOF marker. Wait for the protocol-level EOF, and complain

1120 * if it doesn't come immediately. In COPY FROM STDIN, this ensures

1121 * that we correctly handle CopyFail, if client chooses to send that

1122 * now. When copying from file, we could ignore the rest of the file

1123 * like in text mode, but we choose to be consistent with the COPY

1124 * FROM STDIN case.

1125 */

1126 char dummy;

1127

1128 if (CopyReadBinaryData(cstate, &dummy, 1) > 0)

1129 ereport(ERROR,

1130 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1131 errmsg("received copy data after EOF marker")));

1132 return false;

1133 }

1134

1135 if (fld_count != attr_count)

1136 ereport(ERROR,

1137 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1138 errmsg("row field count is %d, expected %d",

1139 (int) fld_count, attr_count)));

1140

1141 foreach(cur, cstate->attnumlist)

1142 {

1143 int attnum = lfirst_int(cur);

1144 int m = attnum - 1;

1145 Form_pg_attribute att = TupleDescAttr(tupDesc, m);

1146

1147 cstate->cur_attname = NameStr(att->attname);

1148 values[m] = CopyReadBinaryAttribute(cstate,

1149 &in_functions[m],

1150 typioparams[m],

1151 att->atttypmod,

1152 &nulls[m]);

1153 cstate->cur_attname = NULL;

1154 }

1155

1156 return true;

1157}

1158

1159/*

1160 * Read the next input line and stash it in line_buf.

1161 *

1162 * Result is true if read was terminated by EOF, false if terminated

1163 * by newline. The terminating newline or EOF marker is not included

1164 * in the final value of line_buf.

1165 */

1166static bool

1167 CopyReadLine(CopyFromState cstate, bool is_csv)

1168{

1169 bool result;

1170

1171 resetStringInfo(&cstate->line_buf);

1172 cstate->line_buf_valid = false;

1173

1174 /* Parse data and transfer into line_buf */

1175 result = CopyReadLineText(cstate, is_csv);

1176

1177 if (result)

1178 {

1179 /*

1180 * Reached EOF. In protocol version 3, we should ignore anything

1181 * after \. up to the protocol end of copy data. (XXX maybe better

1182 * not to treat \. as special?)

1183 */

1184 if (cstate->copy_src == COPY_FRONTEND)

1185 {

1186 int inbytes;

1187

1188 do

1189 {

1190 inbytes = CopyGetData(cstate, cstate->input_buf,

1191 1, INPUT_BUF_SIZE);

1192 } while (inbytes > 0);

1193 cstate->input_buf_index = 0;

1194 cstate->input_buf_len = 0;

1195 cstate->raw_buf_index = 0;

1196 cstate->raw_buf_len = 0;

1197 }

1198 }

1199 else

1200 {

1201 /*

1202 * If we didn't hit EOF, then we must have transferred the EOL marker

1203 * to line_buf along with the data. Get rid of it.

1204 */

1205 switch (cstate->eol_type)

1206 {

1207 case EOL_NL:

1208 Assert(cstate->line_buf.len >= 1);

1209 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');

1210 cstate->line_buf.len--;

1211 cstate->line_buf.data[cstate->line_buf.len] = '0円';

1212 break;

1213 case EOL_CR:

1214 Assert(cstate->line_buf.len >= 1);

1215 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');

1216 cstate->line_buf.len--;

1217 cstate->line_buf.data[cstate->line_buf.len] = '0円';

1218 break;

1219 case EOL_CRNL:

1220 Assert(cstate->line_buf.len >= 2);

1221 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');

1222 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');

1223 cstate->line_buf.len -= 2;

1224 cstate->line_buf.data[cstate->line_buf.len] = '0円';

1225 break;

1226 case EOL_UNKNOWN:

1227 /* shouldn't get here */

1228 Assert(false);

1229 break;

1230 }

1231 }

1232

1233 /* Now it's safe to use the buffer in error messages */

1234 cstate->line_buf_valid = true;

1235

1236 return result;

1237}

1238

1239/*

1240 * CopyReadLineText - inner loop of CopyReadLine for text mode

1241 */

1242static bool

1243 CopyReadLineText(CopyFromState cstate, bool is_csv)

1244{

1245 char *copy_input_buf;

1246 int input_buf_ptr;

1247 int copy_buf_len;

1248 bool need_data = false;

1249 bool hit_eof = false;

1250 bool result = false;

1251

1252 /* CSV variables */

1253 bool in_quote = false,

1254 last_was_esc = false;

1255 char quotec = '0円';

1256 char escapec = '0円';

1257

1258 if (is_csv)

1259 {

1260 quotec = cstate->opts.quote[0];

1261 escapec = cstate->opts.escape[0];

1262 /* ignore special escape processing if it's the same as quotec */

1263 if (quotec == escapec)

1264 escapec = '0円';

1265 }

1266

1267 /*

1268 * The objective of this loop is to transfer the entire next input line

1269 * into line_buf. Hence, we only care for detecting newlines (\r and/or

1270 * \n) and the end-of-copy marker (\.).

1271 *

1272 * In CSV mode, \r and \n inside a quoted field are just part of the data

1273 * value and are put in line_buf. We keep just enough state to know if we

1274 * are currently in a quoted field or not.

1275 *

1276 * The input has already been converted to the database encoding. All

1277 * supported server encodings have the property that all bytes in a

1278 * multi-byte sequence have the high bit set, so a multibyte character

1279 * cannot contain any newline or escape characters embedded in the

1280 * multibyte sequence. Therefore, we can process the input byte-by-byte,

1281 * regardless of the encoding.

1282 *

1283 * For speed, we try to move data from input_buf to line_buf in chunks

1284 * rather than one character at a time. input_buf_ptr points to the next

1285 * character to examine; any characters from input_buf_index to

1286 * input_buf_ptr have been determined to be part of the line, but not yet

1287 * transferred to line_buf.

1288 *

1289 * For a little extra speed within the loop, we copy input_buf and

1290 * input_buf_len into local variables.

1291 */

1292 copy_input_buf = cstate->input_buf;

1293 input_buf_ptr = cstate->input_buf_index;

1294 copy_buf_len = cstate->input_buf_len;

1295

1296 for (;;)

1297 {

1298 int prev_raw_ptr;

1299 char c;

1300

1301 /*

1302 * Load more data if needed.

1303 *

1304 * TODO: We could just force four bytes of read-ahead and avoid the

1305 * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(). That was

1306 * unsafe with the old v2 COPY protocol, but we don't support that

1307 * anymore.

1308 */

1309 if (input_buf_ptr >= copy_buf_len || need_data)

1310 {

1311 REFILL_LINEBUF;

1312

1313 CopyLoadInputBuf(cstate);

1314 /* update our local variables */

1315 hit_eof = cstate->input_reached_eof;

1316 input_buf_ptr = cstate->input_buf_index;

1317 copy_buf_len = cstate->input_buf_len;

1318

1319 /*

1320 * If we are completely out of data, break out of the loop,

1321 * reporting EOF.

1322 */

1323 if (INPUT_BUF_BYTES(cstate) <= 0)

1324 {

1325 result = true;

1326 break;

1327 }

1328 need_data = false;

1329 }

1330

1331 /* OK to fetch a character */

1332 prev_raw_ptr = input_buf_ptr;

1333 c = copy_input_buf[input_buf_ptr++];

1334

1335 if (is_csv)

1336 {

1337 /*

1338 * If character is '\r', we may need to look ahead below. Force

1339 * fetch of the next character if we don't already have it. We

1340 * need to do this before changing CSV state, in case '\r' is also

1341 * the quote or escape character.

1342 */

1343 if (c == '\r')

1344 {

1345 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

1346 }

1347

1348 /*

1349 * Dealing with quotes and escapes here is mildly tricky. If the

1350 * quote char is also the escape char, there's no problem - we

1351 * just use the char as a toggle. If they are different, we need

1352 * to ensure that we only take account of an escape inside a

1353 * quoted field and immediately preceding a quote char, and not

1354 * the second in an escape-escape sequence.

1355 */

1356 if (in_quote && c == escapec)

1357 last_was_esc = !last_was_esc;

1358 if (c == quotec && !last_was_esc)

1359 in_quote = !in_quote;

1360 if (c != escapec)

1361 last_was_esc = false;

1362

1363 /*

1364 * Updating the line count for embedded CR and/or LF chars is

1365 * necessarily a little fragile - this test is probably about the

1366 * best we can do. (XXX it's arguable whether we should do this

1367 * at all --- is cur_lineno a physical or logical count?)

1368 */

1369 if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))

1370 cstate->cur_lineno++;

1371 }

1372

1373 /* Process \r */

1374 if (c == '\r' && (!is_csv || !in_quote))

1375 {

1376 /* Check for \r\n on first line, _and_ handle \r\n. */

1377 if (cstate->eol_type == EOL_UNKNOWN ||

1378 cstate->eol_type == EOL_CRNL)

1379 {

1380 /*

1381 * If need more data, go back to loop top to load it.

1382 *

1383 * Note that if we are at EOF, c will wind up as '0円' because

1384 * of the guaranteed pad of input_buf.

1385 */

1386 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

1387

1388 /* get next char */

1389 c = copy_input_buf[input_buf_ptr];

1390

1391 if (c == '\n')

1392 {

1393 input_buf_ptr++; /* eat newline */

1394 cstate->eol_type = EOL_CRNL; /* in case not set yet */

1395 }

1396 else

1397 {

1398 /* found \r, but no \n */

1399 if (cstate->eol_type == EOL_CRNL)

1400 ereport(ERROR,

1401 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1402 !is_csv ?

1403 errmsg("literal carriage return found in data") :

1404 errmsg("unquoted carriage return found in data"),

1405 !is_csv ?

1406 errhint("Use \"\\r\" to represent carriage return.") :

1407 errhint("Use quoted CSV field to represent carriage return.")));

1408

1409 /*

1410 * if we got here, it is the first line and we didn't find

1411 * \n, so don't consume the peeked character

1412 */

1413 cstate->eol_type = EOL_CR;

1414 }

1415 }

1416 else if (cstate->eol_type == EOL_NL)

1417 ereport(ERROR,

1418 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1419 !is_csv ?

1420 errmsg("literal carriage return found in data") :

1421 errmsg("unquoted carriage return found in data"),

1422 !is_csv ?

1423 errhint("Use \"\\r\" to represent carriage return.") :

1424 errhint("Use quoted CSV field to represent carriage return.")));

1425 /* If reach here, we have found the line terminator */

1426 break;

1427 }

1428

1429 /* Process \n */

1430 if (c == '\n' && (!is_csv || !in_quote))

1431 {

1432 if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)

1433 ereport(ERROR,

1434 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1435 !is_csv ?

1436 errmsg("literal newline found in data") :

1437 errmsg("unquoted newline found in data"),

1438 !is_csv ?

1439 errhint("Use \"\\n\" to represent newline.") :

1440 errhint("Use quoted CSV field to represent newline.")));

1441 cstate->eol_type = EOL_NL; /* in case not set yet */

1442 /* If reach here, we have found the line terminator */

1443 break;

1444 }

1445

1446 /*

1447 * Process backslash, except in CSV mode where backslash is a normal

1448 * character.

1449 */

1450 if (c == '\\' && !is_csv)

1451 {

1452 char c2;

1453

1454 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

1455 IF_NEED_REFILL_AND_EOF_BREAK(0);

1456

1457 /* -----

1458 * get next character

1459 * Note: we do not change c so if it isn't \., we can fall

1460 * through and continue processing.

1461 * -----

1462 */

1463 c2 = copy_input_buf[input_buf_ptr];

1464

1465 if (c2 == '.')

1466 {

1467 input_buf_ptr++; /* consume the '.' */

1468 if (cstate->eol_type == EOL_CRNL)

1469 {

1470 /* Get the next character */

1471 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

1472 /* if hit_eof, c2 will become '0円' */

1473 c2 = copy_input_buf[input_buf_ptr++];

1474

1475 if (c2 == '\n')

1476 ereport(ERROR,

1477 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1478 errmsg("end-of-copy marker does not match previous newline style")));

1479 else if (c2 != '\r')

1480 ereport(ERROR,

1481 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1482 errmsg("end-of-copy marker is not alone on its line")));

1483 }

1484

1485 /* Get the next character */

1486 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

1487 /* if hit_eof, c2 will become '0円' */

1488 c2 = copy_input_buf[input_buf_ptr++];

1489

1490 if (c2 != '\r' && c2 != '\n')

1491 ereport(ERROR,

1492 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1493 errmsg("end-of-copy marker is not alone on its line")));

1494

1495 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||

1496 (cstate->eol_type == EOL_CRNL && c2 != '\n') ||

1497 (cstate->eol_type == EOL_CR && c2 != '\r'))

1498 ereport(ERROR,

1499 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1500 errmsg("end-of-copy marker does not match previous newline style")));

1501

1502 /*

1503 * If there is any data on this line before the \., complain.

1504 */

1505 if (cstate->line_buf.len > 0 ||

1506 prev_raw_ptr > cstate->input_buf_index)

1507 ereport(ERROR,

1508 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1509 errmsg("end-of-copy marker is not alone on its line")));

1510

1511 /*

1512 * Discard the \. and newline, then report EOF.

1513 */

1514 cstate->input_buf_index = input_buf_ptr;

1515 result = true; /* report EOF */

1516 break;

1517 }

1518 else

1519 {

1520 /*

1521 * If we are here, it means we found a backslash followed by

1522 * something other than a period. In non-CSV mode, anything

1523 * after a backslash is special, so we skip over that second

1524 * character too. If we didn't do that \\. would be

1525 * considered an eof-of copy, while in non-CSV mode it is a

1526 * literal backslash followed by a period.

1527 */

1528 input_buf_ptr++;

1529 }

1530 }

1531 } /* end of outer loop */

1532

1533 /*

1534 * Transfer any still-uncopied data to line_buf.

1535 */

1536 REFILL_LINEBUF;

1537

1538 return result;

1539}

1540

/*
 * Return decimal value for a hexadecimal digit
 *
 * 'hex' must be a hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F'); no
 * validation is done here, so the result for any other character is not
 * meaningful.  Letters are case-folded with pg_ascii_tolower().
 */
static int
GetDecimalFromHex(char hex)
{
	if (isdigit((unsigned char) hex))
		return hex - '0';
	else
		return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
}

1552

1553/*

1554 * Parse the current line into separate attributes (fields),

1555 * performing de-escaping as needed.

1556 *

1557 * The input is in line_buf. We use attribute_buf to hold the result

1558 * strings. cstate->raw_fields[k] is set to point to the k'th attribute

1559 * string, or NULL when the input matches the null marker string.

1560 * This array is expanded as necessary.

1561 *

1562 * (Note that the caller cannot check for nulls since the returned

1563 * string would be the post-de-escaping equivalent, which may look

1564 * the same as some valid data string.)

1565 *

1566 * delim is the column delimiter string (must be just one byte for now).

1567 * null_print is the null marker string. Note that this is compared to

1568 * the pre-de-escaped input string.

1569 *

1570 * The return value is the number of fields actually read.

1571 */

1572static int

1573 CopyReadAttributesText(CopyFromState cstate)

1574{

1575 char delimc = cstate->opts.delim[0];

1576 int fieldno;

1577 char *output_ptr;

1578 char *cur_ptr;

1579 char *line_end_ptr;

1580

1581 /*

1582 * We need a special case for zero-column tables: check that the input

1583 * line is empty, and return.

1584 */

1585 if (cstate->max_fields <= 0)

1586 {

1587 if (cstate->line_buf.len != 0)

1588 ereport(ERROR,

1589 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1590 errmsg("extra data after last expected column")));

1591 return 0;

1592 }

1593

1594 resetStringInfo(&cstate->attribute_buf);

1595

1596 /*

1597 * The de-escaped attributes will certainly not be longer than the input

1598 * data line, so we can just force attribute_buf to be large enough and

1599 * then transfer data without any checks for enough space. We need to do

1600 * it this way because enlarging attribute_buf mid-stream would invalidate

1601 * pointers already stored into cstate->raw_fields[].

1602 */

1603 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)

1604 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);

1605 output_ptr = cstate->attribute_buf.data;

1606

1607 /* set pointer variables for loop */

1608 cur_ptr = cstate->line_buf.data;

1609 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;

1610

1611 /* Outer loop iterates over fields */

1612 fieldno = 0;

1613 for (;;)

1614 {

1615 bool found_delim = false;

1616 char *start_ptr;

1617 char *end_ptr;

1618 int input_len;

1619 bool saw_non_ascii = false;

1620

1621 /* Make sure there is enough space for the next value */

1622 if (fieldno >= cstate->max_fields)

1623 {

1624 cstate->max_fields *= 2;

1625 cstate->raw_fields =

1626 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));

1627 }

1628

1629 /* Remember start of field on both input and output sides */

1630 start_ptr = cur_ptr;

1631 cstate->raw_fields[fieldno] = output_ptr;

1632

1633 /*

1634 * Scan data for field.

1635 *

1636 * Note that in this loop, we are scanning to locate the end of field

1637 * and also speculatively performing de-escaping. Once we find the

1638 * end-of-field, we can match the raw field contents against the null

1639 * marker string. Only after that comparison fails do we know that

1640 * de-escaping is actually the right thing to do; therefore we *must

1641 * not* throw any syntax errors before we've done the null-marker

1642 * check.

1643 */

1644 for (;;)

1645 {

1646 char c;

1647

1648 end_ptr = cur_ptr;

1649 if (cur_ptr >= line_end_ptr)

1650 break;

1651 c = *cur_ptr++;

1652 if (c == delimc)

1653 {

1654 found_delim = true;

1655 break;

1656 }

1657 if (c == '\\')

1658 {

1659 if (cur_ptr >= line_end_ptr)

1660 break;

1661 c = *cur_ptr++;

1662 switch (c)

1663 {

1664 case '0':

1665 case '1':

1666 case '2':

1667 case '3':

1668 case '4':

1669 case '5':

1670 case '6':

1671 case '7':

1672 {

1673 /* handle 013円 */

1674 int val;

1675

1676 val = OCTVALUE(c);

1677 if (cur_ptr < line_end_ptr)

1678 {

1679 c = *cur_ptr;

1680 if (ISOCTAL(c))

1681 {

1682 cur_ptr++;

1683 val = (val << 3) + OCTVALUE(c);

1684 if (cur_ptr < line_end_ptr)

1685 {

1686 c = *cur_ptr;

1687 if (ISOCTAL(c))

1688 {

1689 cur_ptr++;

1690 val = (val << 3) + OCTVALUE(c);

1691 }

1692 }

1693 }

1694 }

1695 c = val & 0377;

1696 if (c == '0円' || IS_HIGHBIT_SET(c))

1697 saw_non_ascii = true;

1698 }

1699 break;

1700 case 'x':

1701 /* Handle \x3F */

1702 if (cur_ptr < line_end_ptr)

1703 {

1704 char hexchar = *cur_ptr;

1705

1706 if (isxdigit((unsigned char) hexchar))

1707 {

1708 int val = GetDecimalFromHex(hexchar);

1709

1710 cur_ptr++;

1711 if (cur_ptr < line_end_ptr)

1712 {

1713 hexchar = *cur_ptr;

1714 if (isxdigit((unsigned char) hexchar))

1715 {

1716 cur_ptr++;

1717 val = (val << 4) + GetDecimalFromHex(hexchar);

1718 }

1719 }

1720 c = val & 0xff;

1721 if (c == '0円' || IS_HIGHBIT_SET(c))

1722 saw_non_ascii = true;

1723 }

1724 }

1725 break;

1726 case 'b':

1727 c = '\b';

1728 break;

1729 case 'f':

1730 c = '\f';

1731 break;

1732 case 'n':

1733 c = '\n';

1734 break;

1735 case 'r':

1736 c = '\r';

1737 break;

1738 case 't':

1739 c = '\t';

1740 break;

1741 case 'v':

1742 c = '\v';

1743 break;

1744

1745 /*

1746 * in all other cases, take the char after '\'

1747 * literally

1748 */

1749 }

1750 }

1751

1752 /* Add c to output string */

1753 *output_ptr++ = c;

1754 }

1755

1756 /* Check whether raw input matched null marker */

1757 input_len = end_ptr - start_ptr;

1758 if (input_len == cstate->opts.null_print_len &&

1759 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)

1760 cstate->raw_fields[fieldno] = NULL;

1761 /* Check whether raw input matched default marker */

1762 else if (fieldno < list_length(cstate->attnumlist) &&

1763 cstate->opts.default_print &&

1764 input_len == cstate->opts.default_print_len &&

1765 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)

1766 {

1767 /* fieldno is 0-indexed and attnum is 1-indexed */

1768 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;

1769

1770 if (cstate->defexprs[m] != NULL)

1771 {

1772 /* defaults contain entries for all physical attributes */

1773 cstate->defaults[m] = true;

1774 }

1775 else

1776 {

1777 TupleDesc tupDesc = RelationGetDescr(cstate->rel);

1778 Form_pg_attribute att = TupleDescAttr(tupDesc, m);

1779

1780 ereport(ERROR,

1781 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1782 errmsg("unexpected default marker in COPY data"),

1783 errdetail("Column \"%s\" has no default value.",

1784 NameStr(att->attname))));

1785 }

1786 }

1787 else

1788 {

1789 /*

1790 * At this point we know the field is supposed to contain data.

1791 *

1792 * If we de-escaped any non-7-bit-ASCII chars, make sure the

1793 * resulting string is valid data for the db encoding.

1794 */

1795 if (saw_non_ascii)

1796 {

1797 char *fld = cstate->raw_fields[fieldno];

1798

1799 pg_verifymbstr(fld, output_ptr - fld, false);

1800 }

1801 }

1802

1803 /* Terminate attribute value in output area */

1804 *output_ptr++ = '0円';

1805

1806 fieldno++;

1807 /* Done if we hit EOL instead of a delim */

1808 if (!found_delim)

1809 break;

1810 }

1811

1812 /* Clean up state of attribute_buf */

1813 output_ptr--;

1814 Assert(*output_ptr == '0円');

1815 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);

1816

1817 return fieldno;

1818}

1819

1820/*

1821 * Parse the current line into separate attributes (fields),

1822 * performing de-escaping as needed. This has exactly the same API as

1823 * CopyReadAttributesText, except we parse the fields according to

1824 * "standard" (i.e. common) CSV usage.

1825 */

1826static int

1827 CopyReadAttributesCSV(CopyFromState cstate)

1828{

1829 char delimc = cstate->opts.delim[0];

1830 char quotec = cstate->opts.quote[0];

1831 char escapec = cstate->opts.escape[0];

1832 int fieldno;

1833 char *output_ptr;

1834 char *cur_ptr;

1835 char *line_end_ptr;

1836

1837 /*

1838 * We need a special case for zero-column tables: check that the input

1839 * line is empty, and return.

1840 */

1841 if (cstate->max_fields <= 0)

1842 {

1843 if (cstate->line_buf.len != 0)

1844 ereport(ERROR,

1845 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1846 errmsg("extra data after last expected column")));

1847 return 0;

1848 }

1849

1850 resetStringInfo(&cstate->attribute_buf);

1851

1852 /*

1853 * The de-escaped attributes will certainly not be longer than the input

1854 * data line, so we can just force attribute_buf to be large enough and

1855 * then transfer data without any checks for enough space. We need to do

1856 * it this way because enlarging attribute_buf mid-stream would invalidate

1857 * pointers already stored into cstate->raw_fields[].

1858 */

1859 if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)

1860 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);

1861 output_ptr = cstate->attribute_buf.data;

1862

1863 /* set pointer variables for loop */

1864 cur_ptr = cstate->line_buf.data;

1865 line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;

1866

1867 /* Outer loop iterates over fields */

1868 fieldno = 0;

1869 for (;;)

1870 {

1871 bool found_delim = false;

1872 bool saw_quote = false;

1873 char *start_ptr;

1874 char *end_ptr;

1875 int input_len;

1876

1877 /* Make sure there is enough space for the next value */

1878 if (fieldno >= cstate->max_fields)

1879 {

1880 cstate->max_fields *= 2;

1881 cstate->raw_fields =

1882 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));

1883 }

1884

1885 /* Remember start of field on both input and output sides */

1886 start_ptr = cur_ptr;

1887 cstate->raw_fields[fieldno] = output_ptr;

1888

1889 /*

1890 * Scan data for field,

1891 *

1892 * The loop starts in "not quote" mode and then toggles between that

1893 * and "in quote" mode. The loop exits normally if it is in "not

1894 * quote" mode and a delimiter or line end is seen.

1895 */

1896 for (;;)

1897 {

1898 char c;

1899

1900 /* Not in quote */

1901 for (;;)

1902 {

1903 end_ptr = cur_ptr;

1904 if (cur_ptr >= line_end_ptr)

1905 goto endfield;

1906 c = *cur_ptr++;

1907 /* unquoted field delimiter */

1908 if (c == delimc)

1909 {

1910 found_delim = true;

1911 goto endfield;

1912 }

1913 /* start of quoted field (or part of field) */

1914 if (c == quotec)

1915 {

1916 saw_quote = true;

1917 break;

1918 }

1919 /* Add c to output string */

1920 *output_ptr++ = c;

1921 }

1922

1923 /* In quote */

1924 for (;;)

1925 {

1926 end_ptr = cur_ptr;

1927 if (cur_ptr >= line_end_ptr)

1928 ereport(ERROR,

1929 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1930 errmsg("unterminated CSV quoted field")));

1931

1932 c = *cur_ptr++;

1933

1934 /* escape within a quoted field */

1935 if (c == escapec)

1936 {

1937 /*

1938 * peek at the next char if available, and escape it if it

1939 * is an escape char or a quote char

1940 */

1941 if (cur_ptr < line_end_ptr)

1942 {

1943 char nextc = *cur_ptr;

1944

1945 if (nextc == escapec || nextc == quotec)

1946 {

1947 *output_ptr++ = nextc;

1948 cur_ptr++;

1949 continue;

1950 }

1951 }

1952 }

1953

1954 /*

1955 * end of quoted field. Must do this test after testing for

1956 * escape in case quote char and escape char are the same

1957 * (which is the common case).

1958 */

1959 if (c == quotec)

1960 break;

1961

1962 /* Add c to output string */

1963 *output_ptr++ = c;

1964 }

1965 }

1966endfield:

1967

1968 /* Terminate attribute value in output area */

1969 *output_ptr++ = '0円';

1970

1971 /* Check whether raw input matched null marker */

1972 input_len = end_ptr - start_ptr;

1973 if (!saw_quote && input_len == cstate->opts.null_print_len &&

1974 strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)

1975 cstate->raw_fields[fieldno] = NULL;

1976 /* Check whether raw input matched default marker */

1977 else if (fieldno < list_length(cstate->attnumlist) &&

1978 cstate->opts.default_print &&

1979 input_len == cstate->opts.default_print_len &&

1980 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)

1981 {

1982 /* fieldno is 0-index and attnum is 1-index */

1983 int m = list_nth_int(cstate->attnumlist, fieldno) - 1;

1984

1985 if (cstate->defexprs[m] != NULL)

1986 {

1987 /* defaults contain entries for all physical attributes */

1988 cstate->defaults[m] = true;

1989 }

1990 else

1991 {

1992 TupleDesc tupDesc = RelationGetDescr(cstate->rel);

1993 Form_pg_attribute att = TupleDescAttr(tupDesc, m);

1994

1995 ereport(ERROR,

1996 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

1997 errmsg("unexpected default marker in COPY data"),

1998 errdetail("Column \"%s\" has no default value.",

1999 NameStr(att->attname))));

2000 }

2001 }

2002

2003 fieldno++;

2004 /* Done if we hit EOL instead of a delim */

2005 if (!found_delim)

2006 break;

2007 }

2008

2009 /* Clean up state of attribute_buf */

2010 output_ptr--;

2011 Assert(*output_ptr == '0円');

2012 cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);

2013

2014 return fieldno;

2015}

2016

2017

2018/*

2019 * Read a binary attribute

2020 */

2021static Datum

2022 CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,

2023 Oid typioparam, int32 typmod,

2024 bool *isnull)

2025{

2026 int32 fld_size;

2027 Datum result;

2028

2029 if (!CopyGetInt32(cstate, &fld_size))

2030 ereport(ERROR,

2031 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

2032 errmsg("unexpected EOF in COPY data")));

2033 if (fld_size == -1)

2034 {

2035 *isnull = true;

2036 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);

2037 }

2038 if (fld_size < 0)

2039 ereport(ERROR,

2040 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

2041 errmsg("invalid field size")));

2042

2043 /* reset attribute_buf to empty, and load raw data in it */

2044 resetStringInfo(&cstate->attribute_buf);

2045

2046 enlargeStringInfo(&cstate->attribute_buf, fld_size);

2047 if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,

2048 fld_size) != fld_size)

2049 ereport(ERROR,

2050 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),

2051 errmsg("unexpected EOF in COPY data")));

2052

2053 cstate->attribute_buf.len = fld_size;

2054 cstate->attribute_buf.data[fld_size] = '0円';

2055

2056 /* Call the column type's binary input converter */

2057 result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,

2058 typioparam, typmod);

2059

2060 /* Trouble if it didn't eat the whole buffer */

2061 if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)

2062 ereport(ERROR,

2063 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),

2064 errmsg("incorrect binary data format")));

2065

2066 *isnull = false;

2067 return result;

2068}

AttrNumber

int16 AttrNumber

Definition: attnum.h:21

pgstat_progress_update_param

void pgstat_progress_update_param(int index, int64 val)

Definition: backend_progress.c:48

values

static Datum values[MAXATTR]

Definition: bootstrap.c:153

builtins.h

NameStr

#define NameStr(name)

Definition: c.h:751

Min

#define Min(x, y)

Definition: c.h:1003

IS_HIGHBIT_SET

#define IS_HIGHBIT_SET(ch)

Definition: c.h:1154

pg_attribute_always_inline

#define pg_attribute_always_inline

Definition: c.h:269

int16

int16_t int16

Definition: c.h:533

int32

int32_t int32

Definition: c.h:534

uint16

uint16_t uint16

Definition: c.h:537

uint32

uint32_t uint32

Definition: c.h:538

MemSet

#define MemSet(start, val, len)

Definition: c.h:1019

copyapi.h

CopyLimitPrintoutLength

char * CopyLimitPrintoutLength(const char *str)

Definition: copyfrom.c:333

copyfrom_internal.h

RAW_BUF_BYTES

#define RAW_BUF_BYTES(cstate)

Definition: copyfrom_internal.h:181

INPUT_BUF_SIZE

#define INPUT_BUF_SIZE

Definition: copyfrom_internal.h:160

EOL_CR

@ EOL_CR

Definition: copyfrom_internal.h:39

EOL_CRNL

@ EOL_CRNL

Definition: copyfrom_internal.h:40

EOL_UNKNOWN

@ EOL_UNKNOWN

Definition: copyfrom_internal.h:37

EOL_NL

@ EOL_NL

Definition: copyfrom_internal.h:38

INPUT_BUF_BYTES

#define INPUT_BUF_BYTES(cstate)

Definition: copyfrom_internal.h:167

RAW_BUF_SIZE

#define RAW_BUF_SIZE

Definition: copyfrom_internal.h:174

CopyFromTextLikeOneRow

static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls, bool is_csv)

Definition: copyfromparse.c:946

NextCopyFromRawFieldsInternal

static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)

Definition: copyfromparse.c:771

CopyFromTextOneRow

bool CopyFromTextOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

Definition: copyfromparse.c:925

CopyFromCSVOneRow

bool CopyFromCSVOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

Definition: copyfromparse.c:933

CopyReadAttributesCSV

static int CopyReadAttributesCSV(CopyFromState cstate)

Definition: copyfromparse.c:1827

CopyGetInt16

static bool CopyGetInt16(CopyFromState cstate, int16 *val)

Definition: copyfromparse.c:379

CopyConversionError

static void CopyConversionError(CopyFromState cstate)

Definition: copyfromparse.c:533

CopyGetInt32

static bool CopyGetInt32(CopyFromState cstate, int32 *val)

Definition: copyfromparse.c:362

CopyLoadRawBuf

static void CopyLoadRawBuf(CopyFromState cstate)

Definition: copyfromparse.c:590

OCTVALUE

#define OCTVALUE(c)

Definition: copyfromparse.c:79

REFILL_LINEBUF

#define REFILL_LINEBUF

Definition: copyfromparse.c:126

CopyLoadInputBuf

static void CopyLoadInputBuf(CopyFromState cstate)

Definition: copyfromparse.c:650

ISOCTAL

#define ISOCTAL(c)

Definition: copyfromparse.c:78

ReceiveCopyBinaryHeader

void ReceiveCopyBinaryHeader(CopyFromState cstate)

Definition: copyfromparse.c:190

CopyGetData

static int CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)

Definition: copyfromparse.c:245

CopyReadBinaryAttribute

static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull)

Definition: copyfromparse.c:2022

GetDecimalFromHex

static int GetDecimalFromHex(char hex)

Definition: copyfromparse.c:1545

ReceiveCopyBegin

void ReceiveCopyBegin(CopyFromState cstate)

Definition: copyfromparse.c:170

CopyReadLineText

static bool CopyReadLineText(CopyFromState cstate, bool is_csv)

Definition: copyfromparse.c:1243

IF_NEED_REFILL_AND_EOF_BREAK

#define IF_NEED_REFILL_AND_EOF_BREAK(extralen)

Definition: copyfromparse.c:109

CopyReadAttributesText

static int CopyReadAttributesText(CopyFromState cstate)

Definition: copyfromparse.c:1573

BinarySignature

static const char BinarySignature[11]

Definition: copyfromparse.c:139

IF_NEED_REFILL_AND_NOT_EOF_CONTINUE

#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen)

Definition: copyfromparse.c:97

CopyReadLine

static bool CopyReadLine(CopyFromState cstate, bool is_csv)

Definition: copyfromparse.c:1167

CopyReadBinaryData

static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)

Definition: copyfromparse.c:701

CopyFromBinaryOneRow

bool CopyFromBinaryOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

Definition: copyfromparse.c:1095

CopyConvertBuf

static void CopyConvertBuf(CopyFromState cstate)

Definition: copyfromparse.c:400

NextCopyFrom

bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

Definition: copyfromparse.c:880

NextCopyFromRawFields

bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)

Definition: copyfromparse.c:747

COPY_FILE

@ COPY_FILE

Definition: copyto.c:45

COPY_CALLBACK

@ COPY_CALLBACK

Definition: copyto.c:47

COPY_FRONTEND

@ COPY_FRONTEND

Definition: copyto.c:46

cur

struct cursor * cur

Definition: ecpg.c:29

errcode_for_file_access

int errcode_for_file_access(void)

Definition: elog.c:877

errdetail

int errdetail(const char *fmt,...)

Definition: elog.c:1207

errhint

int errhint(const char *fmt,...)

Definition: elog.c:1321

errcode

int errcode(int sqlerrcode)

Definition: elog.c:854

errmsg

int errmsg(const char *fmt,...)

Definition: elog.c:1071

ERROR

#define ERROR

Definition: elog.h:39

elog

#define elog(elevel,...)

Definition: elog.h:226

NOTICE

#define NOTICE

Definition: elog.h:35

ereport

#define ereport(elevel,...)

Definition: elog.h:150

executor.h

ExecEvalExpr

static Datum ExecEvalExpr(ExprState *state, ExprContext *econtext, bool *isNull)

Definition: executor.h:390

InputFunctionCallSafe

bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, Node *escontext, Datum *result)

Definition: fmgr.c:1584

ReceiveFunctionCall

Datum ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf, Oid typioparam, int32 typmod)

Definition: fmgr.c:1696

Assert

Assert(PointerIsAligned(start, uint64))

COPY_ON_ERROR_STOP

@ COPY_ON_ERROR_STOP

Definition: copy.h:36

COPY_LOG_VERBOSITY_VERBOSE

@ COPY_LOG_VERBOSITY_VERBOSE

Definition: copy.h:48

COPY_HEADER_MATCH

#define COPY_HEADER_MATCH

Definition: copy.h:26

COPY_HEADER_FALSE

#define COPY_HEADER_FALSE

Definition: copy.h:27

val

long val

Definition: informix.c:689

i

int i

Definition: isn.c:77

if

if(TABLE==NULL||TABLE_index==NULL)

Definition: isn.c:81

libpq.h

pq_flush

#define pq_flush()

Definition: libpq.h:46

PQ_SMALL_MESSAGE_LIMIT

#define PQ_SMALL_MESSAGE_LIMIT

Definition: libpq.h:30

PQ_LARGE_MESSAGE_LIMIT

#define PQ_LARGE_MESSAGE_LIMIT

Definition: libpq.h:31

GetDatabaseEncoding

int GetDatabaseEncoding(void)

Definition: mbutils.c:1262

pg_verifymbstr

bool pg_verifymbstr(const char *mbstr, int len, bool noError)

Definition: mbutils.c:1557

pg_do_encoding_conversion_buf

int pg_do_encoding_conversion_buf(Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)

Definition: mbutils.c:470

report_invalid_encoding

void report_invalid_encoding(int encoding, const char *mbstr, int len)

Definition: mbutils.c:1699

repalloc

void * repalloc(void *pointer, Size size)

Definition: mcxt.c:1610

pfree

void pfree(void *pointer)

Definition: mcxt.c:1594

CurrentMemoryContext

MemoryContext CurrentMemoryContext

Definition: mcxt.c:160

miscadmin.h

HOLD_CANCEL_INTERRUPTS

#define HOLD_CANCEL_INTERRUPTS()

Definition: miscadmin.h:141

RESUME_CANCEL_INTERRUPTS

#define RESUME_CANCEL_INTERRUPTS()

Definition: miscadmin.h:143

namestrcmp

int namestrcmp(Name name, const char *str)

Definition: name.c:247

generate_unaccent_rules.dest

dest

Definition: generate_unaccent_rules.py:285

attnum

int16 attnum

Definition: pg_attribute.h:74

Form_pg_attribute

FormData_pg_attribute * Form_pg_attribute

Definition: pg_attribute.h:202

format

static char format

Definition: pg_basebackup.c:134

pg_bswap.h

pg_ntoh32

#define pg_ntoh32(x)

Definition: pg_bswap.h:125

pg_ntoh16

#define pg_ntoh16(x)

Definition: pg_bswap.h:124

list_length

static int list_length(const List *l)

Definition: pg_list.h:152

lfirst_int

#define lfirst_int(lc)

Definition: pg_list.h:173

list_nth_int

static int list_nth_int(const List *list, int n)

Definition: pg_list.h:310

buf

static char * buf

Definition: pg_test_fsync.c:72

pg_wchar.h

MAX_CONVERSION_INPUT_LENGTH

#define MAX_CONVERSION_INPUT_LENGTH

Definition: pg_wchar.h:320

pgstat.h

pg_ascii_tolower

unsigned char pg_ascii_tolower(unsigned char ch)

Definition: pgstrcasecmp.c:146

postgres.h

Datum

uint64_t Datum

Definition: postgres.h:70

Oid

unsigned int Oid

Definition: postgres_ext.h:32

pq_getmessage

int pq_getmessage(StringInfo s, int maxlen)

Definition: pqcomm.c:1203

pq_getbyte

int pq_getbyte(void)

Definition: pqcomm.c:963

pq_startmsgread

void pq_startmsgread(void)

Definition: pqcomm.c:1141

pq_getmsgstring

const char * pq_getmsgstring(StringInfo msg)

Definition: pqformat.c:579

pq_endmessage

void pq_endmessage(StringInfo buf)

Definition: pqformat.c:296

pq_copymsgbytes

void pq_copymsgbytes(StringInfo msg, void *buf, int datalen)

Definition: pqformat.c:528

pq_beginmessage

void pq_beginmessage(StringInfo buf, char msgtype)

Definition: pqformat.c:88

pqformat.h

pq_sendbyte

static void pq_sendbyte(StringInfo buf, uint8 byt)

Definition: pqformat.h:160

pq_sendint16

static void pq_sendint16(StringInfo buf, uint16 i)

Definition: pqformat.h:136

c

char * c

Definition: preproc-cursor.c:31

string

char string[11]

Definition: preproc-type.c:52

progress.h

PROGRESS_COPY_BYTES_PROCESSED

#define PROGRESS_COPY_BYTES_PROCESSED

Definition: progress.h:147

PqMsg_CopyDone

#define PqMsg_CopyDone

Definition: protocol.h:64

PqMsg_CopyData

#define PqMsg_CopyData

Definition: protocol.h:65

PqMsg_CopyInResponse

#define PqMsg_CopyInResponse

Definition: protocol.h:45

PqMsg_Sync

#define PqMsg_Sync

Definition: protocol.h:27

PqMsg_CopyFail

#define PqMsg_CopyFail

Definition: protocol.h:29

PqMsg_Flush

#define PqMsg_Flush

Definition: protocol.h:24

rel.h

RelationGetDescr

#define RelationGetDescr(relation)

Definition: rel.h:540

makeStringInfo

StringInfo makeStringInfo(void)

Definition: stringinfo.c:72

resetStringInfo

void resetStringInfo(StringInfo str)

Definition: stringinfo.c:126

enlargeStringInfo

void enlargeStringInfo(StringInfo str, int needed)

Definition: stringinfo.c:337

CopyFormatOptions::header_line

int header_line

Definition: copy.h:64

CopyFormatOptions::default_print_len

int default_print_len

Definition: copy.h:70

CopyFormatOptions::binary

bool binary

Definition: copy.h:61

CopyFormatOptions::null_print_len

int null_print_len

Definition: copy.h:67

CopyFormatOptions::log_verbosity

CopyLogVerbosityChoice log_verbosity

Definition: copy.h:85

CopyFormatOptions::quote

char * quote

Definition: copy.h:72

CopyFormatOptions::on_error

CopyOnErrorChoice on_error

Definition: copy.h:84

CopyFormatOptions::escape

char * escape

Definition: copy.h:73

CopyFormatOptions::null_print

char * null_print

Definition: copy.h:66

CopyFormatOptions::delim

char * delim

Definition: copy.h:71

CopyFormatOptions::force_notnull_flags

bool * force_notnull_flags

Definition: copy.h:79

CopyFormatOptions::csv_mode

bool csv_mode

Definition: copy.h:63

CopyFormatOptions::force_null_flags

bool * force_null_flags

Definition: copy.h:82

CopyFormatOptions::default_print

char * default_print

Definition: copy.h:69

CopyFromRoutine::CopyFromOneRow

bool(* CopyFromOneRow)(CopyFromState cstate, ExprContext *econtext, Datum *values, bool *nulls)

Definition: copyapi.h:96

CopyFromStateData

Definition: copyfrom_internal.h:60

CopyFromStateData::rel

Relation rel

Definition: copyfrom_internal.h:75

CopyFromStateData::input_buf_len

int input_buf_len

Definition: copyfrom_internal.h:163

CopyFromStateData::attnumlist

List * attnumlist

Definition: copyfrom_internal.h:76

CopyFromStateData::input_reached_error

bool input_reached_error

Definition: copyfrom_internal.h:165

CopyFromStateData::max_fields

int max_fields

Definition: copyfrom_internal.h:133

CopyFromStateData::defexprs

ExprState ** defexprs

Definition: copyfrom_internal.h:107

CopyFromStateData::data_source_cb

copy_data_source_cb data_source_cb

Definition: copyfrom_internal.h:79

CopyFromStateData::raw_reached_eof

bool raw_reached_eof

Definition: copyfrom_internal.h:178

CopyFromStateData::need_transcoding

bool need_transcoding

Definition: copyfrom_internal.h:71

CopyFromStateData::copy_file

FILE * copy_file

Definition: copyfrom_internal.h:66

CopyFromStateData::input_buf

char * input_buf

Definition: copyfrom_internal.h:161

CopyFromStateData::defaults

bool * defaults

Definition: copyfrom_internal.h:109

CopyFromStateData::copy_src

CopySource copy_src

Definition: copyfrom_internal.h:65

CopyFromStateData::routine

const struct CopyFromRoutine * routine

Definition: copyfrom_internal.h:62

CopyFromStateData::line_buf

StringInfoData line_buf

Definition: copyfrom_internal.h:143

CopyFromStateData::raw_buf

char * raw_buf

Definition: copyfrom_internal.h:175

CopyFromStateData::opts

CopyFormatOptions opts

Definition: copyfrom_internal.h:81

CopyFromStateData::attribute_buf

StringInfoData attribute_buf

Definition: copyfrom_internal.h:129

CopyFromStateData::raw_buf_index

int raw_buf_index

Definition: copyfrom_internal.h:176

CopyFromStateData::num_errors

uint64 num_errors

Definition: copyfrom_internal.h:103

CopyFromStateData::eol_type

EolType eol_type

Definition: copyfrom_internal.h:69

CopyFromStateData::file_encoding

int file_encoding

Definition: copyfrom_internal.h:70

CopyFromStateData::convert_select_flags

bool * convert_select_flags

Definition: copyfrom_internal.h:82

CopyFromStateData::bytes_processed

uint64 bytes_processed

Definition: copyfrom_internal.h:183

CopyFromStateData::raw_fields

char ** raw_fields

Definition: copyfrom_internal.h:134

CopyFromStateData::raw_buf_len

int raw_buf_len

Definition: copyfrom_internal.h:177

CopyFromStateData::cur_lineno

uint64 cur_lineno

Definition: copyfrom_internal.h:87

CopyFromStateData::line_buf_valid

bool line_buf_valid

Definition: copyfrom_internal.h:144

CopyFromStateData::cur_attval

const char * cur_attval

Definition: copyfrom_internal.h:89

CopyFromStateData::input_buf_index

int input_buf_index

Definition: copyfrom_internal.h:162

CopyFromStateData::typioparams

Oid * typioparams

Definition: copyfrom_internal.h:100

CopyFromStateData::fe_msgbuf

StringInfo fe_msgbuf

Definition: copyfrom_internal.h:67

CopyFromStateData::cur_attname

const char * cur_attname

Definition: copyfrom_internal.h:88

CopyFromStateData::in_functions

FmgrInfo * in_functions

Definition: copyfrom_internal.h:99

CopyFromStateData::defmap

int * defmap

Definition: copyfrom_internal.h:105

CopyFromStateData::num_defaults

AttrNumber num_defaults

Definition: copyfrom_internal.h:97

CopyFromStateData::relname_only

bool relname_only

Definition: copyfrom_internal.h:90

CopyFromStateData::input_reached_eof

bool input_reached_eof

Definition: copyfrom_internal.h:164

CopyFromStateData::conversion_proc

Oid conversion_proc

Definition: copyfrom_internal.h:72

CopyFromStateData::escontext

ErrorSaveContext * escontext

Definition: copyfrom_internal.h:101

ExprContext

Definition: execnodes.h:268

ExprContext::ecxt_per_tuple_memory

MemoryContext ecxt_per_tuple_memory

Definition: execnodes.h:281

ExprState

Definition: execnodes.h:85

FmgrInfo

Definition: fmgr.h:57

Node

Definition: nodes.h:135

StringInfoData

Definition: stringinfo.h:47

StringInfoData::maxlen

int maxlen

Definition: stringinfo.h:50

StringInfoData::cursor

int cursor

Definition: stringinfo.h:51

StringInfoData::data

char * data

Definition: stringinfo.h:48

StringInfoData::len

int len

Definition: stringinfo.h:49

TupleDescData

Definition: tupdesc.h:136

TupleDescData::natts

int natts

Definition: tupdesc.h:137

TupleDescAttr

static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)

Definition: tupdesc.h:160

ListCell

Definition: pg_list.h:46

unistd.h

pg_encoding_verifymbstr

int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)

Definition: wchar.c:2202

pg_encoding_max_length

int pg_encoding_max_length(int encoding)

Definition: wchar.c:2213

PostgreSQL Source Code: src/backend/commands/copyfromparse.c Source File