[フレーム]

scansup.c

Go to the documentation of this file.

/*-------------------------------------------------------------------------
 *
 * scansup.c
 *	  scanner support routines used by the core lexer
 *
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/scansup.c
 *
 *-------------------------------------------------------------------------
 */

15#include "postgres.h"

16

17#include <ctype.h>

18

19#include "mb/pg_wchar.h"

20#include "parser/scansup.h"

21

22

/*
 * downcase_truncate_identifier() --- do appropriate downcasing and
 * truncation of an unquoted identifier.  Optionally warn of truncation.
 *
 * ident: the identifier bytes to process
 * len:   number of bytes of ident to process
 * warn:  passed through to downcase_identifier(), which hands it to
 *        truncate_identifier() when truncation is needed
 *
 * Returns a palloc'd string containing the adjusted identifier.
 *
 * Note: in some usages the passed string is not null-terminated.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but we don't yet
 * support that.  If you want to implement it, you'll need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	/* Same as downcase_identifier(), with truncation always enabled. */
	return downcase_identifier(ident, len, warn, true);
}

41

42/*

43 * a workhorse for downcase_truncate_identifier

44 */

45char *

46 downcase_identifier(const char *ident, int len, bool warn, bool truncate)

47{

48 char *result;

49 int i;

50 bool enc_is_single_byte;

51

52 result = palloc(len + 1);

53 enc_is_single_byte = pg_database_encoding_max_length() == 1;

54

55 /*

56 * SQL99 specifies Unicode-aware case normalization, which we don't yet

57 * have the infrastructure for. Instead we use tolower() to provide a

58 * locale-aware translation. However, there are some locales where this

59 * is not right either (eg, Turkish may do strange things with 'i' and

60 * 'I'). Our current compromise is to use tolower() for characters with

61 * the high bit set, as long as they aren't part of a multi-byte

62 * character, and use an ASCII-only downcasing for 7-bit characters.

63 */

64 for (i = 0; i < len; i++)

65 {

66 unsigned char ch = (unsigned char) ident[i];

67

68 if (ch >= 'A' && ch <= 'Z')

69 ch += 'a' - 'A';

70 else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))

71 ch = tolower(ch);

72 result[i] = (char) ch;

73 }

74 result[i] = '0円';

75

76 if (i >= NAMEDATALEN && truncate)

77 truncate_identifier(result, i, warn);

78

79 return result;

80}

81

82

83/*

84 * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.

85 *

86 * The given string is modified in-place, if necessary. A warning is

87 * issued if requested.

88 *

89 * We require the caller to pass in the string length since this saves a

90 * strlen() call in some common usages.

91 */

92void

93 truncate_identifier(char *ident, int len, bool warn)

94{

95 if (len >= NAMEDATALEN)

96 {

97 len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);

98 if (warn)

99 ereport(NOTICE,

100 (errcode(ERRCODE_NAME_TOO_LONG),

101 errmsg("identifier \"%s\" will be truncated to \"%.*s\"",

102 ident, len, ident)));

103 ident[len] = '0円';

104 }

105}

106

/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * ch: the character to classify
 *
 * Returns true for space, tab, newline, carriage return, vertical tab and
 * form feed; false for every other character.
 *
 * This should be used instead of the potentially locale-dependent isspace()
 * function when it's important to match the lexer's behavior.
 *
 * In principle we might need similar functions for isalnum etc, but for the
 * moment only isspace seems needed.
 */
bool
scanner_isspace(char ch)
{
	/* This must match scan.l's list of {space} characters */
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}

IS_HIGHBIT_SET

#define IS_HIGHBIT_SET(ch)

Definition: c.h:1154

errcode

int errcode(int sqlerrcode)

Definition: elog.c:854

errmsg

int errmsg(const char *fmt,...)

Definition: elog.c:1071

NOTICE

#define NOTICE

Definition: elog.h:35

ereport

#define ereport(elevel,...)

Definition: elog.h:150

ident

#define ident

Definition: indent_codes.h:47

i

int i

Definition: isn.c:77

pg_mbcliplen

int pg_mbcliplen(const char *mbstr, int len, int limit)

Definition: mbutils.c:1084

pg_database_encoding_max_length

int pg_database_encoding_max_length(void)

Definition: mbutils.c:1547

palloc

void * palloc(Size size)

Definition: mcxt.c:1365

NAMEDATALEN

#define NAMEDATALEN

Definition: pg_config_manual.h:29

len

const void size_t len

Definition: pg_crc32c_sse42.c:28

pg_wchar.h

postgres.h

downcase_identifier

char * downcase_identifier(const char *ident, int len, bool warn, bool truncate)

Definition: scansup.c:46

truncate_identifier

void truncate_identifier(char *ident, int len, bool warn)

Definition: scansup.c:93

downcase_truncate_identifier

char * downcase_truncate_identifier(const char *ident, int len, bool warn)

Definition: scansup.c:37

scanner_isspace

bool scanner_isspace(char ch)

Definition: scansup.c:117

scansup.h

warn

Definition: strftime.c:110

PostgreSQL Source Code: src/backend/parser/scansup.c Source File