Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3b19aa4

Browse files
author
auxten
committed
Add built-in libicu and tokenizer from Tencent/wcdb which is used in Wechat
1 parent 6c720c2 commit 3b19aa4

File tree

171 files changed

+96074
-6
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+96074
-6
lines changed

‎_example/simple/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#!/bin/bash
22

3-
CGO_ENABLED=1 GOARCH=amd64 GOOS=linux CGO_CFLAGS="-DSQLITE_OMIT_LOAD_EXTENSION" go build -ldflags "-linkmode external -extldflags -static" -a -v --tags "linux sqlite_omit_load_extension" && ldd simple
3+
CGO_ENABLED=1 GOARCH=amd64 GOOS=linux CGO_CFLAGS="-DSQLITE_OMIT_LOAD_EXTENSION" go build -ldflags "-linkmode external -extldflags -static" -a -v --tags "linux sqlite_omit_load_extension sqlite_vtable sqlite_fts5 sqlite_icu" && ldd simple

‎_example/simple/simple.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@ package main
33
import (
44
"database/sql"
55
"fmt"
6-
_ "github.com/CovenantSQL/go-sqlite3-encrypt"
76
"log"
87
"os"
8+
9+
_ "github.com/CovenantSQL/go-sqlite3-encrypt"
910
)
1011

1112
func main() {

‎fts3_tokenizer.h

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
** 2006 July 10
3+
**
4+
** The author disclaims copyright to this source code.
5+
**
6+
*************************************************************************
7+
** Defines the interface to tokenizers used by fulltext-search. There
8+
** are three basic components:
9+
**
10+
** sqlite3_tokenizer_module is a singleton defining the tokenizer
11+
** interface functions. This is essentially the class structure for
12+
** tokenizers.
13+
**
14+
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
15+
** including customization information defined at creation time.
16+
**
17+
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
18+
** tokens from a particular input.
19+
*/
20+
#ifndef _FTS3_TOKENIZER_H_
21+
#define _FTS3_TOKENIZER_H_
22+
23+
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
24+
** If tokenizers are to be allowed to call sqlite3_*() functions, then
25+
** we will need a way to register the API consistently.
26+
*/
27+
#include "sqlite3-binding.h"
28+
29+
/*
30+
** Structures used by the tokenizer interface. When a new tokenizer
31+
** implementation is registered, the caller provides a pointer to
32+
** an sqlite3_tokenizer_module containing pointers to the callback
33+
** functions that make up an implementation.
34+
**
35+
** When an fts3 table is created, it passes any arguments passed to
36+
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
37+
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
38+
** implementation. The xCreate() function in turn returns an
39+
** sqlite3_tokenizer structure representing the specific tokenizer to
40+
** be used for the fts3 table (customized by the tokenizer clause arguments).
41+
**
42+
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
43+
** method is called. It returns an sqlite3_tokenizer_cursor object
44+
** that may be used to tokenize a specific input buffer based on
45+
** the tokenization rules supplied by a specific sqlite3_tokenizer
46+
** object.
47+
*/
48+
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
49+
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
50+
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
51+
52+
struct sqlite3_tokenizer_module {
53+
54+
/*
55+
** Structure version. Should always be set to 0.
56+
*/
57+
int iVersion;
58+
59+
/*
60+
** Create a new tokenizer. The values in the argv[] array are the
61+
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
62+
** TABLE statement that created the fts3 table. For example, if
63+
** the following SQL is executed:
64+
**
65+
** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
66+
**
67+
** then argc is set to 2, and the argv[] array contains pointers
68+
** to the strings "arg1" and "arg2".
69+
**
70+
** This method should return either SQLITE_OK (0), or an SQLite error
71+
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
72+
** to point at the newly created tokenizer structure. The generic
73+
** sqlite3_tokenizer.pModule variable should not be initialised by
74+
** this callback. The caller will do so.
75+
*/
76+
int (*xCreate)(
77+
int argc, /* Size of argv array */
78+
const char *const*argv, /* Tokenizer argument strings */
79+
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
80+
);
81+
82+
/*
83+
** Destroy an existing tokenizer. The fts3 module calls this method
84+
** exactly once for each successful call to xCreate().
85+
*/
86+
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
87+
88+
/*
89+
** Create a tokenizer cursor to tokenize an input buffer. The caller
90+
** is responsible for ensuring that the input buffer remains valid
91+
** until the cursor is closed (using the xClose() method).
92+
*/
93+
int (*xOpen)(
94+
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
95+
const char *pInput, int nBytes, /* Input buffer */
96+
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
97+
);
98+
99+
/*
100+
** Destroy an existing tokenizer cursor. The fts3 module calls this
101+
** method exactly once for each successful call to xOpen().
102+
*/
103+
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
104+
105+
/*
106+
** Retrieve the next token from the tokenizer cursor pCursor. This
107+
** method should either return SQLITE_OK and set the values of the
108+
** "OUT" variables identified below, or SQLITE_DONE to indicate that
109+
** the end of the buffer has been reached, or an SQLite error code.
110+
**
111+
** *ppToken should be set to point at a buffer containing the
112+
** normalized version of the token (i.e. after any case-folding and/or
113+
** stemming has been performed). *pnBytes should be set to the length
114+
** of this buffer in bytes. The input text that generated the token is
115+
** identified by the byte offsets returned in *piStartOffset and
116+
** *piEndOffset. *piStartOffset should be set to the index of the first
117+
** byte of the token in the input buffer. *piEndOffset should be set
118+
** to the index of the first byte just past the end of the token in
119+
** the input buffer.
120+
**
121+
** The buffer *ppToken is set to point at is managed by the tokenizer
122+
** implementation. It is only required to be valid until the next call
123+
** to xNext() or xClose().
124+
*/
125+
/* TODO(shess) current implementation requires pInput to be
126+
** nul-terminated. This should either be fixed, or pInput/nBytes
127+
** should be converted to zInput.
128+
*/
129+
int (*xNext)(
130+
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
131+
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
132+
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
133+
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
134+
int *piPosition /* OUT: Number of tokens returned before this one */
135+
);
136+
};
137+
138+
struct sqlite3_tokenizer {
139+
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
140+
/* Tokenizer implementations will typically add additional fields */
141+
};
142+
143+
struct sqlite3_tokenizer_cursor {
144+
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
145+
/* Tokenizer implementations will typically add additional fields */
146+
};
147+
148+
int fts3_global_term_cnt(int iTerm, int iCol);
149+
int fts3_term_cnt(int iTerm, int iCol);
150+
151+
152+
#endif /* _FTS3_TOKENIZER_H_ */

‎icucompat.c

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Tencent is pleased to support the open source community by making
3+
* WCDB available.
4+
*
5+
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
6+
* All rights reserved.
7+
*
8+
* Licensed under the BSD 3-Clause License (the "License"); you may not use
9+
* this file except in compliance with the License. You may obtain a copy of
10+
* the License at
11+
*
12+
* https://opensource.org/licenses/BSD-3-Clause
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
#if defined(__ANDROID__) && !defined(WCDB_ICU_DIRECT_LINKING)
22+
23+
#define ICUCOMPAT_IMPL
24+
25+
#include "icucompat.h"
26+
#include <dirent.h>
27+
#include <dlfcn.h>
28+
#include <stdio.h>
29+
#include <string.h>
30+
31+
static void *s_libicuuc_so = NULL;
32+
static void *s_libicui18n_so = NULL;
33+
icu_compat_t __g_icucompat_iface__ = {0};
34+
35+
/*
 * Resolve the symbol named func_name immediately followed by suffix in the
 * library referred to by lib_handle.
 *
 * lib_handle  handle passed straight to dlsym().
 * func_name   base symbol name, e.g. "u_getVersion".
 * suffix      text appended to func_name, e.g. "_55"; may be "".
 *
 * Returns whatever dlsym() returns for the combined name, or NULL when the
 * combined name does not fit in the local 128-byte buffer.
 */
static void *
load_function(void *lib_handle, const char *func_name, const char *suffix)
{
    char buf[128];
    int len = snprintf(buf, sizeof(buf), "%s%s", func_name, suffix);

    /* snprintf() reports the length the full name would have had.  A
     * truncated name would resolve the wrong symbol, so treat it as a
     * lookup failure rather than passing it on to dlsym(). */
    if (len < 0 || (size_t) len >= sizeof(buf))
        return NULL;

    return dlsym(lib_handle, buf);
}
44+
45+
/*
 * Populate the function-pointer members of __g_icucompat_iface__.
 *
 * Both ICUCOMPAT_UC_FUNC and ICUCOMPAT_I18N_FUNC are defined here as an
 * assignment statement: member func_name##_ is set to the result of
 * load_function() for func_name with `suffix` appended, cast to a pointer to
 * function returning ret_type and taking arg_list.  The UC variant looks the
 * symbol up in s_libicuuc_so, the I18N variant in s_libicui18n_so.
 * "icuprototype.h" is then included so that the macro invocations it
 * contains (not visible here; presumably one per ICU function) expand to
 * those assignments, after which both macros are undefined again.
 *
 * The value returned by load_function() is stored without being checked.
 */
static void load_functions(const char *suffix)
46+
{
47+
#define ICUCOMPAT_UC_FUNC(ret_type, func_name, arg_list) \
48+
(__g_icucompat_iface__.func_name##_) = \
49+
(ret_type(*) arg_list) load_function(s_libicuuc_so, #func_name, \
50+
suffix);
51+
#define ICUCOMPAT_I18N_FUNC(ret_type, func_name, arg_list) \
52+
(__g_icucompat_iface__.func_name##_) = \
53+
(ret_type(*) arg_list) load_function(s_libicui18n_so, #func_name, \
54+
suffix);
55+
56+
/* Expands to one assignment per macro invocation in the header. */
#include "icuprototype.h"
57+
58+
#undef ICUCOMPAT_UC_FUNC
59+
#undef ICUCOMPAT_I18N_FUNC
60+
}
61+
62+
static const char *find_icu_suffix(void *lib_handle, char *buf, size_t buflen)
63+
{
64+
DIR *icudir = opendir("/system/usr/icu");
65+
if (!icudir)
66+
return NULL;
67+
68+
struct dirent *dp;
69+
while ((dp = readdir(icudir)) != NULL) {
70+
if (dp->d_type == DT_REG) {
71+
// Find /system/usr/icu/icudt??l.dat
72+
const char *name = dp->d_name;
73+
if (strlen(name) == 12 && !strncmp("icudt", name, 5) &&
74+
name[5] >= '0' && name[5] <= '9' && name[6] >= '0' &&
75+
name[6] <= '9' && !strncmp("l.dat", &name[7], 6)) {
76+
snprintf(buf, buflen, "_%c%c", name[5], name[6]);
77+
78+
// Try loading with suffix.
79+
if (load_function(lib_handle, "u_getVersion", buf)) {
80+
closedir(icudir);
81+
return buf;
82+
}
83+
}
84+
}
85+
}
86+
closedir(icudir);
87+
88+
// Failed, try plain names without suffix.
89+
if (load_function(lib_handle, "u_getVersion", "")) {
90+
buf[0] = 0;
91+
return buf;
92+
}
93+
94+
return NULL;
95+
}
96+
97+
int init_icucompat()
98+
{
99+
char suffix[16];
100+
if (s_libicuuc_so || s_libicui18n_so)
101+
return 0;
102+
103+
s_libicuuc_so = dlopen("libicuuc.so", RTLD_LAZY);
104+
s_libicui18n_so = dlopen("libicui18n.so", RTLD_LAZY);
105+
if (!s_libicuuc_so || !s_libicui18n_so)
106+
goto bail;
107+
108+
if (!find_icu_suffix(s_libicuuc_so, suffix, sizeof(suffix)))
109+
goto bail;
110+
111+
load_functions(suffix);
112+
return 0;
113+
114+
bail:
115+
if (s_libicuuc_so)
116+
dlclose(s_libicuuc_so);
117+
if (s_libicui18n_so)
118+
dlclose(s_libicui18n_so);
119+
s_libicuuc_so = NULL;
120+
s_libicui18n_so = NULL;
121+
return -1;
122+
}
123+
124+
void destroy_icucompat()
125+
{
126+
if (s_libicuuc_so)
127+
dlclose(s_libicuuc_so);
128+
if (s_libicui18n_so)
129+
dlclose(s_libicui18n_so);
130+
s_libicuuc_so = NULL;
131+
s_libicui18n_so = NULL;
132+
}
133+
134+
#endif // defined(__ANDROID__) && !defined(WCDB_ICU_DIRECT_LINKING)

‎icucompat.h

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Tencent is pleased to support the open source community by making
3+
* WCDB available.
4+
*
5+
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
6+
* All rights reserved.
7+
*
8+
* Licensed under the BSD 3-Clause License (the "License"); you may not use
9+
* this file except in compliance with the License. You may obtain a copy of
10+
* the License at
11+
*
12+
* https://opensource.org/licenses/BSD-3-Clause
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
#ifndef __ICU_COMPAT_H__
22+
#define __ICU_COMPAT_H__
23+
24+
#ifdef __cplusplus
25+
extern "C" {
26+
#endif
27+
28+
/* Disable ICU export renaming. It will be replaced with icucompat macros. */
29+
#define U_DISABLE_RENAMING 1
30+
#include <unicode/ubrk.h>
31+
#include <unicode/ucnv.h>
32+
#include <unicode/ucol.h>
33+
#include <unicode/uiter.h>
34+
#include <unicode/umachine.h>
35+
#include <unicode/unorm.h>
36+
#include <unicode/ustring.h>
37+
#include <unicode/utypes.h>
38+
#include <unicode/uversion.h>
39+
40+
/*
 * Table of function pointers.
 *
 * ICUCOMPAT_UC_FUNC and ICUCOMPAT_I18N_FUNC are both defined here to declare
 * one member: a pointer named func_name##_ to a function returning ret_type
 * and taking arg_list, qualified with U_EXPORT2.  Including
 * "icuprototype.h" inside the struct body turns each macro invocation in
 * that header (not visible here) into such a member; the macros are
 * undefined again afterwards.
 */
typedef struct icu_compat_t {
41+
#define ICUCOMPAT_UC_FUNC(ret_type, func_name, arg_list) \
42+
ret_type(U_EXPORT2 *func_name##_) arg_list;
43+
#define ICUCOMPAT_I18N_FUNC(ret_type, func_name, arg_list) \
44+
ret_type(U_EXPORT2 *func_name##_) arg_list;
45+
46+
/* Expands to one member declaration per macro invocation in the header. */
#include "icuprototype.h"
47+
48+
#undef ICUCOMPAT_UC_FUNC
49+
#undef ICUCOMPAT_I18N_FUNC
50+
} icu_compat_t;
51+
extern icu_compat_t __g_icucompat_iface__;
52+
53+
int init_icucompat();
54+
void destroy_icucompat();
55+
56+
#ifndef ICUCOMPAT_IMPL
57+
#define ICUCOMPAT_DEFINE_SYMBOL(symbol) (__g_icucompat_iface__.symbol##_)
58+
#include "icuprototype.h"
59+
#endif
60+
61+
#ifdef __cplusplus
62+
}
63+
#endif
64+
65+
#endif

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /