Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f7c0f7a

Browse files
author
Rinat Mukhtarov
committed
renames, fix error
1 parent ee665a3 commit f7c0f7a

File tree

2 files changed

+21
-20
lines changed

2 files changed

+21
-20
lines changed
Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,40 @@
11
/*
2-
Выгрузка из БД:
2+
Выгрузка данных из БД:
33
psql postgresql://user@host:6001/db_name \
4-
--command='\copy (select id,pid,word,checked from sphinx_wordforms order by word) to stdout csv' > sphinx_wordforms.csv
5-
xz -zc9 --threads=8 sphinx_wordforms.csv > sphinx_wordforms.csv.xz
4+
--command='\copy (select id,pid,word,checked from public.wordforms order by word) to stdout with (format csv, header true)' \
5+
> wordforms.csv
6+
xz -zc9 --threads=8 wordforms.csv > wordforms.csv.xz
67
*/
78

89
-- fuzzystrmatch is the contrib module that provides levenshtein(); there is no extension named "fuzzymatch"
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
910
CREATE EXTENSION IF NOT EXISTS pg_trgm;
1011

11-
CREATE TABLE public.sphinx_wordforms (
12+
CREATE TABLE public.wordforms (
1213
id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
1314
pid integer DEFAULT 0 NOT NULL,
1415
word text NOT NULL check (length(word) between 1 and 100),
1516
checked boolean
1617
);
17-
--ALTER TABLE public.sphinx_wordforms OWNER TO alexan;
18+
--ALTER TABLE public.wordforms OWNER TO alexan;
1819

19-
COMMENT ON TABLE public.sphinx_wordforms IS 'Слова русского языка со словоформами';
20-
COMMENT ON COLUMN public.sphinx_wordforms.id IS 'Идентификатор';
21-
COMMENT ON COLUMN public.sphinx_wordforms.pid IS 'Идентификатор леммы (начальной, словарной формы слова)';
22-
COMMENT ON COLUMN public.sphinx_wordforms.word IS 'Слово или словосочетание в нижнем регистре';
23-
COMMENT ON COLUMN public.sphinx_wordforms.checked IS 'Проверенное слово?';
20+
COMMENT ON TABLE public.wordforms IS 'Слова русского языка со словоформами';
21+
COMMENT ON COLUMN public.wordforms.id IS 'Идентификатор';
22+
COMMENT ON COLUMN public.wordforms.pid IS 'Идентификатор леммы (начальной, словарной формы слова)';
23+
COMMENT ON COLUMN public.wordforms.word IS 'Слово или словосочетание в нижнем регистре';
24+
COMMENT ON COLUMN public.wordforms.checked IS 'Проверенное слово?';
2425

25-
\copy public.sphinx_wordforms from program 'xzcat sphinx_wordforms.csv.xz' with (format csv, header true);
26+
\copy public.wordforms from program 'xzcat wordforms.csv.xz' with (format csv, header true);
2627

2728
-- создавать индексы после вставки данных быстрее, чем наоборот
2829

29-
CREATE INDEX idx_sphinx_wordforms_pid ON public.sphinx_wordforms USING btree (pid);
30-
CREATE UNIQUE INDEX idx_sphinx_wordforms_wildspeed_word_unique_lower ON public.sphinx_wordforms USING btree (lower((word)) varchar_pattern_ops);
31-
CREATE INDEX idx_sphinx_wordforms_word ON public.sphinx_wordforms USING btree (word);
30+
CREATE INDEX idx_sphinx_wordforms_pid ON public.wordforms USING btree (pid);
31+
CREATE UNIQUE INDEX idx_sphinx_wordforms_wildspeed_word_unique_lower ON public.wordforms USING btree (lower((word)) varchar_pattern_ops);
32+
CREATE INDEX idx_sphinx_wordforms_word ON public.wordforms USING btree (word);
3233

3334
-- создавать внешние ключи после создания индексов быстрее, чем наоборот
34-
ALTER TABLE ONLY public.sphinx_wordforms ADD CONSTRAINT v3_sphinx_wordforms_fk1 FOREIGN KEY (pid) REFERENCES public.sphinx_wordforms(id);
35+
ALTER TABLE ONLY public.wordforms ADD CONSTRAINT v3_sphinx_wordforms_fk1 FOREIGN KEY (pid) REFERENCES public.wordforms(id);
3536

36-
CREATE INDEX /*CONCURRENTLY*/ IF NOT EXISTS custom_query_group_name_name_trigram_index ON public.custom_query_group_name USING GIN (lower(name) gin_trgm_ops);
37-
CREATE INDEX /*CONCURRENTLY*/ IF NOT EXISTS sphinx_wordforms_word_trigram_index ON public.sphinx_wordforms USING GIN (lower(word) gin_trgm_ops);
37+
--CREATE INDEX /*CONCURRENTLY*/ IF NOT EXISTS custom_query_group_name_name_trigram_index ON public.custom_query_group_name USING GIN (lower(name) gin_trgm_ops);
38+
CREATE INDEX /*CONCURRENTLY*/ IF NOT EXISTS sphinx_wordforms_word_trigram_index ON public.wordforms USING GIN (lower(word) gin_trgm_ops);
3839

39-
SELECT COUNT(*) FROM sphinx_wordforms; -- 1,241,857 записей
40+
SELECT COUNT(*) FROM public.wordforms; -- 1,241,857 записей

‎functions/typos_correct/typos_correct.sql‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ WITH
5757
-- есть слово в словаре русского языка?
5858
NOT EXISTS(
5959
SELECT
60-
FROM sphinx_wordforms AS dict
60+
FROM public.wordforms AS dict
6161
WHERE lower(dict.word) = lower(q.word_from)
6262
AND checked = TRUE
6363
)
@@ -96,7 +96,7 @@ WITH
9696
levenshtein(q.word_from, t.word, vars.ins_cost, vars.del_cost, vars.sub_cost))::numeric, 4) AS levenshtein_distance3, -- среднее геометрическое
9797
round((1 - sqrt(levenshtein(q.word_from, t.word) *
9898
levenshtein(q.word_from, t.word, vars.ins_cost, vars.del_cost, vars.sub_cost)) / length(t.word))::numeric, 4) AS levenshtein_rank3
99-
FROM sphinx_wordforms AS t, vars
99+
FROM public.wordforms AS t, vars
100100
WHERE lower(t.word) % q.word_from -- используем GIN индекс!
101101
) AS t
102102
WHERE TRUE

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /