1
+ import os
2
+ import pyodbc
3
+ import logging
4
+ import json
5
+ from sentence_transformers import SentenceTransformer
6
+ from dotenv import load_dotenv
7
+ from utilities import get_mssql_connection
8
+
9
+ load_dotenv ()
10
+
11
+ if __name__ == '__main__' :
12
+ print ('Initializing sample...' )
13
+ print ('Getting embeddings...' )
14
+ sentences = [
15
+ 'The dog is barking' ,
16
+ 'The cat is purring' ,
17
+ 'The bear is growling'
18
+ ]
19
+ model = SentenceTransformer ('multi-qa-MiniLM-L6-cos-v1' )
20
+ embeddings = model .encode (sentences )
21
+
22
+ print ('Cleaning up the database...' )
23
+ try :
24
+ conn = get_mssql_connection ()
25
+ conn .execute ("DELETE FROM dbo.document_embeddings;" )
26
+ conn .execute ("DELETE FROM dbo.documents;" )
27
+ conn .commit ();
28
+ finally :
29
+ conn .close ()
30
+
31
+ print ('Saving documents and embeddings in the database...' )
32
+ try :
33
+ conn = get_mssql_connection ()
34
+ cursor = conn .cursor ()
35
+
36
+ for content , embedding in zip (sentences , embeddings ):
37
+ cursor .execute (f"""
38
+ INSERT INTO dbo.documents (content, embedding) VALUES (?, ?);
39
+ INSERT INTO dbo.document_embeddings SELECT SCOPE_IDENTITY(), CAST([key] AS INT), CAST([value] AS FLOAT) FROM OPENJSON(?);
40
+ """ ,
41
+ content ,
42
+ json .dumps (embedding .tolist ()),
43
+ json .dumps (embedding .tolist ())
44
+ )
45
+
46
+ cursor .close ()
47
+ conn .commit ()
48
+ finally :
49
+ conn .close ()
50
+
51
+ print ('Searching for similar documents...' )
52
+ print ('Getting embeddings...' )
53
+ query = 'growling bear'
54
+ embedding = model .encode (query )
55
+
56
+ print ('Querying database...' )
57
+ k = 5
58
+ try :
59
+ conn = get_mssql_connection ()
60
+ cursor = conn .cursor ()
61
+
62
+ results = cursor .execute (f"""
63
+ DECLARE @k INT = ?;
64
+ WITH keyword_search AS (
65
+ SELECT TOP(@k)
66
+ id,
67
+ ftt.[RANK] AS rank
68
+ FROM
69
+ dbo.documents
70
+ INNER JOIN
71
+ FREETEXTTABLE(dbo.documents, *, ?) AS ftt ON dbo.documents.id = ftt.[KEY]
72
+ ),
73
+ semantic_search AS
74
+ (
75
+ SELECT
76
+ id,
77
+ rank
78
+ FROM
79
+ dbo.similar_documents(?)
80
+ )
81
+ SELECT TOP(@k)
82
+ COALESCE(ss.id, ks.id) AS id,
83
+ COALESCE(1.0 / (@k + ss.rank), 0.0) +
84
+ COALESCE(1.0 / (@k + ks.rank), 0.0) AS score -- Reciprocal Rank Fusion (RRF)
85
+ FROM
86
+ semantic_search ss
87
+ FULL OUTER JOIN
88
+ keyword_search ks ON ss.id = ks.id
89
+ ORDER BY
90
+ score DESC
91
+ """ ,
92
+ k ,
93
+ query ,
94
+ json .dumps (embedding .tolist ()),
95
+ )
96
+
97
+ for row in results :
98
+ print ('document:' , row [0 ], 'RRF score:' , row [1 ])
99
+
100
+ cursor .close ()
101
+ conn .commit ()
102
+ finally :
103
+ conn .close ()
0 commit comments