Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 00f2c8c

Browse files
Initial commit
0 parents commit 00f2c8c

File tree

7 files changed

+291
-0
lines changed

7 files changed

+291
-0
lines changed

‎.gitignore‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
jbang-wrapper/**
2+
data/**
3+
models/**
4+
5+
.vscode

‎README.md‎

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Demo: RAG with MariaDB
2+
3+
## Prerequisites
4+
- Docker
5+
6+
## Setup
7+
8+
Start the LocalAI and MariaDB services:
9+
```shell
10+
docker compose up -d
11+
```
12+
13+
Check the model downloads progress:
14+
```
15+
docker logs -f local-ai
16+
```
17+
18+
Download the dataset:
19+
https://www.kaggle.com/datasets/asaniczka/amazon-canada-products-2023-2-1m-products
20+
21+
Create a slice of the dataset. For example 50k products:
22+
```shell
23+
head -n 50001 ~/Datasets/amz_ca_total_products_data_processed.csv > ~/Datasets/slice.csv
24+
```
25+
26+
Connect to the MariaDB server:
27+
```shell
28+
docker exec -it mariadb mariadb -u root -p'password' demo
29+
```
30+
31+
Load the data from the CSV file into the MariaDB database:
32+
```sql
33+
LOAD DATA LOCAL INFILE '/Users/alejandro/Datasets/slice.csv'
34+
INTO TABLE products
35+
FIELDS TERMINATED BY ','
36+
ENCLOSED BY '"'
37+
LINES TERMINATED BY '\n'
38+
IGNORE 1 LINES
39+
(
40+
asin,
41+
title,
42+
img_url,
43+
product_url,
44+
stars,
45+
reviews,
46+
price,
47+
list_price,
48+
category_name,
49+
is_best_seller,
50+
bought_in_last_month
51+
);
52+
```
53+
54+
Calculate the vector embeddings:
55+
```shell
56+
cd rag-demo
57+
./UpdateVectors.java
58+
```
59+
60+
## Run the demo
61+
62+
Start the demo:
63+
```shell
64+
./RagDemo.java
65+
```

‎RagDemo.java‎

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
///usr/bin/env ./jbang-wrapper/jbang --quiet "$0" "$@" ; exit $?
2+
3+
//DEPS com.fasterxml.jackson.core:jackson-databind:2.18.1
4+
//DEPS com.konghq:unirest-java:3.14.5
5+
//DEPS org.mariadb.jdbc:mariadb-java-client:3.5.0
6+
//DEPS org.sql2o:sql2o:1.8.0
7+
//DEPS org.slf4j:slf4j-simple:2.0.16
8+
9+
import com.fasterxml.jackson.databind.ObjectMapper;
10+
import kong.unirest.Unirest;
11+
import org.sql2o.*;
12+
import java.util.stream.Collectors;
13+
14+
public class RagDemo {
15+
16+
static {
17+
Unirest.config().socketTimeout(120000).connectTimeout(120000);
18+
}
19+
20+
public static void main(String[] args) throws Exception {
21+
var input = System.console().readLine("I'm looking for: ");
22+
23+
System.out.println("\nFinding closest products...");
24+
var context = getContext(input);
25+
System.out.println("-------------------------------------------------------------");
26+
System.out.println(context);
27+
System.out.println("-------------------------------------------------------------\n");
28+
29+
System.out.println("Generating response...");
30+
var prompt = buildPrompt(input, context);
31+
var response = getResponse(prompt);
32+
System.out.println(response);
33+
}
34+
35+
private static String getContext(String input) throws Exception {
36+
var requestBody = """
37+
{ "model": "bert-cpp-minilm-v6", "input": %s }
38+
""".formatted(new ObjectMapper().writeValueAsString(input));
39+
40+
var response = Unirest.post("http://localhost:8080/v1/embeddings")
41+
.header("Content-Type", "application/json")
42+
.body(requestBody)
43+
.asString().getBody();
44+
45+
var connection = new Sql2o(
46+
"jdbc:mariadb://127.0.0.1:3306/demo", "root", "password").open();
47+
48+
var table = connection.createQuery("""
49+
SELECT id, CONCAT(
50+
"Product: ", title, ". Stars: ", stars, ". Price: $", price, ". Category: ", category_name,
51+
". Best seller: ", CASE WHEN is_best_seller THEN "Yes" ELSE "No" END
52+
) AS description
53+
FROM products
54+
WHERE embedding IS NOT NULL
55+
ORDER BY VEC_Distance(embedding, VEC_FromText(JSON_EXTRACT(:response, '$.data[0].embedding')))
56+
LIMIT 10
57+
""")
58+
.addParameter("response", response)
59+
.executeAndFetchTable();
60+
61+
return table.rows().stream()
62+
.map(row -> row.getString("description"))
63+
.collect(Collectors.joining("\n\n"));
64+
}
65+
66+
private static String buildPrompt(String input, Object context) {
67+
return """
68+
You are a sales assistant. I'm looking for %s.
69+
70+
Using the following information, recommend me a product in one single paragraph:
71+
72+
%s
73+
""".formatted(input, context);
74+
}
75+
76+
private static String getResponse(String prompt) throws Exception {
77+
var requestBody = """
78+
{ "model": "phi-2", "messages": [{"role": "user", "content": %s, "temperature": 0.4}] }
79+
""".formatted(new ObjectMapper().writeValueAsString(prompt));
80+
81+
var response = Unirest.post("http://localhost:8080/v1/chat/completions")
82+
.header("Content-Type", "application/json")
83+
.body(requestBody)
84+
.asString().getBody();
85+
86+
var mapper = new ObjectMapper();
87+
var jsonNode = mapper.readTree(response);
88+
return jsonNode.path("choices").get(0).path("message").path("content").asText();
89+
}
90+
91+
}

‎UpdateVectors.java‎

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
///usr/bin/env ./jbang-wrapper/jbang --quiet "$0" "$@" ; exit $?
2+
3+
//DEPS com.fasterxml.jackson.core:jackson-databind:2.18.1
4+
//DEPS com.konghq:unirest-java:3.14.5
5+
//DEPS org.mariadb.jdbc:mariadb-java-client:3.5.0
6+
//DEPS org.sql2o:sql2o:1.8.0
7+
//DEPS org.slf4j:slf4j-simple:2.0.16
8+
9+
import com.fasterxml.jackson.databind.ObjectMapper;
10+
import kong.unirest.Unirest;
11+
import org.sql2o.*;
12+
import org.sql2o.data.*;
13+
14+
public class UpdateVectors {
15+
16+
public static void main(String[] args) throws Exception {
17+
var connection = new Sql2o(
18+
"jdbc:mariadb://127.0.0.1:3306/demo", "root", "password").open();
19+
20+
var table = connection.createQuery("""
21+
SELECT id, CONCAT(
22+
"Product: ", title, ". Stars: ", stars, ". Price: $", price, ". Category: ", category_name,
23+
". Best seller: ", CASE WHEN is_best_seller THEN "Yes" ELSE "No" END
24+
) AS description
25+
FROM products
26+
WHERE embedding IS NULL
27+
""").executeAndFetchTableLazy();
28+
29+
for (Row row : table.rows()) {
30+
var id = row.getString("id");
31+
var description = row.getString("description");
32+
33+
var requestBody = """
34+
{ "model": "bert-cpp-minilm-v6", "input": %s }
35+
""".formatted(new ObjectMapper().writeValueAsString(description));
36+
37+
var response = Unirest.post("http://localhost:8080/v1/embeddings")
38+
.header("Content-Type", "application/json")
39+
.body(requestBody)
40+
.asString().getBody();
41+
42+
connection.createQuery("""
43+
UPDATE products
44+
SET embedding = VEC_FromText(JSON_EXTRACT(:response, '$.data[0].embedding'))
45+
WHERE id = :id
46+
""")
47+
.addParameter("response", response)
48+
.addParameter("id", id)
49+
.executeUpdate();
50+
51+
System.out.println("Updated embedding for product ID: " + id);
52+
}
53+
54+
connection.close();
55+
}
56+
}

‎docker-compose.yml‎

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
services:
2+
mariadb:
3+
image: quay.io/mariadb-foundation/mariadb-devel:11.6-vector-preview
4+
container_name: mariadb
5+
environment:
6+
MARIADB_ROOT_PASSWORD: password
7+
MARIADB_DATABASE: demo
8+
volumes:
9+
- ./schema.sql:/docker-entrypoint-initdb.d/schema.sql
10+
- ./data:/var/lib/mysql
11+
ports:
12+
- "3306:3306"
13+
14+
local-ai:
15+
image: localai/localai:master-ffmpeg-core
16+
container_name: local-ai
17+
command: bert-cpp phi-3.5-mini-instruct
18+
ports:
19+
- "8080:8080"
20+
environment:
21+
- DEBUG=true
22+
volumes:
23+
- ./models:/build/models:cached

‎schema.sql‎

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
CREATE TABLE IF NOT EXISTS products (
2+
id SERIAL PRIMARY KEY,
3+
asin VARCHAR(20),
4+
title VARCHAR(255),
5+
img_url VARCHAR(255),
6+
product_url VARCHAR(255),
7+
stars DECIMAL(2, 1),
8+
reviews INT,
9+
price DECIMAL(10, 2),
10+
list_price DECIMAL(10, 2),
11+
category_name VARCHAR(100),
12+
is_best_seller BOOLEAN,
13+
bought_in_last_month INT,
14+
embedding BLOB
15+
);

‎vector_search_demo.sql‎

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
CREATE OR REPLACE TABLE objects (
2+
name VARCHAR(50),
3+
embedding BLOB NOT NULL DEFAULT 0,
4+
VECTOR INDEX embedding_idx (embedding)
5+
);
6+
7+
INSERT INTO objects (name, embedding)
8+
VALUES
9+
('Alarm clock', VEC_FromText("[0.001, 0]")),
10+
('Cow', VEC_FromText("[1.0, 0.05]")),
11+
('Bicycle', VEC_FromText("[0.2, 0.156]")),
12+
('Eagle', VEC_FromText("[0.03, 0.9]"));
13+
14+
SELECT name
15+
FROM objects
16+
ORDER BY VEC_Distance(
17+
embedding,
18+
VEC_FromText('[0.01, 0.01]') -- small and slow
19+
)
20+
LIMIT 1;
21+
22+
SELECT name
23+
FROM objects
24+
ORDER BY VEC_Distance(
25+
embedding,
26+
VEC_FromText('[0.9, 0.5]') -- big and semi-fast
27+
)
28+
LIMIT 1;
29+
30+
SELECT name
31+
FROM objects
32+
ORDER BY VEC_Distance(
33+
embedding,
34+
VEC_FromText('[0.2, 0.8]') -- small and fast
35+
)
36+
LIMIT 1;

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /