Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit fd80079

Browse files
Microsoft Copilot with Elasticsearch supporting blog content (#456)
1 parent 36a4a78 commit fd80079

File tree

7 files changed

+830
-0
lines changed

7 files changed

+830
-0
lines changed
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "386dbd82",
6+
"metadata": {},
7+
"source": [
8+
"# Enhance Microsoft Copilot with Elasticsearch\n",
9+
"\n",
10+
"This notebook runs an API that lets you search for invoices with Elasticsearch, and opens an ngrok tunnel to expose that API to the internet. It is based on the article [Enhance Microsoft Copilot with Elasticsearch](https://www.elastic.co/blog/enhance-microsoft-copilot-with-elasticsearch)."
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"id": "d460f865",
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"%pip install fastapi pyngrok uvicorn nest-asyncio elasticsearch==9 -q"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"id": "3ac47371",
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"import os\n",
31+
"import json\n",
32+
"from getpass import getpass\n",
33+
"from datetime import datetime\n",
34+
"\n",
35+
"import nest_asyncio\n",
36+
"import uvicorn\n",
37+
"\n",
38+
"from fastapi import FastAPI, HTTPException, Query\n",
39+
"from pyngrok import conf, ngrok\n",
40+
"\n",
41+
"from elasticsearch.helpers import bulk\n",
42+
"from elasticsearch import Elasticsearch"
43+
]
44+
},
45+
{
46+
"cell_type": "markdown",
47+
"id": "64167eee",
48+
"metadata": {},
49+
"source": [
50+
"## Setup Variables"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"id": "aa378fdb",
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"os.environ[\"ELASTICSEARCH_ENDPOINT\"] = getpass(\"Elastic Endpoint: \")\n",
61+
"os.environ[\"ELASTICSEARCH_API_KEY\"] = getpass(\"Elastic Api Key: \")\n",
62+
"os.environ[\"NGROK_AUTH_TOKEN\"] = getpass(\"Ngrok Auth Token: \")\n",
63+
"\n",
64+
"\n",
65+
"INDEX_NAME = \"invoices\""
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"id": "31041b60",
71+
"metadata": {},
72+
"source": [
73+
"## Elasticsearch client"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"id": "4d8a8201",
80+
"metadata": {},
81+
"outputs": [],
82+
"source": [
83+
"_client = Elasticsearch(\n",
84+
" os.environ[\"ELASTICSEARCH_ENDPOINT\"],\n",
85+
" api_key=os.environ[\"ELASTICSEARCH_API_KEY\"],\n",
86+
")"
87+
]
88+
},
89+
{
90+
"cell_type": "markdown",
91+
"id": "07578680",
92+
"metadata": {},
93+
"source": [
94+
"## Mappings"
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": null,
100+
"id": "c34a804a",
101+
"metadata": {},
102+
"outputs": [],
103+
"source": [
104+
"try:\n",
105+
"    _client.indices.create(\n",
106+
"        index=INDEX_NAME,\n",
107+
"        body={\n",
108+
"            \"mappings\": {\n",
109+
"                \"properties\": {\n",
110+
"                    \"id\": {\"type\": \"keyword\"},\n",
111+
"                    \"file_url\": {\"type\": \"keyword\"},\n",
112+
"                    \"issue_date\": {\"type\": \"date\"},\n",
113+
"                    \"description\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
114+
"                    \"services\": {\n",
115+
"                        \"type\": \"object\",\n",
116+
"                        \"properties\": {\n",
117+
"                            \"name\": {\n",
118+
"                                \"type\": \"text\",\n",
119+
"                                \"copy_to\": \"semantic_field\",\n",
120+
"                            },\n",
121+
"                            \"price\": {\"type\": \"float\"},\n",
122+
"                        },\n",
123+
"                    },\n",
124+
"                    \"total_amount\": {\n",
125+
"                        \"type\": \"float\",\n",
126+
"                    },\n",
127+
"                    \"semantic_field\": {\"type\": \"semantic_text\"},\n",
128+
"                }\n",
129+
"            }\n",
130+
"        },\n",
131+
"    )\n",
132+
"\n",
133+
"    print(\"index created successfully\")\n",
134+
"except Exception as e:\n",
135+
"    info = getattr(e, \"info\", None)  # a generic Exception is not guaranteed to carry `.info`\n",
136+
"    reason = info[\"error\"][\"root_cause\"][0][\"reason\"] if isinstance(info, dict) and \"error\" in info else e\n",
137+
"    print(f\"Error creating index: {reason}\")"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"id": "02a2c25a",
143+
"metadata": {},
144+
"source": [
145+
"## Ingesting documents to Elasticsearch"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": null,
151+
"id": "69f388c0",
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"with open(\"invoices_data.json\", \"r\", encoding=\"utf-8\") as f:\n",
156+
" invoices = json.load(f)"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"id": "b96c42fb",
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
166+
"def build_data():\n",
167+
"    \"\"\"Yield one bulk action per entry in `invoices`, targeting INDEX_NAME.\"\"\"\n",
168+
"    yield from ({\"_index\": INDEX_NAME, \"_source\": invoice} for invoice in invoices)\n",
169+
"\n",
170+
"\n",
171+
"try:\n",
172+
"    indexed_count, failures = bulk(_client, build_data())\n",
173+
"    print(f\"{indexed_count} documents indexed successfully\")\n",
174+
"\n",
175+
"    if failures:\n",
176+
"        print(\"Errors during indexing:\", failures)\n",
177+
"\n",
178+
"except Exception as exc:\n",
179+
"    print(f\"Error: {str(exc)}, please wait some seconds and try again.\")"
180+
]
181+
},
182+
{
183+
"cell_type": "markdown",
184+
"id": "d38c1869",
185+
"metadata": {},
186+
"source": [
187+
"## Building API"
188+
]
189+
},
190+
{
191+
"cell_type": "code",
192+
"execution_count": null,
193+
"id": "2ad221fb",
194+
"metadata": {},
195+
"outputs": [],
196+
"source": [
197+
"app = FastAPI()"
198+
]
199+
},
200+
{
201+
"cell_type": "code",
202+
"execution_count": null,
203+
"id": "76106dad",
204+
"metadata": {},
205+
"outputs": [],
206+
"source": [
207+
"@app.get(\"/search/semantic\")\n",
208+
"async def search_semantic(query: str = Query(None)):\n",
209+
"    \"\"\"Run a semantic query on `semantic_field` and return each hit's source plus its score.\"\"\"\n",
210+
"    try:\n",
211+
"        result = _client.search(\n",
212+
"            index=INDEX_NAME,\n",
213+
"            query={\n",
214+
"                \"semantic\": {\n",
215+
"                    \"field\": \"semantic_field\",\n",
216+
"                    \"query\": query,\n",
217+
"                }\n",
218+
"            },\n",
219+
"        )\n",
220+
"        hits = result[\"hits\"][\"hits\"]\n",
221+
"        return [{\"score\": hit[\"_score\"], **hit[\"_source\"]} for hit in hits]\n",
222+
"    except Exception as e:\n",
223+
"        # Raise instead of returning the exception object, so the client receives an\n",
224+
"        # HTTP error status rather than a successful response.\n",
225+
"        raise HTTPException(status_code=500, detail=f\"Error: {str(e)}\") from e\n",
226+
"\n",
227+
"\n",
228+
"@app.get(\"/search/by-date\")\n",
229+
"async def search_by_date(from_date: str = Query(None), to_date: str = Query(None)):\n",
230+
"    \"\"\"Return invoices whose `issue_date` is between `from_date` and `to_date` (inclusive, date part only).\"\"\"\n",
231+
"    try:\n",
232+
"        from_dt = datetime.strptime(from_date, \"%m/%d/%Y %I:%M:%S %p\")\n",
233+
"        to_dt = datetime.strptime(to_date, \"%m/%d/%Y %I:%M:%S %p\")\n",
234+
"    except (TypeError, ValueError) as e:\n",
235+
"        # A missing (None) or malformed date is a caller error, not a server failure.\n",
236+
"        raise HTTPException(status_code=400, detail=f\"Error: {str(e)}\") from e\n",
237+
"\n",
238+
"    try:\n",
239+
"        result = _client.search(\n",
240+
"            index=INDEX_NAME,\n",
241+
"            query={\n",
242+
"                \"range\": {\n",
243+
"                    \"issue_date\": {\n",
244+
"                        \"gte\": from_dt.strftime(\"%d/%m/%Y\"),\n",
245+
"                        \"lte\": to_dt.strftime(\"%d/%m/%Y\"),\n",
246+
"                        \"format\": \"dd/MM/yyyy\",\n",
247+
"                    }\n",
248+
"                }\n",
249+
"            },\n",
250+
"        )\n",
251+
"        return [hit[\"_source\"] for hit in result[\"hits\"][\"hits\"]]\n",
252+
"    except Exception as e:\n",
253+
"        # Raise instead of returning the exception object, so the client receives an\n",
254+
"        # HTTP error status rather than a successful response.\n",
255+
"        raise HTTPException(status_code=500, detail=f\"Error: {str(e)}\") from e"
256+
]
257+
},
258+
{
259+
"cell_type": "markdown",
260+
"id": "cf1460e9",
261+
"metadata": {},
262+
"source": [
263+
"## Running the API"
264+
]
265+
},
266+
{
267+
"cell_type": "code",
268+
"execution_count": null,
269+
"id": "517c85c3",
270+
"metadata": {},
271+
"outputs": [],
272+
"source": [
273+
"API_PORT = 8000  # the ngrok tunnel and the uvicorn server must use the same port\n",
274+
"conf.get_default().auth_token = os.environ[\"NGROK_AUTH_TOKEN\"]\n",
275+
"ngrok_tunnel = ngrok.connect(API_PORT)\n",
276+
"print(\"Public URL:\", ngrok_tunnel.public_url)\n",
277+
"\n",
278+
"nest_asyncio.apply()\n",
279+
"uvicorn.run(app, port=API_PORT)"
280+
]
281+
},
282+
{
283+
"cell_type": "markdown",
284+
"id": "ccffd29a",
285+
"metadata": {},
286+
"source": [
287+
"## Delete the index"
288+
]
289+
},
290+
{
291+
"cell_type": "code",
292+
"execution_count": null,
293+
"id": "991ba4e4",
294+
"metadata": {},
295+
"outputs": [],
296+
"source": [
297+
"def print_results(results):\n",
298+
"    \"\"\"Print whether the request was acknowledged, or the first root-cause reason on error.\"\"\"\n",
299+
"    if results.get(\"acknowledged\", False):\n",
300+
"        print(\"DELETED successfully.\")\n",
301+
"    if \"error\" in results:\n",
302+
"        print(f\"ERROR: {results['error']['root_cause'][0]['reason']}\")\n",
303+
"\n",
304+
"\n",
305+
"# Cleanup - Delete Index (400/404 are tolerated so the response can be reported instead of raising)\n",
306+
"result = _client.options(ignore_status=[400, 404]).indices.delete(index=INDEX_NAME)\n",
307+
"print_results(result)"
308+
]
309+
}
310+
],
311+
"metadata": {
312+
"kernelspec": {
313+
"display_name": "Python 3",
314+
"language": "python",
315+
"name": "python3"
316+
},
317+
"language_info": {
318+
"codemirror_mode": {
319+
"name": "ipython",
320+
"version": 3
321+
},
322+
"file_extension": ".py",
323+
"mimetype": "text/x-python",
324+
"name": "python",
325+
"nbconvert_exporter": "python",
326+
"pygments_lexer": "ipython3",
327+
"version": "3.13.2"
328+
}
329+
},
330+
"nbformat": 4,
331+
"nbformat_minor": 5
332+
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /