|
196 | 196 | }, |
197 | 197 | { |
198 | 198 | "cell_type": "code", |
199 | | - "execution_count": 5, |
| 199 | + "execution_count": null, |
200 | 200 | "metadata": {}, |
201 | 201 | "outputs": [], |
202 | 202 | "source": [ |
203 | 203 | "def pretty_response(response):\n", |
204 | 204 | " if len(response[\"hits\"][\"hits\"]) == 0:\n", |
205 | 205 | " print(\"Your search returned no results.\")\n", |
206 | 206 | " else:\n", |
207 | | - " for hit in response[\"hits\"][\"hits\"]:\n", |
| 207 | + " for idx, hit in enumerate(response[\"hits\"][\"hits\"], start=1):\n", |
208 | 208 | " id = hit[\"_id\"]\n", |
209 | 209 | " publication_date = hit[\"_source\"][\"publish_date\"]\n", |
210 | | - " rank = hit[\"_rank\"]\n", |
| 210 | + " score = hit[\"_score\"]\n", |
211 | 211 | " title = hit[\"_source\"][\"title\"]\n", |
212 | 212 | " summary = hit[\"_source\"][\"summary\"]\n", |
213 | | - " pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {rank}\"\n", |
| 213 | + " pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {idx}\\nScore: {score}\"\n", |
214 | 214 | " print(pretty_output)" |
215 | 215 | ] |
216 | 216 | }, |
|
231 | 231 | "\n", |
232 | 232 | "We then use [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html) to balance the scores to provide a final list of documents, ranked in order of relevance. RRF is a ranking algorithm for combining results from different information retrieval strategies.\n", |
233 | 233 | "\n", |
234 | | - "Note that _score is null, and we instead use _rank to show our top-ranked documents." |
| 234 | + "Note: With the retriever API, _score contains the document’s relevance score, and the rank is simply the position in the results (first result is rank 1, etc.)." |
235 | 235 | ] |
236 | 236 | }, |
237 | 237 | { |
238 | 238 | "cell_type": "code", |
239 | | - "execution_count": 6, |
| 239 | + "execution_count": null, |
240 | 240 | "metadata": {}, |
241 | 241 | "outputs": [ |
242 | 242 | { |
|
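For intuition about what the new `rrf` retriever computes, here is a minimal sketch of the RRF formula from the linked docs: each document's fused score is the sum of `1 / (rank_constant + rank)` across every result list it appears in, with Elasticsearch defaulting `rank_constant` to 60. The document IDs and result lists below are hypothetical, not output from the notebook.

```python
# Minimal sketch of Reciprocal Rank Fusion (RRF):
# RRF_score(doc) = sum over result lists of 1 / (rank_constant + rank(doc)).


def rrf_scores(result_lists, rank_constant=60):
    scores = {}
    for ranked_ids in result_lists:
        for rank, doc_id in enumerate(ranked_ids, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (rank_constant + rank)
    # Highest fused score first.
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)


# Hypothetical top hits from the lexical (match) and semantic (kNN) retrievers:
lexical = ["book_3", "book_1", "book_7"]
semantic = ["book_1", "book_9", "book_3"]
print(rrf_scores([lexical, semantic]))
# book_1 ranks first because it places highly in both lists.
```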
280 | 280 | "response = client.search(\n", |
281 | 281 | " index=\"book_index\",\n", |
282 | 282 | " size=5,\n", |
283 | | - " query={\"match\": {\"summary\": \"python programming\"}},\n", |
284 | | - " knn={\n", |
285 | | - " \"field\": \"title_vector\",\n", |
286 | | - " \"query_vector\": model.encode(\n", |
287 | | - " \"python programming\"\n", |
288 | | - " ).tolist(), # generate embedding for query so it can be compared to `title_vector`\n", |
289 | | - " \"k\": 5,\n", |
290 | | - " \"num_candidates\": 10,\n", |
| 283 | + " retriever={\n", |
| 284 | + " \"rrf\": {\n", |
| 285 | + " \"retrievers\": [\n", |
| 286 | + " {\"standard\": {\"query\": {\"match\": {\"summary\": \"python programming\"}}}},\n", |
| 287 | + " {\n", |
| 288 | + " \"knn\": {\n", |
| 289 | + " \"field\": \"title_vector\",\n", |
| 290 | + "                        \"query_vector\": model.encode(\"python programming\").tolist(),  # generate embedding for query so it can be compared to `title_vector`\n", |
| 291 | + " \"k\": 5,\n", |
| 292 | + " \"num_candidates\": 10,\n", |
| 293 | + " }\n", |
| 294 | + " },\n", |
| 295 | + " ]\n", |
| 296 | + " }\n", |
291 | 297 | " },\n", |
292 | | - " rank={\"rrf\": {}},\n", |
293 | 298 | ")\n", |
294 | | - "\n", |
295 | 299 | "pretty_response(response)" |
296 | 300 | ] |
297 | 301 | } |
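
Beyond the defaults used above, the `rrf` retriever also accepts `rank_constant` and `rank_window_size` (both documented Elasticsearch options). A hedged sketch of tuning them, reusing the notebook's `client` and `model`; the values are illustrative assumptions, not recommendations:

```python
# Same hybrid search, sketching RRF's optional tuning knobs.
# `client` and `model` come from earlier notebook cells; the parameter
# values below are illustrative, not tuned for this dataset.
response = client.search(
    index="book_index",
    size=5,
    retriever={
        "rrf": {
            "retrievers": [
                {"standard": {"query": {"match": {"summary": "python programming"}}}},
                {
                    "knn": {
                        "field": "title_vector",
                        "query_vector": model.encode("python programming").tolist(),
                        "k": 5,
                        "num_candidates": 10,
                    }
                },
            ],
            "rank_constant": 20,  # smaller values favor top-ranked hits more strongly
            "rank_window_size": 50,  # how many hits per retriever enter the fusion
        }
    },
)
pretty_response(response)
```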
|