|
196 | 196 | }, |
197 | 197 | { |
198 | 198 | "cell_type": "code", |
199 | | - "execution_count": 5, |
| 199 | + "execution_count": null, |
200 | 200 | "metadata": {}, |
201 | 201 | "outputs": [], |
202 | 202 | "source": [ |
203 | 203 | "def pretty_response(response):\n", |
204 | 204 | " if len(response[\"hits\"][\"hits\"]) == 0:\n", |
205 | 205 | " print(\"Your search returned no results.\")\n", |
206 | 206 | " else:\n", |
207 | | - " for hit in response[\"hits\"][\"hits\"]:\n", |
| 207 | + " for idx, hit in enumerate(response[\"hits\"][\"hits\"], start=1):\n", |
208 | 208 | " id = hit[\"_id\"]\n", |
209 | 209 | " publication_date = hit[\"_source\"][\"publish_date\"]\n", |
210 | | - " rank = hit[\"_rank\"]\n", |
| 210 | + " score = hit[\"_score\"]\n", |
211 | 211 | " title = hit[\"_source\"][\"title\"]\n", |
212 | 212 | " summary = hit[\"_source\"][\"summary\"]\n", |
213 | | - " pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {rank}\"\n", |
| 213 | + " pretty_output = f\"\\nID: {id}\\nPublication date: {publication_date}\\nTitle: {title}\\nSummary: {summary}\\nRank: {idx}\\nScore: {score}\"\n", |
214 | 214 | " print(pretty_output)" |
215 | 215 | ] |
216 | 216 | }, |
|
231 | 231 | "\n", |
232 | 232 | "We then use [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/current/rrf.html) to balance the scores to provide a final list of documents, ranked in order of relevance. RRF is a ranking algorithm for combining results from different information retrieval strategies.\n", |
233 | 233 | "\n", |
234 | | - "Note that _score is null, and we instead use _rank to show our top-ranked documents." |
| 234 | + "Note: With the retriever API, _score contains the document’s relevance score, and the rank is simply the position in the results (first result is rank 1, etc.)." |
235 | 235 | ] |
236 | 236 | }, |
237 | 237 | { |
238 | 238 | "cell_type": "code", |
239 | | - "execution_count": 6, |
| 239 | + "execution_count": null, |
240 | 240 | "metadata": {}, |
241 | 241 | "outputs": [ |
242 | 242 | { |
|
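For intuition about what the new `rrf` retriever computes, here is a minimal sketch of the RRF formula from the linked docs: each document's fused score is the sum of `1 / (rank_constant + rank)` across every result list it appears in, with Elasticsearch defaulting `rank_constant` to 60. The document IDs and result lists below are hypothetical, not output from the notebook.

```python
# Minimal sketch of Reciprocal Rank Fusion (RRF):
# RRF_score(doc) = sum over result lists of 1 / (rank_constant + rank(doc)).


def rrf_scores(result_lists, rank_constant=60):
    scores = {}
    for ranked_ids in result_lists:
        for rank, doc_id in enumerate(ranked_ids, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (rank_constant + rank)
    # Highest fused score first.
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)


# Hypothetical top hits from the lexical (match) and semantic (kNN) retrievers:
lexical = ["book_3", "book_1", "book_7"]
semantic = ["book_1", "book_9", "book_3"]
print(rrf_scores([lexical, semantic]))
# book_1 ranks first because it places highly in both lists.
```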
280 | 280 | "response = client.search(\n", |
281 | 281 | " index=\"book_index\",\n", |
282 | 282 | " size=5,\n", |
283 | | - " query={\"match\": {\"summary\": \"python programming\"}},\n", |
284 | | - " knn={\n", |
285 | | - " \"field\": \"title_vector\",\n", |
286 | | - " \"query_vector\": model.encode(\n", |
287 | | - " \"python programming\"\n", |
288 | | - " ).tolist(), # generate embedding for query so it can be compared to `title_vector`\n", |
289 | | - " \"k\": 5,\n", |
290 | | - " \"num_candidates\": 10,\n", |
| 283 | + " retriever={\n", |
| 284 | + " \"rrf\": {\n", |
| 285 | + " \"retrievers\": [\n", |
| 286 | + " {\"standard\": {\"query\": {\"match\": {\"summary\": \"python programming\"}}}},\n", |
| 287 | + " {\n", |
| 288 | + " \"knn\": {\n", |
| 289 | + " \"field\": \"title_vector\",\n", |
| 290 | + "                        \"query_vector\": model.encode(\"python programming\").tolist(),  # generate embedding for query so it can be compared to `title_vector`\n", |
| 291 | + " \"k\": 5,\n", |
| 292 | + " \"num_candidates\": 10,\n", |
| 293 | + " }\n", |
| 294 | + " },\n", |
| 295 | + " ]\n", |
| 296 | + " }\n", |
291 | 297 | " },\n", |
292 | | - " rank={\"rrf\": {}},\n", |
293 | 298 | ")\n", |
294 | | - "\n", |
295 | 299 | "pretty_response(response)" |
296 | 300 | ] |
297 | 301 | } |
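
Beyond the defaults used above, the `rrf` retriever also accepts `rank_constant` and `rank_window_size` (both documented Elasticsearch options). A hedged sketch of tuning them, reusing the notebook's `client` and `model`; the values are illustrative assumptions, not recommendations:

```python
# Same hybrid search, sketching RRF's optional tuning knobs.
# `client` and `model` come from earlier notebook cells; the parameter
# values below are illustrative, not tuned for this dataset.
response = client.search(
    index="book_index",
    size=5,
    retriever={
        "rrf": {
            "retrievers": [
                {"standard": {"query": {"match": {"summary": "python programming"}}}},
                {
                    "knn": {
                        "field": "title_vector",
                        "query_vector": model.encode("python programming").tolist(),
                        "k": 5,
                        "num_candidates": 10,
                    }
                },
            ],
            "rank_constant": 20,  # smaller values favor top-ranked hits more strongly
            "rank_window_size": 50,  # how many hits per retriever enter the fusion
        }
    },
)
pretty_response(response)
```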
|