Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8c052dc

Browse files
fix type for streaming to conform with Microsoft Chat Protocol
1 parent 9af3296 commit 8c052dc

File tree

5 files changed

+138
-105
lines changed

5 files changed

+138
-105
lines changed

src/backend/fastapi_app/api_models.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
1+
from enum import Enum
12
from typing import Any
23

34
from openai.types.chat import ChatCompletionMessageParam
45
from pydantic import BaseModel
56

67

8+
class AIChatRoles(str, Enum):
9+
USER = "user"
10+
ASSISTANT = "assistant"
11+
SYSTEM = "system"
12+
13+
714
class Message(BaseModel):
815
content: str
9-
role: str = "user"
16+
role: AIChatRoles = AIChatRoles.USER
1017

1118

1219
class ChatRequest(BaseModel):
@@ -32,6 +39,12 @@ class RetrievalResponse(BaseModel):
3239
session_state: Any | None = None
3340

3441

42+
class RetrievalResponseDelta(BaseModel):
43+
delta: Message | None = None
44+
context: RAGContext | None = None
45+
session_state: Any | None = None
46+
47+
3548
class ItemPublic(BaseModel):
3649
id: int
3750
type: str

src/backend/fastapi_app/rag_advanced.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@
55
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam
66
from openai_messages_token_helper import build_messages, get_token_limit
77

8-
from fastapi_app.api_models import Message, RAGContext, RetrievalResponse, ThoughtStep
8+
from fastapi_app.api_models import (
9+
AIChatRoles,
10+
Message,
11+
RAGContext,
12+
RetrievalResponse,
13+
RetrievalResponseDelta,
14+
ThoughtStep,
15+
)
916
from fastapi_app.postgres_models import Item
1017
from fastapi_app.postgres_searcher import PostgresSearcher
1118
from fastapi_app.query_rewriter import build_search_function, extract_search_arguments
@@ -110,10 +117,10 @@ async def run(
110117
stream=False,
111118
)
112119

113-
first_choice_message = chat_completion_response.choices[0].message
114-
115120
return RetrievalResponse(
116-
message=Message(content=str(first_choice_message.content), role=first_choice_message.role),
121+
message=Message(
122+
content=str(chat_completion_response.choices[0].message.content), role=AIChatRoles.ASSISTANT
123+
),
117124
context=RAGContext(
118125
data_points={item.id: item.to_dict() for item in results},
119126
thoughts=[
@@ -157,7 +164,7 @@ async def run_stream(
157164
self,
158165
messages: list[ChatCompletionMessageParam],
159166
overrides: dict[str, Any] = {},
160-
) -> AsyncGenerator[RetrievalResponse|Message, None]:
167+
) -> AsyncGenerator[RetrievalResponseDelta, None]:
161168
chat_params = self.get_params(messages, overrides)
162169

163170
# Generate an optimized keyword search query based on the chat history and the last question
@@ -188,8 +195,7 @@ async def run_stream(
188195
# The connection closes when it returns back to the context manger in the dependencies
189196
await self.searcher.db_session.close()
190197

191-
yield RetrievalResponse(
192-
message=Message(content="", role="assistant"),
198+
yield RetrievalResponseDelta(
193199
context=RAGContext(
194200
data_points={item.id: item.to_dict() for item in results},
195201
thoughts=[
@@ -230,7 +236,9 @@ async def run_stream(
230236
)
231237

232238
async for response_chunk in chat_completion_async_stream:
233-
# first response has empty choices
234-
if response_chunk.choices:
235-
yield Message(content=str(response_chunk.choices[0].delta.content), role="assistant")
239+
# first response has empty choices and last response has empty content
240+
if response_chunk.choices and response_chunk.choices[0].delta.content:
241+
yield RetrievalResponseDelta(
242+
delta=Message(content=str(response_chunk.choices[0].delta.content), role=AIChatRoles.ASSISTANT)
243+
)
236244
return

src/backend/fastapi_app/rag_simple.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,14 @@
88
from openai_messages_token_helper import build_messages, get_token_limit
99
from pydantic import BaseModel
1010

11-
from fastapi_app.api_models import Message, RAGContext, RetrievalResponse, ThoughtStep
11+
from fastapi_app.api_models import (
12+
AIChatRoles,
13+
Message,
14+
RAGContext,
15+
RetrievalResponse,
16+
RetrievalResponseDelta,
17+
ThoughtStep,
18+
)
1219
from fastapi_app.postgres_models import Item
1320
from fastapi_app.postgres_searcher import PostgresSearcher
1421

@@ -76,7 +83,7 @@ async def run_stream(
7683
self,
7784
messages: list[ChatCompletionMessageParam],
7885
overrides: dict[str, Any] = {},
79-
) -> AsyncGenerator[RetrievalResponse|Message, None]:
86+
) -> AsyncGenerator[RetrievalResponseDelta, None]:
8087
raise NotImplementedError
8188
if False:
8289
yield 0
@@ -145,10 +152,10 @@ async def run(
145152
stream=False,
146153
)
147154

148-
first_choice_message = chat_completion_response.choices[0].message
149-
150155
return RetrievalResponse(
151-
message=Message(content=str(first_choice_message.content), role=first_choice_message.role),
156+
message=Message(
157+
content=str(chat_completion_response.choices[0].message.content), role=AIChatRoles.ASSISTANT
158+
),
152159
context=RAGContext(
153160
data_points={item.id: item.to_dict() for item in results},
154161
thoughts=[
@@ -182,7 +189,7 @@ async def run_stream(
182189
self,
183190
messages: list[ChatCompletionMessageParam],
184191
overrides: dict[str, Any] = {},
185-
) -> AsyncGenerator[RetrievalResponse|Message, None]:
192+
) -> AsyncGenerator[RetrievalResponseDelta, None]:
186193
chat_params = self.get_params(messages, overrides)
187194

188195
# Retrieve relevant items from the database
@@ -206,8 +213,7 @@ async def run_stream(
206213
# The connection closes when it returns to the context manager in the dependencies
207214
await self.searcher.db_session.close()
208215

209-
yield RetrievalResponse(
210-
message=Message(content="", role="assistant"),
216+
yield RetrievalResponseDelta(
211217
context=RAGContext(
212218
data_points={item.id: item.to_dict() for item in results},
213219
thoughts=[
@@ -237,7 +243,9 @@ async def run_stream(
237243
),
238244
)
239245
async for response_chunk in chat_completion_async_stream:
240-
# first response has empty choices
241-
if response_chunk.choices:
242-
yield Message(content=str(response_chunk.choices[0].delta.content), role="assistant")
246+
# first response has empty choices and last response has empty content
247+
if response_chunk.choices and response_chunk.choices[0].delta.content:
248+
yield RetrievalResponseDelta(
249+
delta=Message(content=str(response_chunk.choices[0].delta.content), role=AIChatRoles.ASSISTANT)
250+
)
243251
return

src/backend/fastapi_app/routes/api_routes.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,13 @@
77
from fastapi.responses import StreamingResponse
88
from sqlalchemy import select
99

10-
from fastapi_app.api_models import ChatRequest, ItemPublic, ItemWithDistance, Message, RetrievalResponse
10+
from fastapi_app.api_models import (
11+
ChatRequest,
12+
ItemPublic,
13+
ItemWithDistance,
14+
RetrievalResponse,
15+
RetrievalResponseDelta,
16+
)
1117
from fastapi_app.dependencies import ChatClient, CommonDeps, DBSession, EmbeddingsClient
1218
from fastapi_app.postgres_models import Item
1319
from fastapi_app.postgres_searcher import PostgresSearcher
@@ -17,13 +23,13 @@
1723
router = fastapi.APIRouter()
1824

1925

20-
async def format_as_ndjson(r: AsyncGenerator[RetrievalResponse|Message, None]) -> AsyncGenerator[str, None]:
26+
async def format_as_ndjson(r: AsyncGenerator[RetrievalResponseDelta, None]) -> AsyncGenerator[str, None]:
2127
"""
2228
Format the response as NDJSON
2329
"""
2430
try:
2531
async for event in r:
26-
yield json.dumps(event.model_dump(), ensure_ascii=False) + "\n"
32+
yield event.model_dump_json() + "\n"
2733
except Exception as error:
2834
logging.exception("Exception while generating response stream: %s", error)
2935
yield json.dumps({"error": str(error)}, ensure_ascii=False) + "\n"

0 commit comments

Comments
(0)

Page converted by AltStyle (-> original) /