From c050cfd8a08eda7d2d878a45b50537f5d09fc5fd Mon Sep 17 00:00:00 2001
From: Yolanda Robla
Date: Wed, 15 Jan 2025 16:24:58 +0100
Subject: [PATCH] fix: correct problems with ollama and aider

It has a pair of problems:
- always need to check that the model contains some value
- only send chunks that contain data, and do not send double newlines
  as they get considered as a different chunk

Closes: #586
---
 .../providers/ollama/completion_handler.py | 24 +++++++++++--------
 src/codegate/providers/ollama/provider.py  |  3 +++
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/codegate/providers/ollama/completion_handler.py b/src/codegate/providers/ollama/completion_handler.py
index f569d988..bccd4992 100644
--- a/src/codegate/providers/ollama/completion_handler.py
+++ b/src/codegate/providers/ollama/completion_handler.py
@@ -1,3 +1,4 @@
+import json
 from typing import AsyncIterator, Optional, Union
 
 import structlog
@@ -11,18 +12,21 @@
 
 
 async def ollama_stream_generator(
-    stream: AsyncIterator[ChatResponse],
+    stream: AsyncIterator[ChatResponse]
 ) -> AsyncIterator[str]:
     """OpenAI-style SSE format"""
     try:
         async for chunk in stream:
-            print(chunk)
             try:
-                yield f"{chunk.model_dump_json()}\n\n"
+                content = chunk.model_dump_json()
+                if content:
+                    yield f"{chunk.model_dump_json()}\n"
             except Exception as e:
-                yield f"{str(e)}\n\n"
+                if str(e):
+                    yield f"{str(e)}\n"
     except Exception as e:
-        yield f"{str(e)}\n\n"
+        if str(e):
+            yield f"{str(e)}\n"
 
 
 class OllamaShim(BaseCompletionHandler):
@@ -39,17 +43,17 @@ async def execute_completion(
     ) -> Union[ChatResponse, GenerateResponse]:
         """Stream response directly from Ollama API."""
         if is_fim_request:
-            prompt = request["messages"][0]["content"]
+            prompt = request["messages"][0].get("content", "")
             response = await self.client.generate(
-                model=request["model"], prompt=prompt, stream=stream, options=request["options"]
+                model=request["model"], prompt=prompt, stream=stream, options=request["options"]  # type: ignore
            )
         else:
             response = await self.client.chat(
                 model=request["model"],
                 messages=request["messages"],
-                stream=stream,
-                options=request["options"],
-            )
+                stream=stream,  # type: ignore
+                options=request["options"],  # type: ignore
+            )  # type: ignore
         return response
 
     def _create_streaming_response(self, stream: AsyncIterator[ChatResponse]) -> StreamingResponse:
diff --git a/src/codegate/providers/ollama/provider.py b/src/codegate/providers/ollama/provider.py
index 8307f7e0..5284f593 100644
--- a/src/codegate/providers/ollama/provider.py
+++ b/src/codegate/providers/ollama/provider.py
@@ -58,6 +58,9 @@ async def show_model(request: Request):
             https://github.com/ollama/ollama/blob/main/docs/api.md#show-model-information
             """
             body = await request.body()
+            body_json = json.loads(body)
+            if "name" not in body_json:
+                raise HTTPException(status_code=400, detail="model is required in the request body")
             async with httpx.AsyncClient() as client:
                 response = await client.post(
                     f"{self.base_url}/api/show",
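
Note (not part of the patch): the sketch below illustrates why the old double-newline
terminator caused the "considered as a different chunk" problem the commit message
describes. It is not code from codegate or aider; the split_chunks helper and the
sample payloads are made up for the example, assuming a consumer that treats every
newline-terminated line of the stream as one chunk.

import json

def split_chunks(raw: str) -> list[str]:
    """Naive consumer: treat every newline-terminated line as one chunk."""
    return raw.split("\n")[:-1]  # drop the empty element after the final "\n"

# Old format: "\n\n" after each payload leaves an empty line between chunks.
old_stream = '{"content": "Hel"}\n\n{"content": "lo"}\n\n'
# New format: a single "\n" per payload, so exactly one JSON object per line.
new_stream = '{"content": "Hel"}\n{"content": "lo"}\n'

print(split_chunks(old_stream))  # ['{"content": "Hel"}', '', '{"content": "lo"}', '']
print(split_chunks(new_stream))  # ['{"content": "Hel"}', '{"content": "lo"}']

# The empty strings produced by the old format are the spurious extra chunks;
# json.loads("") raises an error on them. With single newlines, every chunk parses.
for line in split_chunks(new_stream):
    print(json.loads(line)["content"])  # prints "Hel" then "lo"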