Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit b71e564

Browse files
Fix cleaned chunks for anthropic in muxing (#1136)
* Fix cleaned chunks for anthropic in muxing * fix linting
1 parent 1055216 commit b71e564

File tree

2 files changed

+42
-4
lines changed

2 files changed

+42
-4
lines changed

src/codegate/muxing/adapter.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,28 @@ def provider_format_funcs(self) -> Dict[str, Callable]:
8484
"""
8585
pass
8686

87+
def _clean_chunk(self, chunk: str) -> str:
88+
"""Clean the chunk from the "data:" and any extra characters."""
89+
# Find the first position of 'data:' and add 5 characters to skip 'data:'
90+
start_pos = chunk.find("data:") + 5
91+
cleaned_chunk = chunk[start_pos:].strip()
92+
return cleaned_chunk
93+
8794
def _format_openai(self, chunk: str) -> str:
8895
"""
8996
The chunk is already in OpenAI format. To standardize, remove the "data:" prefix.
9097
9198
This function is used by both chat and FIM formatters
9299
"""
93-
cleaned_chunk = chunk.split("data:")[1].strip()
94-
return cleaned_chunk
100+
return self._clean_chunk(chunk)
95101

96102
def _format_antropic(self, chunk: str) -> str:
97103
"""
98104
Format the Anthropic chunk to OpenAI format.
99105
100106
This function is used by both chat and FIM formatters
101107
"""
102-
cleaned_chunk = chunk.split("data:")[1].strip()
108+
cleaned_chunk = self._clean_chunk(chunk)
103109
try:
104110
# Use `strict=False` to allow the JSON payload to contain
105111
# newlines, tabs and other valid characters that might

tests/muxing/test_adapter.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from codegate.db.models import ProviderType
4-
from codegate.muxing.adapter import BodyAdapter
4+
from codegate.muxing.adapter import BodyAdapter, ChatStreamChunkFormatter
55

66

77
class MockedEndpoint:
@@ -30,3 +30,35 @@ def test_catch_all(provider_type, endpoint_route, expected_route):
3030
model_route = MockedModelRoute(provider_type, endpoint_route)
3131
actual_route = body_adapter._get_provider_formatted_url(model_route)
3232
assert actual_route == expected_route
33+
34+
35+
# Both payloads contain "data:" inside their text (as part of "metadata:"), in
# addition to the "data:" marker that precedes the JSON payload.
_EVENT_PREFIX = "event: content_block_delta\ndata:"
_DELTA_PAYLOADS = [
    (
        '{"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", '
        '"text": "\n metadata:\n name: trusty"}}'
    ),
    (
        '{"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", '
        '"text": "v1\nkind: NetworkPolicy\nmetadata:"}}'
    ),
]


@pytest.mark.parametrize(
    "chunk, expected_cleaned_chunk",
    [(_EVENT_PREFIX + payload, payload) for payload in _DELTA_PAYLOADS],
)
def test_clean_chunk(chunk, expected_cleaned_chunk):
    """Cleaning drops everything through the first "data:" and keeps the payload."""
    assert ChatStreamChunkFormatter()._clean_chunk(chunk) == expected_cleaned_chunk

0 commit comments

Comments
 (0)