Ajout de la partie IA

This commit is contained in:
2026-04-20 14:52:20 +02:00
parent 94bbf8beff
commit 5b133aa2fe
50 changed files with 3236 additions and 11 deletions

View File

@@ -0,0 +1,110 @@
"""Adapter Ollama — implémentation concrète des ports LLMProvider et LLMChatProvider.
Isole le reste de l'application des spécificités du protocole Ollama
(URL /api/generate, /api/chat, payload, parsing). Pour swap vers OpenAI
demain, on écrit un nouvel adapter sans toucher au reste du code.
"""
import json
from typing import AsyncIterator
import httpx
from app.core.config import Settings
from app.domain.models import ChatMessage
from app.domain.ports import LLMProviderError
class OllamaLLMProvider:
    """LLM port implementation that talks to an Ollama server over HTTP.

    Structurally (duck typing) provides both operations defined here:
    ``generate`` (single, non-streamed call to ``/api/generate``) and
    ``stream_chat`` (token-by-token streaming from ``/api/chat``).

    Every transport or protocol failure is surfaced as ``LLMProviderError``
    so callers never have to know about httpx or JSON parsing details.
    """

    def __init__(self, settings: Settings) -> None:
        """Capture the connection parameters from the application settings.

        Args:
            settings: Application settings; reads ``ollama_base_url``,
                ``llm_model`` and ``llm_timeout_seconds``.
        """
        # The base URL may be a plain string or a URL object, and may carry a
        # trailing slash; normalise it so that f"{base}/api/..." never yields
        # a double slash in the request path.
        self._base_url = str(settings.ollama_base_url).rstrip("/")
        self._model = settings.llm_model
        self._timeout = settings.llm_timeout_seconds

    async def generate(
        self,
        prompt: str,
        *,
        output_format: str | None = None,
        temperature: float | None = None,
    ) -> str:
        """Run a single, non-streamed completion through ``/api/generate``.

        Args:
            prompt: Prompt text sent to the model.
            output_format: Optional value forwarded as the ``format`` field
                of the request payload; omitted when ``None``.
            temperature: Optional sampling temperature; omitted when ``None``.

        Returns:
            The value of the ``response`` field of Ollama's JSON answer.

        Raises:
            LLMProviderError: If the HTTP call fails, returns an error
                status, or the body is not the expected JSON document.
        """
        url = f"{self._base_url}/api/generate"
        payload: dict[str, object] = {
            "model": self._model,
            "prompt": prompt,
            "stream": False,
        }
        if output_format is not None:
            payload["format"] = output_format
        if temperature is not None:
            # Ollama expects hyperparameters under the "options" key.
            payload["options"] = {"temperature": temperature}

        async with httpx.AsyncClient(timeout=self._timeout) as client:
            try:
                response = await client.post(url, json=payload)
                response.raise_for_status()
            except httpx.HTTPError as exc:
                raise LLMProviderError(
                    f"Erreur lors de l'appel à Ollama : {exc}"
                ) from exc

            # A 2xx answer can still carry a malformed or unexpected body;
            # keep that failure inside the provider's error contract instead
            # of leaking ValueError / KeyError to the caller.
            try:
                return response.json()["response"]
            except (ValueError, KeyError, TypeError) as exc:
                raise LLMProviderError(
                    f"Réponse Ollama invalide : {exc!r}"
                ) from exc

    async def stream_chat(
        self,
        messages: list[ChatMessage],
        *,
        system_prompt: str | None = None,
        temperature: float | None = None,
    ) -> AsyncIterator[str]:
        """Stream a chat completion from ``/api/chat``, one token at a time.

        Ollama answers with NDJSON, one JSON object per line:

        - intermediate steps: ``{"message": {"content": "token"}, "done": false}``
        - final step: ``{"done": true, ...}``
        - failure after the stream started: ``{"error": "..."}``

        Each non-empty token is yielded as-is. Formatting for the client
        (e.g. SSE framing) is left to the consumer of this iterator.

        Args:
            messages: Conversation history; each item's ``role`` and
                ``content`` attributes are forwarded to Ollama.
            system_prompt: Optional system message, prepended when truthy.
            temperature: Optional sampling temperature; omitted when ``None``.

        Yields:
            Non-empty content fragments in the order Ollama produced them.

        Raises:
            LLMProviderError: If the HTTP call fails, a line is not valid
                JSON, or Ollama reports an error inside the stream.
        """
        url = f"{self._base_url}/api/chat"

        payload_messages: list[dict[str, str]] = []
        if system_prompt:
            payload_messages.append({"role": "system", "content": system_prompt})
        payload_messages.extend(
            {"role": m.role, "content": m.content} for m in messages
        )

        payload: dict[str, object] = {
            "model": self._model,
            "messages": payload_messages,
            "stream": True,
        }
        if temperature is not None:
            # Ollama expects hyperparameters under the "options" key.
            payload["options"] = {"temperature": temperature}

        async with httpx.AsyncClient(timeout=self._timeout) as client:
            try:
                async with client.stream("POST", url, json=payload) as response:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
                        if not line.strip():
                            continue

                        try:
                            chunk = json.loads(line)
                        except json.JSONDecodeError as exc:
                            raise LLMProviderError(
                                f"Réponse Ollama invalide : {exc}"
                            ) from exc
                        if not isinstance(chunk, dict):
                            raise LLMProviderError(
                                f"Réponse Ollama invalide : {line!r}"
                            )

                        # Once streaming has begun the HTTP status can no
                        # longer change, so failures arrive as an in-band
                        # "error" object; do not end the stream silently.
                        error = chunk.get("error")
                        if error:
                            raise LLMProviderError(
                                f"Erreur lors du streaming Ollama : {error}"
                            )

                        # `or {}` guards against an explicit "message": null.
                        token = (chunk.get("message") or {}).get("content", "")
                        if token:
                            yield token

                        # Check "done" only after emitting, so content carried
                        # by the final chunk (if any) is not dropped.
                        if chunk.get("done"):
                            break
            except httpx.HTTPError as exc:
                raise LLMProviderError(
                    f"Erreur lors du streaming Ollama : {exc}"
                ) from exc