Mise en place docker + mise en place des settings (config ollama / 1min.ai)
This commit is contained in:
6
brain/.dockerignore
Normal file
6
brain/.dockerignore
Normal file
@@ -0,0 +1,6 @@
|
||||
data/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.env
|
||||
.venv/
|
||||
venv/
|
||||
16
brain/Dockerfile
Normal file
16
brain/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
# Base image: slim Debian variant with CPython 3.12.
FROM python:3.12-slim
# All following relative paths (COPY targets, data/ directory) resolve under /app.
WORKDIR /app

# curl is the only extra system package; the apt lists are removed in the same
# layer so they do not end up in the image.
# NOTE(review): presumably curl is used by an external health check — confirm.
RUN apt-get update && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest on its own before the application code, so the
# pip layer below is only rebuilt when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application package (imported as `app` by uvicorn below).
COPY app ./app

# Directory holding runtime state (settings_store writes data/settings.json
# relative to the working directory); declared as a volume so it outlives the
# container.
RUN mkdir -p /app/data
VOLUME ["/app/data"]

# Port uvicorn listens on.
EXPOSE 8000
# Bind on all interfaces so the port is reachable from outside the container.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
@@ -3,11 +3,18 @@
|
||||
Équivalent Python du `application.properties` Spring Boot, avec validation
|
||||
Pydantic : une variable manquante/invalide = crash au démarrage, pas une
|
||||
NullPointerException surprise à la 3ème requête.
|
||||
|
||||
Depuis l'ecran Parametres (UI) : certains champs sont surchargeables a chaud
|
||||
via `settings_store` (fichier JSON). A chaque Depends(get_settings), on relit
|
||||
.env + overrides fusionnes. Pas de cache : le cout d'un read JSON local est
|
||||
negligeable face a un appel LLM.
|
||||
"""
|
||||
from functools import lru_cache
|
||||
from typing import Literal
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
from app.core.settings_store import load_overrides
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
"""Settings chargés depuis .env ou variables d'environnement."""
|
||||
@@ -18,6 +25,9 @@ class Settings(BaseSettings):
|
||||
extra="ignore",
|
||||
)
|
||||
|
||||
# Provider LLM actif. "ollama" = local ; "onemin" = 1min.ai (etage 2).
|
||||
llm_provider: Literal["ollama", "onemin"] = "ollama"
|
||||
|
||||
ollama_base_url: str = "http://localhost:11434"
|
||||
llm_model: str = "gemma4:26b"
|
||||
llm_timeout_seconds: int = 120
|
||||
@@ -29,8 +39,16 @@ class Settings(BaseSettings):
|
||||
# LLM_NUM_CTX dans .env si besoin (ex: VRAM limitée → 8192).
|
||||
llm_num_ctx: int = 16384
|
||||
|
||||
# 1min.ai (etage 2) — la cle et le modele sont stockes via settings_store
|
||||
# (modifiables depuis l'UI). Les defauts ici sont juste des placeholders.
|
||||
onemin_api_key: str = ""
|
||||
onemin_model: str = "gpt-4o-mini"
|
||||
|
||||
|
||||
def get_settings() -> Settings:
    """Build a fresh ``Settings`` instance merged with the runtime overrides.

    The overrides persisted by ``settings_store`` are passed to ``Settings``
    as constructor keyword arguments, on top of the values loaded from
    ``.env`` / environment variables.

    This function is deliberately NOT cached: it is evaluated on every
    ``Depends(get_settings)`` so that a change made from the settings screen
    (model, provider, API key) is visible to the next HTTP request without
    restarting the service. The previous cached-singleton form returned
    ``Settings()`` before the overrides were ever read.

    Returns:
        A newly constructed ``Settings`` object.
    """
    return Settings(**load_overrides())
|
||||
|
||||
41
brain/app/core/settings_store.py
Normal file
41
brain/app/core/settings_store.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""Overrides runtime persistés sur disque pour les Settings.
|
||||
|
||||
Les Settings par defaut viennent de .env (12-factor). L'utilisateur peut
|
||||
surcharger certains champs depuis l'UI (ex: modele Ollama choisi) — ces
|
||||
overrides sont stockes dans un fichier JSON local, relus a chaque requete.
|
||||
|
||||
Thread-safe via un lock simple : suffisant pour un deploiement mono-process
|
||||
(usage local). Si un jour on passe en multi-worker, migrer vers SQLite.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Guards the read-modify-write cycle in save_overrides(). It is a
# threading lock, so it only protects threads of a single process.
_LOCK = threading.Lock()
# Relative path: resolved against the current working directory of the process.
_OVERRIDES_PATH = Path("data/settings.json")
|
||||
|
||||
|
||||
def load_overrides() -> dict[str, Any]:
    """Return the persisted overrides, or ``{}`` when none can be read.

    An empty dict is returned when the file is missing, unreadable, not
    valid UTF-8, not valid JSON, or when its top-level JSON value is not an
    object. The last case matters because callers unpack the result with
    ``**``, which requires a mapping.

    Returns:
        A dict mapping setting names to their overridden values.
    """
    # EAFP: read directly instead of exists()+read, which is racy if the file
    # disappears in between. FileNotFoundError is a subclass of OSError.
    try:
        raw = _OVERRIDES_PATH.read_text(encoding="utf-8")
        data = json.loads(raw)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
|
||||
|
||||
|
||||
def save_overrides(patch: dict[str, Any]) -> dict[str, Any]:
    """Merge ``patch`` into the persisted overrides and write them to disk.

    The whole read-modify-write cycle runs under ``_LOCK`` so two concurrent
    updates in the same process cannot lose each other's changes.

    The new content is written to a sibling temporary file and then renamed
    over the target. Readers do not take the lock, so writing in place would
    let them observe a half-written file and silently fall back to ``{}``.

    Args:
        patch: Setting names and values to add or overwrite.

    Returns:
        The complete overrides dict after the merge.

    Raises:
        OSError: If the directory or file cannot be written.
    """
    with _LOCK:
        current = load_overrides()
        current.update(patch)
        _OVERRIDES_PATH.parent.mkdir(parents=True, exist_ok=True)
        # Same directory as the target so the rename stays on one filesystem.
        tmp_path = _OVERRIDES_PATH.with_name(_OVERRIDES_PATH.name + ".tmp")
        tmp_path.write_text(
            json.dumps(current, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
        tmp_path.replace(_OVERRIDES_PATH)
        return current
|
||||
174
brain/app/infrastructure/onemin_adapter.py
Normal file
174
brain/app/infrastructure/onemin_adapter.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""Adapter 1min.ai — implementation alternative des ports LLMProvider / LLMChatProvider.
|
||||
|
||||
API 1min.ai (cf. https://docs.1min.ai/docs/api/chat-with-ai-api) :
|
||||
- POST https://api.1min.ai/api/chat-with-ai (one-shot)
|
||||
- POST https://api.1min.ai/api/chat-with-ai?isStreaming=true (SSE)
|
||||
- Auth : header "API-KEY: <cle>"
|
||||
- Body : {"type": "UNIFY_CHAT_WITH_AI", "model": "...",
|
||||
"promptObject": {"prompt": "..."}}
|
||||
|
||||
Le port LoreMind expose une API "messages[]", mais 1min.ai attend un prompt
|
||||
unique. On aplatit donc l'historique + system prompt en un seul bloc texte,
|
||||
avec des marqueurs de role lisibles pour le modele.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import AsyncIterator
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.config import Settings
|
||||
from app.domain.models import ChatMessage
|
||||
from app.domain.ports import LLMProviderError
|
||||
|
||||
# Chat endpoint; stream_chat() appends "?isStreaming=true" to request SSE.
_API_BASE = "https://api.1min.ai/api/chat-with-ai"
# Value sent in the "type" field of every request body.
_PAYLOAD_TYPE = "UNIFY_CHAT_WITH_AI"
|
||||
|
||||
|
||||
class OneMinAiLLMProvider:
    """1min.ai adapter — satisfies LLMProvider and LLMChatProvider by duck typing."""

    def __init__(self, settings: Settings) -> None:
        """Capture the API key, model and timeout from ``settings``.

        Raises:
            LLMProviderError: If no 1min.ai API key is configured.
        """
        if not settings.onemin_api_key:
            raise LLMProviderError(
                "Cle API 1min.ai manquante. Configure-la depuis l'ecran Parametres."
            )
        self._api_key = settings.onemin_api_key
        self._model = settings.onemin_model
        self._timeout = settings.llm_timeout_seconds

    def _headers(self) -> dict[str, str]:
        """Return the HTTP headers sent with every request (auth + JSON body)."""
        return {"API-KEY": self._api_key, "Content-Type": "application/json"}

    def _payload(self, prompt: str) -> dict[str, object]:
        """Wrap ``prompt`` in the request body expected by the chat endpoint."""
        return {
            "type": _PAYLOAD_TYPE,
            "model": self._model,
            "promptObject": {"prompt": prompt},
        }

    async def generate(
        self,
        prompt: str,
        *,
        output_format: str | None = None,  # accepted for port compatibility, not sent
        temperature: float | None = None,  # accepted for port compatibility, not sent
    ) -> str:
        """One-shot call: return the complete answer as a single string.

        Args:
            prompt: Text sent as the single prompt.
            output_format: Ignored by this adapter.
            temperature: Ignored by this adapter.

        Raises:
            LLMProviderError: On transport/HTTP errors, when the body is not
                valid JSON, or when the JSON has no ``resultObject``.
        """
        async with httpx.AsyncClient(timeout=self._timeout) as client:
            try:
                response = await client.post(
                    _API_BASE, headers=self._headers(), json=self._payload(prompt)
                )
                response.raise_for_status()
                data = response.json()
            except httpx.HTTPError as exc:
                raise LLMProviderError(f"Erreur 1min.ai : {exc}") from exc
            except ValueError as exc:
                # response.json() raises a ValueError subclass on a non-JSON
                # body (e.g. an HTML error page served with status 200).
                raise LLMProviderError(
                    f"Erreur 1min.ai : reponse JSON invalide ({exc})"
                ) from exc

        return self._extract_result(data)

    async def stream_chat(
        self,
        messages: list[ChatMessage],
        *,
        system_prompt: str | None = None,
        temperature: float | None = None,
    ) -> AsyncIterator[str]:
        """Stream the answer over SSE, yielding text chunks as they arrive.

        The conversation is flattened to a single prompt (see
        ``_flatten_messages``) because the request body carries only one
        ``prompt`` field. ``temperature`` is accepted for port compatibility
        and is not sent.

        Events handled (see ``_parse_sse``):
          - ``event: content`` with ``data: {"content": "..."}``
          - ``event: done``   ends the stream
          - ``event: error``  raises

        Raises:
            LLMProviderError: On transport/HTTP errors or on an ``error`` event.
        """
        prompt = self._flatten_messages(messages, system_prompt)
        url = f"{_API_BASE}?isStreaming=true"

        async with httpx.AsyncClient(timeout=self._timeout) as client:
            try:
                async with client.stream(
                    "POST", url, headers=self._headers(), json=self._payload(prompt)
                ) as response:
                    response.raise_for_status()
                    async for token in self._parse_sse(response):
                        yield token
            except httpx.HTTPError as exc:
                raise LLMProviderError(
                    f"Erreur lors du streaming 1min.ai : {exc}"
                ) from exc

    # --- Helpers ------------------------------------------------------------

    @staticmethod
    async def _parse_sse(response: httpx.Response) -> AsyncIterator[str]:
        """Split the SSE stream line by line and yield the text chunks.

        An event is dispatched when a blank line is met. Events named
        ``content`` or ``message``, or with no name, are decoded with
        ``_extract_content_chunk``. ``done`` stops iteration. ``error``
        raises ``LLMProviderError``. Any other line (comments, unknown
        fields) is ignored. An event not terminated by a blank line before
        the stream ends is dropped.
        """
        current_event: str | None = None
        current_data = ""
        async for line in response.aiter_lines():
            if line == "":
                # Blank line = end of one SSE event: dispatch it.
                if current_event == "done":
                    return
                if current_event == "error":
                    raise LLMProviderError(f"1min.ai a signale une erreur : {current_data}")
                if current_data and current_event in (None, "content", "message"):
                    token = OneMinAiLLMProvider._extract_content_chunk(current_data)
                    if token:
                        yield token
                current_event = None
                current_data = ""
                continue
            if line.startswith("event:"):
                current_event = line[6:].strip()
            elif line.startswith("data:"):
                chunk = line[5:]
                # Drop a single leading space only: stripping more would eat
                # the significant leading whitespace of a raw-text token
                # (" world") and glue words together.
                if chunk.startswith(" "):
                    chunk = chunk[1:]
                # Several data: lines in one event are joined with newlines.
                current_data = f"{current_data}\n{chunk}" if current_data else chunk

    @staticmethod
    def _extract_content_chunk(data: str) -> str:
        """Return the text carried by one SSE ``data`` payload.

        A JSON object yields its ``content`` field, else its ``token`` field,
        else ``""``. Any other JSON value yields ``""``. Text that is not
        valid JSON is returned unchanged.
        """
        try:
            obj = json.loads(data)
        except json.JSONDecodeError:
            return data  # safety net if the server sends raw text
        if isinstance(obj, dict):
            return obj.get("content") or obj.get("token") or ""
        return ""

    @staticmethod
    def _extract_result(payload: dict) -> str:
        """Extract the final text from a non-streamed response.

        Reads ``aiRecord.aiRecordDetail.resultObject``. A list is
        concatenated element by element; a string is returned as is.

        Raises:
            LLMProviderError: If ``resultObject`` is missing, ``None`` or of
                an unexpected type, or if an enclosing level is not an object.
        """
        # Walk the nesting defensively. A missing level must end in the error
        # below, not in an empty answer that looks like a valid reply.
        record = payload.get("aiRecord") if isinstance(payload, dict) else None
        detail = record.get("aiRecordDetail") if isinstance(record, dict) else None
        result = detail.get("resultObject") if isinstance(detail, dict) else None
        if isinstance(result, list):
            return "".join(str(x) for x in result)
        if isinstance(result, str):
            return result
        raise LLMProviderError("Reponse 1min.ai inattendue : resultObject absent.")

    @staticmethod
    def _flatten_messages(
        messages: list[ChatMessage], system_prompt: str | None
    ) -> str:
        """Serialise the system prompt and history into one prompt string.

        Each part is introduced by a ``[ROLE]`` marker on its own line and
        parts are separated by a blank line. A trailing ``[ASSISTANT]``
        marker is always appended.
        """
        parts: list[str] = []
        if system_prompt:
            parts.append(f"[SYSTEM]\n{system_prompt}")
        if messages:
            history = "\n\n".join(
                f"[{m.role.upper()}]\n{m.content}" for m in messages
            )
            parts.append(history)
        parts.append("[ASSISTANT]")  # invites the model to continue from here
        return "\n\n".join(parts)
|
||||
@@ -5,8 +5,9 @@ au domaine via injection de dépendance (ports + use cases), et transforme les
|
||||
erreurs du domaine en réponses HTTP. Aucune connaissance d'Ollama ici.
|
||||
"""
|
||||
import json
|
||||
from typing import Annotated, AsyncIterator
|
||||
from typing import Annotated, AsyncIterator, Literal
|
||||
|
||||
import httpx
|
||||
from fastapi import Depends, FastAPI, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -14,6 +15,7 @@ from pydantic import BaseModel, Field
|
||||
from app.application.chat import ChatUseCase
|
||||
from app.application.generate_page import GeneratePageUseCase
|
||||
from app.core.config import Settings, get_settings
|
||||
from app.core.settings_store import save_overrides
|
||||
from app.domain.models import (
|
||||
ArcSummary,
|
||||
CampaignStructuralContext,
|
||||
@@ -29,6 +31,7 @@ from app.domain.models import (
|
||||
)
|
||||
from app.domain.ports import LLMProvider, LLMProviderError
|
||||
from app.infrastructure.ollama_adapter import OllamaLLMProvider
|
||||
from app.infrastructure.onemin_adapter import OneMinAiLLMProvider
|
||||
|
||||
app = FastAPI(
|
||||
title="LoreMind Brain",
|
||||
@@ -189,10 +192,17 @@ def get_llm_provider(
|
||||
"""Factory d'adapter — point d'inversion de dépendance.
|
||||
|
||||
C'est ici (et uniquement ici) qu'on choisit QUEL adapter concret
|
||||
incarne le port. Pour swap vers un autre fournisseur, on change
|
||||
cette ligne et rien d'autre.
|
||||
incarne le port, en fonction du champ `llm_provider` des Settings
|
||||
(modifiable a chaud depuis l'ecran Parametres de l'UI).
|
||||
"""
|
||||
return OllamaLLMProvider(settings)
|
||||
try:
|
||||
if settings.llm_provider == "onemin":
|
||||
return OneMinAiLLMProvider(settings)
|
||||
return OllamaLLMProvider(settings)
|
||||
except LLMProviderError as exc:
|
||||
# Ex : cle 1min.ai manquante. On renvoie du 400 plutot que du 500
|
||||
# pour que le frontend puisse afficher un message actionnable.
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
|
||||
|
||||
def get_generate_page_use_case(
|
||||
@@ -392,6 +402,161 @@ def _to_campaign_context(dto: CampaignContextDTO | None) -> CampaignStructuralCo
|
||||
)
|
||||
|
||||
|
||||
# --- Settings (parametrage runtime depuis l'UI) ------------------------------
|
||||
|
||||
|
||||
class SettingsDTO(BaseModel):
    """Serialisable view of the settings that can be changed from the UI.

    Only the fields editable at runtime are exposed. The 1min.ai API key is
    never returned: only a flag saying whether one is configured.
    """

    llm_provider: Literal["ollama", "onemin"]
    ollama_base_url: str
    llm_model: str
    onemin_model: str
    # True when a 1min.ai key is already configured — the key itself is not leaked.
    onemin_api_key_set: bool
|
||||
|
||||
|
||||
class SettingsUpdateDTO(BaseModel):
    """Partial patch of the settings. Every field is optional."""

    llm_provider: Literal["ollama", "onemin"] | None = None
    ollama_base_url: str | None = None
    llm_model: str | None = None
    onemin_model: str | None = None
    # Empty string => the key is cleared. None => no change.
    onemin_api_key: str | None = None
|
||||
|
||||
|
||||
def _to_settings_dto(s: Settings) -> SettingsDTO:
    """Project a ``Settings`` object onto its UI-facing DTO.

    The API key is reduced to a boolean so the secret never leaves the server.
    """
    has_key = bool(s.onemin_api_key)
    fields = {
        "llm_provider": s.llm_provider,
        "ollama_base_url": s.ollama_base_url,
        "llm_model": s.llm_model,
        "onemin_model": s.onemin_model,
        "onemin_api_key_set": has_key,
    }
    return SettingsDTO(**fields)
|
||||
|
||||
|
||||
@app.get("/settings", response_model=SettingsDTO)
def read_settings(
    settings: Annotated[Settings, Depends(get_settings)],
) -> SettingsDTO:
    """Return the current configuration with secrets masked."""
    dto = _to_settings_dto(settings)
    return dto
|
||||
|
||||
|
||||
@app.put("/settings", response_model=SettingsDTO)
def update_settings(patch: SettingsUpdateDTO) -> SettingsDTO:
    """Apply a partial patch to the settings and persist it as overrides.

    Fields left at ``None`` are treated as "no change" and are not stored.
    The response is built from a fresh ``get_settings()`` call made after
    the save.
    """
    changed: dict[str, object] = {}
    for field_name, value in patch.model_dump().items():
        if value is None:
            continue  # field not supplied by the client
        changed[field_name] = value

    if changed:
        save_overrides(changed)

    # Re-read the settings so the response reflects what was just saved.
    refreshed = get_settings()
    return _to_settings_dto(refreshed)
|
||||
|
||||
|
||||
@app.get("/models/ollama")
async def list_ollama_models(
    settings: Annotated[Settings, Depends(get_settings)],
) -> dict[str, list[str]]:
    """List the models available on the configured Ollama server.

    Queries ``<ollama_base_url>/api/tags`` with a 5-second timeout.

    Returns:
        ``{"models": [...]}`` with the model names sorted. The list is empty
        when Ollama is unreachable, answers with an error status, returns a
        body that is not JSON, or returns JSON of an unexpected shape — so
        the UI can show a message instead of receiving a 500.
    """
    # The base URL is user-editable: tolerate a trailing slash so the request
    # does not go to "//api/tags".
    url = f"{settings.ollama_base_url.rstrip('/')}/api/tags"
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            response = await client.get(url)
            response.raise_for_status()
            data = response.json()
    except (httpx.HTTPError, httpx.InvalidURL, ValueError):
        # ValueError: body is not valid JSON. InvalidURL is not a subclass of
        # HTTPError and is raised for a malformed base URL.
        return {"models": []}

    entries = data.get("models") if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return {"models": []}
    # Keep only well-formed entries so sorted() never compares mixed types.
    models = [
        entry["name"]
        for entry in entries
        if isinstance(entry, dict) and isinstance(entry.get("name"), str) and entry["name"]
    ]
    return {"models": sorted(models)}
|
||||
|
||||
|
||||
@app.get("/models/onemin")
def list_onemin_models() -> dict[str, list[dict[str, object]]]:
    """Return the static catalogue of 1min.ai models, grouped by vendor.

    The list is hard-coded. According to the original author's note it was
    built by probing the chat-with-ai endpoint with a real API key (April
    2026): listed IDs answered 200, unknown IDs answered 400
    UNSUPPORTED_MODEL.

    Note: the Anthropic IDs follow 1min.ai's own naming
    (``claude-<family>-<version>``), not Anthropic's official convention.
    """
    # Insertion order of this mapping is the order of the groups in the response.
    catalogue: dict[str, list[str]] = {
        "Anthropic": ["claude-opus-4-6", "claude-sonnet-4-6"],
        "OpenAI": [
            "gpt-5",
            "gpt-5-mini",
            "gpt-5-nano",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4.1-nano",
            "gpt-4o",
            "gpt-4o-mini",
            "gpt-4-turbo",
            "gpt-3.5-turbo",
            "o3",
            "o3-pro",
            "o3-mini",
            "o4-mini",
        ],
        "Google": ["gemini-2.5-pro", "gemini-2.5-flash"],
        "Mistral": [
            "mistral-large-latest",
            "mistral-medium-latest",
            "mistral-small-latest",
            "open-mistral-nemo",
        ],
        "DeepSeek": ["deepseek-chat", "deepseek-reasoner"],
        "xAI": ["grok-3", "grok-3-mini"],
        "Meta": [
            "meta/meta-llama-3.1-405b-instruct",
            "meta/meta-llama-3-70b-instruct",
        ],
        "Alibaba": ["qwen-plus", "qwen3-max"],
        "Perplexity": ["sonar", "sonar-pro"],
    }
    groups: list[dict[str, object]] = [
        {"provider": vendor, "models": model_ids}
        for vendor, model_ids in catalogue.items()
    ]
    return {"groups": groups}
|
||||
|
||||
|
||||
def _to_narrative_entity(dto: NarrativeEntityDTO | None) -> NarrativeEntityContext | None:
|
||||
if dto is None:
|
||||
return None
|
||||
|
||||
7
brain/data/settings.json
Normal file
7
brain/data/settings.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
  "llm_provider": "onemin",
  "ollama_base_url": "http://localhost:11434",
  "llm_model": "gemma4:26b",
  "onemin_model": "mistral-large-latest",
  "onemin_api_key": ""
}
|
||||
Reference in New Issue
Block a user