diff --git a/brain/app/main.py b/brain/app/main.py
index 10e88b0..2558d0f 100644
--- a/brain/app/main.py
+++ b/brain/app/main.py
@@ -689,6 +689,76 @@ async def get_ollama_model_info(
return OllamaModelInfoDTO(context_length=0)
+@app.post("/models/ollama/pull")
+async def pull_ollama_model(
+ body: dict[str, str],
+ settings: Annotated[Settings, Depends(get_settings)],
+) -> StreamingResponse:
+ """Telecharge un modele depuis Ollama et streame la progression.
+
+ Proxifie l'endpoint `/api/pull` d'Ollama qui renvoie du JSON ligne par
+ ligne (NDJSON) avec le statut de chaque etape : manifest, layers,
+ digest, success. On reemet ce flux tel quel au client (le front
+ parsera les lignes et affichera une barre de progression).
+
+ Le timeout est intentionnellement tres long (60 min) car certains
+ modeles font 30+ Go.
+ """
+ name = (body.get("name") or "").strip()
+ if not name:
+ raise HTTPException(status_code=400, detail="name requis")
+ url = f"{settings.ollama_base_url}/api/pull"
+
+ async def stream() -> AsyncIterator[bytes]:
+ # On utilise un timeout long pour la lecture (60 min) mais court pour
+ # la connexion (10s) — si Ollama n'est pas joignable, on echoue vite.
+ timeout = httpx.Timeout(connect=10, read=3600, write=10, pool=10)
+ try:
+ async with httpx.AsyncClient(timeout=timeout) as client:
+ async with client.stream("POST", url, json={"model": name, "stream": True}) as r:
+ if r.status_code != 200:
+ # Ollama renvoie un message JSON d'erreur. On le passe
+ # tel quel au client en preservant le code HTTP.
+ body_text = await r.aread()
+ yield body_text
+ return
+ async for chunk in r.aiter_bytes():
+ yield chunk
+ except httpx.HTTPError as e:
+ # Erreur reseau : on emet une ligne JSON d'erreur compatible
+ # avec le format NDJSON d'Ollama.
+ err = json.dumps({"error": f"Connexion a Ollama impossible : {e}"}) + "\n"
+ yield err.encode("utf-8")
+
+ # application/x-ndjson : un objet JSON par ligne, pas de wrapping SSE.
+ # C'est le format natif d'Ollama, le front le parsera ligne par ligne.
+ return StreamingResponse(stream(), media_type="application/x-ndjson")
+
+
+@app.delete("/models/ollama/{name:path}")
+async def delete_ollama_model(
+ name: str,
+ settings: Annotated[Settings, Depends(get_settings)],
+) -> dict[str, str]:
+ """Supprime un modele du serveur Ollama.
+
+ Le `:path` dans le pattern autorise les `:` du nom (ex: `gemma4:e4b`)
+ sans avoir besoin de URL-encoder cote client.
+ """
+ if not name.strip():
+ raise HTTPException(status_code=400, detail="name requis")
+ url = f"{settings.ollama_base_url}/api/delete"
+ try:
+ async with httpx.AsyncClient(timeout=10) as client:
+ response = await client.request("DELETE", url, json={"model": name})
+ if response.status_code == 404:
+ raise HTTPException(status_code=404, detail=f"Modele '{name}' introuvable")
+ response.raise_for_status()
+ except httpx.HTTPError as e:
+ raise HTTPException(status_code=502, detail=f"Ollama injoignable : {e}")
+ return {"status": "deleted", "name": name}
+
+
@app.get("/models/onemin")
def list_onemin_models() -> dict[str, list[dict[str, object]]]:
"""Catalogue statique des modeles 1min.ai, groupes par fournisseur.
diff --git a/core/pom.xml b/core/pom.xml
index 3ed2dd1..b599cf4 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -14,7 +14,7 @@
com.loremind
loremind-core
- 0.6.6
+ 0.6.8
LoreMind Core
Backend Core - Architecture Hexagonale
diff --git a/core/src/main/java/com/loremind/infrastructure/web/controller/SettingsController.java b/core/src/main/java/com/loremind/infrastructure/web/controller/SettingsController.java
index 501b02a..c0ecfe0 100644
--- a/core/src/main/java/com/loremind/infrastructure/web/controller/SettingsController.java
+++ b/core/src/main/java/com/loremind/infrastructure/web/controller/SettingsController.java
@@ -7,7 +7,9 @@ import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
@@ -15,7 +17,17 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.server.ResponseStatusException;
+import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
import java.util.Map;
/**
@@ -66,11 +78,81 @@ public class SettingsController {
return forward(HttpMethod.POST, "/models/ollama/info", body);
}
+ /**
+ * Pulls an Ollama model and streams the progress to the client.
+ *
+ * RestTemplate is bypassed here (the original note says it buffers the
+ * whole response) in favour of the JDK HTTP client reading the response
+ * as an InputStream. The Brain answers with NDJSON, line by line; every
+ * chunk read is relayed unchanged so the frontend sees the progress in
+ * real time.
+ *
+ * NOTE(review): the upstream status code is never inspected; the response
+ * is always built with ResponseEntity.ok(), so an error body returned by
+ * the Brain is relayed under HTTP 200 - confirm this is intended.
+ */
+ @PostMapping(value = "/models/ollama/pull", produces = "application/x-ndjson")
+ public ResponseEntity pullOllamaModel(@RequestBody Map body) {
+ // Defined elsewhere in this class; presumably rejects the call when the
+ // application runs in demo mode - confirm against its implementation.
+ guardDemoMode();
+ // The lambda runs when the response body is written, not when this
+ // method returns: the upstream call happens inside it.
+ StreamingResponseBody stream = output -> {
+ // Short connect timeout (10 s) so an unreachable Brain fails quickly.
+ HttpClient http = HttpClient.newBuilder()
+ .connectTimeout(Duration.ofSeconds(10))
+ .build();
+ // Request timeout of 60 minutes; the incoming body is re-serialised with
+ // toJson(...) (helper defined elsewhere) and posted to the Brain.
+ HttpRequest req = HttpRequest.newBuilder()
+ .uri(URI.create(brainBaseUrl + "/models/ollama/pull"))
+ .timeout(Duration.ofMinutes(60))
+ .header("Content-Type", "application/json")
+ .POST(HttpRequest.BodyPublishers.ofString(toJson(body)))
+ .build();
+ try {
+ HttpResponse resp = http.send(req, HttpResponse.BodyHandlers.ofInputStream());
+ // try-with-resources closes the upstream stream even if writing fails.
+ try (InputStream in = resp.body()) {
+ byte[] buf = new byte[4096];
+ int n;
+ while ((n = in.read(buf)) != -1) {
+ output.write(buf, 0, n);
+ // Flush after every chunk so progress lines are not held back in a buffer.
+ output.flush();
+ }
+ }
+ } catch (InterruptedException ie) {
+ // Restore the interrupt flag, then surface the failure as an IOException.
+ Thread.currentThread().interrupt();
+ throw new IOException("Pull interrompu", ie);
+ }
+ };
+ return ResponseEntity.ok().contentType(MediaType.parseMediaType("application/x-ndjson")).body(stream);
+ }
+
+ @DeleteMapping("/models/ollama/{name}")
+ public ResponseEntity