Mise en place v0.6.8

Amélioration de l'installation automatique. Ajout de la possibilité de télécharger le LLM de son choix depuis l'intérieur de l'application, en communiquant avec Ollama.
2026-04-26 01:11:04 +02:00
parent 5e04e84ee4
commit addf78f01d
12 changed files with 754 additions and 18 deletions
--- a/brain/app/main.py
+++ b/brain/app/main.py
@@ -689,6 +689,76 @@ async def get_ollama_model_info(
    return OllamaModelInfoDTO(context_length=0)


+@app.post("/models/ollama/pull")
+async def pull_ollama_model(
+    body: dict[str, str],
+    settings: Annotated[Settings, Depends(get_settings)],
+) -> StreamingResponse:
+    """Telecharge un modele depuis Ollama et streame la progression.
+
+    Proxifie l'endpoint `/api/pull` d'Ollama qui renvoie du JSON ligne par
+    ligne (NDJSON) avec le statut de chaque etape : manifest, layers,
+    digest, success. On reemet ce flux tel quel au client (le front
+    parsera les lignes et affichera une barre de progression).
+
+    Le timeout est intentionnellement tres long (60 min) car certains
+    modeles font 30+ Go.
+    """
+    name = (body.get("name") or "").strip()
+    if not name:
+        raise HTTPException(status_code=400, detail="name requis")
+    url = f"{settings.ollama_base_url}/api/pull"
+
+    async def stream() -> AsyncIterator[bytes]:
+        # On utilise un timeout long pour la lecture (60 min) mais court pour
+        # la connexion (10s) — si Ollama n'est pas joignable, on echoue vite.
+        timeout = httpx.Timeout(connect=10, read=3600, write=10, pool=10)
+        try:
+            async with httpx.AsyncClient(timeout=timeout) as client:
+                async with client.stream("POST", url, json={"model": name, "stream": True}) as r:
+                    if r.status_code != 200:
+                        # Ollama renvoie un message JSON d'erreur. On le passe
+                        # tel quel au client en preservant le code HTTP.
+                        body_text = await r.aread()
+                        yield body_text
+                        return
+                    async for chunk in r.aiter_bytes():
+                        yield chunk
+        except httpx.HTTPError as e:
+            # Erreur reseau : on emet une ligne JSON d'erreur compatible
+            # avec le format NDJSON d'Ollama.
+            err = json.dumps({"error": f"Connexion a Ollama impossible : {e}"}) + "\n"
+            yield err.encode("utf-8")
+
+    # application/x-ndjson : un objet JSON par ligne, pas de wrapping SSE.
+    # C'est le format natif d'Ollama, le front le parsera ligne par ligne.
+    return StreamingResponse(stream(), media_type="application/x-ndjson")
+
+
+@app.delete("/models/ollama/{name:path}")
+async def delete_ollama_model(
+    name: str,
+    settings: Annotated[Settings, Depends(get_settings)],
+) -> dict[str, str]:
+    """Supprime un modele du serveur Ollama.
+
+    Le `:path` dans le pattern autorise les `:` du nom (ex: `gemma4:e4b`)
+    sans avoir besoin de URL-encoder cote client.
+    """
+    if not name.strip():
+        raise HTTPException(status_code=400, detail="name requis")
+    url = f"{settings.ollama_base_url}/api/delete"
+    try:
+        async with httpx.AsyncClient(timeout=10) as client:
+            response = await client.request("DELETE", url, json={"model": name})
+            if response.status_code == 404:
+                raise HTTPException(status_code=404, detail=f"Modele '{name}' introuvable")
+            response.raise_for_status()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=502, detail=f"Ollama injoignable : {e}")
+    return {"status": "deleted", "name": name}
+
+
@app.get("/models/onemin")
 def list_onemin_models() -> dict[str, list[dict[str, object]]]:
    """Catalogue statique des modeles 1min.ai, groupes par fournisseur.
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -14,7 +14,7 @@

    <groupId>com.loremind</groupId>
    <artifactId>loremind-core</artifactId>
-    <version>0.6.6</version>
+    <version>0.6.8</version>
    <name>LoreMind Core</name>
    <description>Backend Core - Architecture Hexagonale</description>

--- a/core/src/main/java/com/loremind/infrastructure/web/controller/SettingsController.java
+++ b/core/src/main/java/com/loremind/infrastructure/web/controller/SettingsController.java
@@ -7,7 +7,9 @@ import org.springframework.http.HttpMethod;
 import org.springframework.http.HttpStatus;
 import org.springframework.http.MediaType;
 import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.DeleteMapping;
 import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
 import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.PutMapping;
 import org.springframework.web.bind.annotation.RequestBody;
@@ -15,7 +17,17 @@ import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.client.RestTemplate;
 import org.springframework.web.server.ResponseStatusException;
+import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;

+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
 import java.util.Map;

 /**
@@ -66,11 +78,81 @@ public class SettingsController {
        return forward(HttpMethod.POST, "/models/ollama/info", body);
    }

+    /**
+     * Pulls an Ollama model and streams the progress back to the client.
+     * <p>
+     * RestTemplate is bypassed here (per the original author it buffers the
+     * whole response) in favour of the JDK HTTP client in streaming mode.
+     * The Brain answers with NDJSON, one object per line; every chunk read
+     * is relayed as-is and flushed immediately so the frontend can display
+     * the progress in real time.
+     * <p>
+     * NOTE(review): the upstream status code is not inspected; this endpoint
+     * always answers 200 and relays whatever body the Brain returned.
+     *
+     * @param body JSON payload forwarded to the Brain (serialized by toJson)
+     * @return a 200 response whose body is the relayed NDJSON stream
+     */
+    @PostMapping(value = "/models/ollama/pull", produces = "application/x-ndjson")
+    public ResponseEntity<StreamingResponseBody> pullOllamaModel(@RequestBody Map<String, Object> body) {
+        guardDemoMode();
+        StreamingResponseBody stream = output -> {
+            HttpClient http = HttpClient.newBuilder()
+                    .connectTimeout(Duration.ofSeconds(10))
+                    .build();
+            HttpRequest req = HttpRequest.newBuilder()
+                    .uri(URI.create(brainBaseUrl + "/models/ollama/pull"))
+                    .timeout(Duration.ofMinutes(60))
+                    .header("Content-Type", "application/json")
+                    .POST(HttpRequest.BodyPublishers.ofString(toJson(body)))
+                    .build();
+            try {
+                HttpResponse<InputStream> resp = http.send(req, HttpResponse.BodyHandlers.ofInputStream());
+                try (InputStream in = resp.body()) {
+                    byte[] buf = new byte[4096];
+                    int n;
+                    // Copy loop: flush after every read so progress lines are
+                    // not held back in the servlet output buffer.
+                    while ((n = in.read(buf)) != -1) {
+                        output.write(buf, 0, n);
+                        output.flush();
+                    }
+                }
+            } catch (InterruptedException ie) {
+                // Restore the interrupt flag before surfacing an IOException.
+                Thread.currentThread().interrupt();
+                throw new IOException("Pull interrompu", ie);
+            }
+        };
+        return ResponseEntity.ok().contentType(MediaType.parseMediaType("application/x-ndjson")).body(stream);
+    }
+
+    /**
+     * Deletes an Ollama model by forwarding a DELETE to the Brain.
+     * Rejected in demo mode via guardDemoMode().
+     * <p>
+     * NOTE(review): the name is appended to the forwarded path as-is, and
+     * {name} is a single path variable; confirm that names containing '/'
+     * or reserved characters reach the Brain intact.
+     *
+     * @param name model name taken from the URL path
+     * @return whatever forward(...) returns for the Brain's answer
+     */
+    @DeleteMapping("/models/ollama/{name}")
+    public ResponseEntity<Map<String, Object>> deleteOllamaModel(@PathVariable("name") String name) {
+        guardDemoMode();
+        return forward(HttpMethod.DELETE, "/models/ollama/" + name, null);
+    }
+
    @GetMapping("/models/onemin")
    public ResponseEntity<Map<String, Object>> listOneMinModels() {
        return forward(HttpMethod.GET, "/models/onemin", null);
    }

+    /**
+     * Minimal JSON object serializer, used to avoid instantiating an
+     * ObjectMapper on every call. Sufficient for the use case here: a flat
+     * Map&lt;String,Object&gt; whose values are String/Number/Boolean or null.
+     * Any other value type is serialized as the JSON string of its
+     * toString().
+     *
+     * @param m the map to serialize; iteration order defines key order
+     * @return the JSON object text
+     */
+    private static String toJson(Map<String, Object> m) {
+        StringBuilder sb = new StringBuilder("{");
+        boolean first = true;
+        for (Map.Entry<String, Object> e : m.entrySet()) {
+            if (!first) sb.append(",");
+            first = false;
+            sb.append("\"").append(escape(e.getKey())).append("\":");
+            Object v = e.getValue();
+            if (v == null) sb.append("null");
+            else if (v instanceof Number || v instanceof Boolean) sb.append(v);
+            else sb.append("\"").append(escape(v.toString())).append("\"");
+        }
+        return sb.append("}").toString();
+    }
+
+    /**
+     * Escapes a string for embedding between double quotes in JSON.
+     * Backslash, double quote and every control character below U+0020 are
+     * escaped (JSON forbids raw control characters inside strings); the
+     * common ones use their short form, the others the \\uXXXX form.
+     *
+     * @param s the raw text
+     * @return the escaped text, without surrounding quotes
+     */
+    private static String escape(String s) {
+        StringBuilder out = new StringBuilder(s.length() + 8);
+        for (int i = 0; i < s.length(); i++) {
+            char c = s.charAt(i);
+            switch (c) {
+                case '\\': out.append("\\\\"); break;
+                case '"':  out.append("\\\""); break;
+                case '\n': out.append("\\n"); break;
+                case '\r': out.append("\\r"); break;
+                case '\t': out.append("\\t"); break;
+                default:
+                    if (c < 0x20) out.append(String.format("\\u%04x", (int) c));
+                    else out.append(c);
+            }
+        }
+        return out.toString();
+    }
+
    private void guardDemoMode() {
        if (demoMode) {
            throw new ResponseStatusException(HttpStatus.FORBIDDEN, "Settings disabled in demo mode");
--- a/installers/install.ps1
+++ b/installers/install.ps1
@@ -40,7 +40,7 @@
  Auteur       : ietm64
  Licence      : AGPL-3.0
  Projet       : LoreMindMJ - assistant pour Maitres de Jeu de JDR
-  Version      : 0.6.6
+  Version      : 0.6.8

 .LINK
  https://git.igmlcreation.fr/ietm64/loremind
@@ -91,17 +91,36 @@ function Test-Docker {
    return ($LASTEXITCODE -eq 0)
 }

-function Wait-Docker([int]$TimeoutSec = 180) {
+function Wait-Docker([int]$TimeoutSec = 600) {
+    # Polls until 'docker info' succeeds or $TimeoutSec elapses.
+    # Returns $true as soon as Docker answers, $false on timeout.
+    # While the 'docker' command cannot be resolved yet (PATH not refreshed),
+    # the iteration is skipped instead of failing.
    Write-Step "Attente du demarrage de Docker Desktop (max ${TimeoutSec}s)..."
+    Write-Host "  Si Docker Desktop affiche un contrat de licence, acceptez-le."
    $deadline = (Get-Date).AddSeconds($TimeoutSec)
+    $reportedFound = $false
    while ((Get-Date) -lt $deadline) {
-        docker info *>$null
-        if ($LASTEXITCODE -eq 0) { Write-Ok "Docker repond"; return $true }
-        Start-Sleep -Seconds 3
+        if (Get-Command docker -ErrorAction SilentlyContinue) {
+            # Report the detection only once, not on every poll.
+            if (-not $reportedFound) {
+                Write-Ok "Commande 'docker' detectee, attente du daemon..."
+                $reportedFound = $true
+            }
+            docker info *>$null
+            if ($LASTEXITCODE -eq 0) { Write-Ok "Docker repond"; return $true }
+        }
+        Start-Sleep -Seconds 5
    }
    return $false
 }

+function Update-PathFromRegistry {
+    # 'winget install' does not propagate PATH changes to the current session.
+    # Re-read PATH for the Machine and User scopes and assign the combined
+    # value to $env:PATH so that 'docker.exe' resolves immediately.
+    $pathByScope = @(
+        [Environment]::GetEnvironmentVariable('Path', 'Machine'),
+        [Environment]::GetEnvironmentVariable('Path', 'User')
+    )
+    $combined = $pathByScope -join ';'
+    $env:PATH = $combined.TrimEnd(';')
+}
+
 # ---------------------------------------------------------------------------
 # 0. Verification des droits administrateur
 # ---------------------------------------------------------------------------
@@ -159,12 +178,25 @@ if (Test-Docker) {
    winget install --id Docker.DockerDesktop -e --accept-package-agreements --accept-source-agreements
    if ($LASTEXITCODE -ne 0) { Write-Err "Echec de l'installation Docker Desktop via winget"; exit 1 }

+    # winget a modifie le PATH systeme mais pas celui de la session courante.
+    # On le rafraichit pour que la commande 'docker' soit immediatement trouvable.
+    Update-PathFromRegistry
+
    Write-Step "Lancement de Docker Desktop..."
    $dd = "$env:ProgramFiles\Docker\Docker\Docker Desktop.exe"
    if (Test-Path $dd) { Start-Process $dd }

-    if (-not (Wait-Docker 240)) {
-        Write-Err "Docker n'a pas demarre. Lancez-le manuellement puis relancez ce script."
+    Write-Host ""
+    Write-Host "  Docker Desktop demarre pour la premiere fois." -ForegroundColor Yellow
+    Write-Host "  Au premier lancement, il affiche un contrat de licence (Subscription Service Agreement)."
+    Write-Host "  Cliquez 'Accept' pour continuer."
+    Write-Host ""
+    Read-Host "  Appuyez sur Entree une fois que Docker Desktop affiche 'Engine running' (icone baleine verte)"
+
+    if (-not (Wait-Docker 600)) {
+        Write-Err "Docker ne repond toujours pas apres 10 minutes."
+        Write-Err "Verifiez que Docker Desktop est lance et que vous avez accepte le contrat,"
+        Write-Err "puis relancez install.bat."
        exit 1
    }
 }
@@ -270,7 +302,7 @@ if ($llmProvider -eq 'ollama') {
    }
 }

-$llmModel = 'gemma4:26b'
+$llmModel = 'gemma4:e4b'

 $autoUpdate = if ($NonInteractive) { $true } else {
    $r = Read-Host "  Activer les mises a jour auto (chaque nuit a 4h) ? [O/n]"
@@ -328,6 +360,43 @@ Write-Step "Demarrage de la stack"
 docker compose up -d
 if ($LASTEXITCODE -ne 0) { Write-Err "Echec docker compose up"; exit 1 }

+# ---------------------------------------------------------------------------
+# 5b. Download the Ollama model (embedded mode only)
+# ---------------------------------------------------------------------------
+# In embedded mode the Ollama container is up but ships without any model.
+# Offer to pull the configured model right away so the user has something to
+# work with on first launch. In non-interactive mode the pull is always done.
+# A failed or skipped pull is never fatal: the manual command is printed.
+if ($ollamaMode -eq 'embedded' -and $llmProvider -eq 'ollama') {
+    $pullNow = if ($NonInteractive) { $true } else {
+        $r = Read-Host "  Telecharger le modele '$llmModel' maintenant ? (peut prendre quelques minutes) [O/n]"
+        -not ($r -match '^(n|N|no|non)$')
+    }
+    if ($pullNow) {
+        # Give the ollama container time to finish its init:
+        # up to 30 attempts, 2 seconds apart.
+        Write-Step "Attente de la disponibilite du conteneur Ollama..."
+        $ollamaReady = $false
+        for ($i = 0; $i -lt 30; $i++) {
+            docker exec loremind-ollama ollama list *>$null
+            if ($LASTEXITCODE -eq 0) { $ollamaReady = $true; break }
+            Start-Sleep -Seconds 2
+        }
+        if (-not $ollamaReady) {
+            Write-Warn2 "Le conteneur Ollama ne repond pas encore. Vous pourrez pull le modele plus tard avec :"
+            Write-Warn2 "  docker exec -it loremind-ollama ollama pull $llmModel"
+        } else {
+            Write-Step "Telechargement du modele $llmModel (peut prendre plusieurs minutes selon votre connexion)..."
+            docker exec loremind-ollama ollama pull $llmModel
+            if ($LASTEXITCODE -eq 0) {
+                Write-Ok "Modele $llmModel pret a l'emploi"
+            } else {
+                Write-Warn2 "Echec du pull. Reessayez manuellement : docker exec -it loremind-ollama ollama pull $llmModel"
+            }
+        }
+    } else {
+        Write-Host "  Pour le telecharger plus tard : docker exec -it loremind-ollama ollama pull $llmModel"
+    }
+}
+
 # ---------------------------------------------------------------------------
 # 6. Recap
 # ---------------------------------------------------------------------------
--- a/installers/install.sh
+++ b/installers/install.sh
@@ -129,7 +129,7 @@ fi
 # 3. none     : aucune installation, configuration ulterieure via l'app
 OLLAMA_MODE="embedded"
 OLLAMA_BASE_URL_VAL="http://ollama:11434"
-LLM_MODEL_VAL="gemma4:26b"
+LLM_MODEL_VAL="gemma4:e4b"
 if [ "$LLM_PROVIDER" = "ollama" ]; then
    HOST_OLLAMA_REPLY="$(ask "Avez-vous deja Ollama installe sur cette machine ? [o/N]" "N")"
    case "$HOST_OLLAMA_REPLY" in
@@ -228,6 +228,41 @@ docker compose pull
 step "Demarrage de la stack"
 docker compose up -d

+# 5b. Download the Ollama model (embedded mode only)
+# ----------------------------------------------------------------------------
+# The Ollama container is up but has no model. Offer to pull one right away so
+# the user has something to work with on first launch. A failed or skipped
+# pull only prints the manual command; it does not abort the installer here.
+if [ "$LLM_PROVIDER" = "ollama" ] && [ "$OLLAMA_MODE" = "embedded" ]; then
+    PULL_REPLY="$(ask "Telecharger le modele '${LLM_MODEL_VAL}' maintenant ? (peut prendre plusieurs minutes) [O/n]" "O")"
+    case "$PULL_REPLY" in
+        n|N|no|non|No|Non)
+            echo "  Pour le telecharger plus tard : docker exec -it loremind-ollama ollama pull ${LLM_MODEL_VAL}"
+            ;;
+        *)
+            # Any other answer means yes. Wait for the container first:
+            # up to 30 attempts, 2 seconds apart.
+            step "Attente de la disponibilite du conteneur Ollama..."
+            OLLAMA_READY=0
+            for i in $(seq 1 30); do
+                if docker exec loremind-ollama ollama list >/dev/null 2>&1; then
+                    OLLAMA_READY=1
+                    break
+                fi
+                sleep 2
+            done
+            if [ "$OLLAMA_READY" = "0" ]; then
+                warn "Le conteneur Ollama ne repond pas encore. Vous pourrez pull plus tard :"
+                warn "  docker exec -it loremind-ollama ollama pull ${LLM_MODEL_VAL}"
+            else
+                step "Telechargement du modele ${LLM_MODEL_VAL} (peut prendre plusieurs minutes selon votre connexion)..."
+                if docker exec loremind-ollama ollama pull "${LLM_MODEL_VAL}"; then
+                    ok "Modele ${LLM_MODEL_VAL} pret a l'emploi"
+                else
+                    warn "Echec du pull. Reessayez : docker exec -it loremind-ollama ollama pull ${LLM_MODEL_VAL}"
+                fi
+            fi
+            ;;
+    esac
+fi
+
 # 6. Recap
 URL="http://localhost:${WEB_PORT}"
 echo
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "loremind-web",
-  "version": "0.6.6",
+  "version": "0.6.8",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "loremind-web",
-      "version": "0.6.6",
+      "version": "0.6.8",
      "dependencies": {
        "@angular/animations": "^17.0.0",
        "@angular/common": "^17.0.0",
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
 {
  "name": "loremind-web",
-  "version": "0.6.6",
+  "version": "0.6.8",
  "description": "LoreMind Frontend - Angular",
  "scripts": {
    "ng": "ng",
--- a/web/src/app/interceptors/session-expired.interceptor.ts
+++ b/web/src/app/interceptors/session-expired.interceptor.ts
@@ -1,5 +1,7 @@
 import { HttpInterceptorFn, HttpErrorResponse } from '@angular/common/http';
+import { inject } from '@angular/core';
 import { catchError, throwError } from 'rxjs';
+import { ConfigService } from '../services/config.service';

 /**
 * Detecte la perte de session demo (orchestrateur) via les codes 401/502 sur
@@ -7,9 +9,10 @@ import { catchError, throwError } from 'rxjs';
 * Le reload renvoie l'utilisateur sur la page "Preparation" pour creer une
 * nouvelle session sans qu'il ait a faire Ctrl+Shift+R.
 *
- * Cet interceptor est inerte en mode normal (non-demo) : si le backend natif
- * renvoie un 401 legitime, ca declenche aussi le reload, ce qui est sans
- * consequence puisqu'aucun flux d'auth utilisateur n'existe encore cote app.
+ * Strictement inerte hors mode demo : sans cette garde, un 401 du backend
+ * natif (par ex. HTTP Basic sur /api/settings avant authentification) ou un
+ * 502 transitoire au boot (Brain pas encore pret) declencherait a tort
+ * l'overlay "session demo expiree" sur les installs self-hosted.
 */

 // Module-level flag : evite de declencher overlay + reload plusieurs fois si
@@ -17,8 +20,16 @@ import { catchError, throwError } from 'rxjs';
 let alreadyTriggered = false;

 export const sessionExpiredInterceptor: HttpInterceptorFn = (req, next) => {
+  const config = inject(ConfigService);
+
  return next(req).pipe(
    catchError((err) => {
+      // Garde stricte : l'overlay et le reload n'ont de sens qu'en mode demo,
+      // ou l'orchestrateur drop les sessions sans prevenir le client.
+      if (!config.demoMode) {
+        return throwError(() => err);
+      }
+
      const isApiCall = req.url.includes('/api/');
      const isSessionLoss =
        err instanceof HttpErrorResponse && (err.status === 401 || err.status === 502);
--- a/web/src/app/services/settings.service.ts
+++ b/web/src/app/services/settings.service.ts
@@ -65,6 +65,86 @@ export class SettingsService {
  listOneMinModels(): Observable<{ groups: OneMinModelGroup[] }> {
    return this.http.get<{ groups: OneMinModelGroup[] }>(`${this.apiUrl}/models/onemin`, this.authOptions);
  }
+
+  /**
+   * Pulls an Ollama model and streams the progress to the caller.
+   *
+   * The backend answers with NDJSON (one JSON object per line) in Ollama's
+   * native format: `{status, digest?, total?, completed?}`. Each line is
+   * parsed as it arrives and emitted through the returned Observable, which
+   * a component can consume to update a progress bar.
+   *
+   * `fetch` is used instead of `HttpClient` because, per the original
+   * author, Angular buffers XHR responses, which prevents real-time
+   * streaming.
+   *
+   * A last line that is not newline-terminated is still parsed when the
+   * stream ends. Unsubscribing aborts the underlying request.
+   *
+   * NOTE(review): unlike the other calls in this service, `this.authOptions`
+   * is not applied here; confirm `credentials: 'include'` is sufficient.
+   *
+   * @param name Model name to pull (e.g. `gemma4:e4b`).
+   * @returns Observable of pull events; errors with `HTTP <status>` when the
+   *          response is not OK or has no body.
+   */
+  pullOllamaModel(name: string): Observable<OllamaPullEvent> {
+    return new Observable<OllamaPullEvent>((subscriber) => {
+      const controller = new AbortController();
+      // Parses one NDJSON line and forwards it to the subscriber.
+      const emitLine = (raw: string): void => {
+        const line = raw.trim();
+        if (!line) return;
+        try {
+          subscriber.next(JSON.parse(line) as OllamaPullEvent);
+        } catch {
+          // Non-JSON line (rare): ignore it.
+        }
+      };
+      (async () => {
+        try {
+          const response = await fetch(`${this.apiUrl}/models/ollama/pull`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            credentials: 'include',
+            body: JSON.stringify({ name }),
+            signal: controller.signal,
+          });
+          if (!response.ok || !response.body) {
+            subscriber.error(new Error(`HTTP ${response.status}`));
+            return;
+          }
+          const reader = response.body.getReader();
+          const decoder = new TextDecoder();
+          let buffer = '';
+          while (true) {
+            const { value, done } = await reader.read();
+            if (done) break;
+            buffer += decoder.decode(value, { stream: true });
+            // NDJSON splitting: every complete line is one JSON object.
+            let nl: number;
+            while ((nl = buffer.indexOf('\n')) >= 0) {
+              emitLine(buffer.slice(0, nl));
+              buffer = buffer.slice(nl + 1);
+            }
+          }
+          // Flush the decoder and emit a trailing line that has no final
+          // newline (e.g. an error payload), instead of dropping it.
+          buffer += decoder.decode();
+          emitLine(buffer);
+          subscriber.complete();
+        } catch (err) {
+          // An abort is triggered by unsubscription: not an error.
+          if ((err as Error).name !== 'AbortError') subscriber.error(err);
+        }
+      })();
+      return () => controller.abort();
+    });
+  }
+
+  /**
+   * Asks the backend to delete an installed Ollama model.
+   * The name is percent-encoded before being placed in the URL path.
+   */
+  deleteOllamaModel(name: string): Observable<{ status: string; name: string }> {
+    const encodedName = encodeURIComponent(name);
+    const url = `${this.apiUrl}/models/ollama/${encodedName}`;
+    return this.http.delete<{ status: string; name: string }>(url, this.authOptions);
+  }
+}
+
+/**
+ * Shape of the events emitted by Ollama during a pull. Every field is
+ * optional because the server emits different messages depending on the step:
+ *  - `{status: "pulling manifest"}`
+ *  - `{status: "downloading", digest, total, completed}`
+ *  - `{status: "verifying sha256 digest"}`
+ *  - `{status: "writing manifest"}`
+ *  - `{status: "removing any unused layers"}`
+ *  - `{status: "success"}`
+ *  - `{error: "..."}` on failure (unknown model, network, etc.)
+ */
+export interface OllamaPullEvent {
+  // Current step label, e.g. "downloading" or "success".
+  status?: string;
+  // Digest of the layer being transferred; sent with "downloading" events.
+  digest?: string;
+  // Total size reported for the current digest (presumably bytes; confirm).
+  total?: number;
+  // Amount already transferred for the current digest (same unit as total).
+  completed?: number;
+  // Error message; present only on failure events.
+  error?: string;
 }

 /** Un groupe de modeles 1min.ai regroupes par fournisseur (Anthropic, OpenAI, ...). */
--- a/web/src/app/settings/settings.component.html
+++ b/web/src/app/settings/settings.component.html
@@ -56,11 +56,91 @@
          <lucide-icon [img]="RefreshCw" [size]="14"></lucide-icon>
          <span>{{ loadingModels ? 'Chargement...' : 'Actualiser' }}</span>
        </button>
+        <button type="button" class="btn-secondary" (click)="openPullDialog()">
+          <lucide-icon [img]="Plus" [size]="14"></lucide-icon>
+          <span>Telecharger</span>
+        </button>
      </div>
      <p class="hint" *ngIf="ollamaModels.length === 0">Aucun modele detecte. Verifie que Ollama tourne et que l'URL est correcte.</p>
    </div>
+
+    <!-- Liste des modeles installes avec bouton supprimer -->
+    <div class="form-row" *ngIf="ollamaModels.length > 0">
+      <label>Modeles installes</label>
+      <ul class="installed-models">
+        <li *ngFor="let m of ollamaModels">
+          <span class="model-name">{{ m }}</span>
+          <button type="button" class="btn-icon btn-danger"
+                  (click)="deleteModel(m)"
+                  [disabled]="deletingModel === m"
+                  [title]="'Supprimer ' + m">
+            <lucide-icon [img]="Trash2" [size]="14"></lucide-icon>
+          </button>
+        </li>
+      </ul>
+    </div>
  </section>

+  <!-- Dialog de telechargement de modele -->
+  <div class="modal-overlay" *ngIf="pullDialogOpen" (click)="closePullDialog()">
+    <div class="modal-content" (click)="$event.stopPropagation()">
+      <header class="modal-header">
+        <h3>Telecharger un modele Ollama</h3>
+        <button type="button" class="btn-icon" (click)="closePullDialog()" [disabled]="pullInProgress" title="Fermer">
+          <lucide-icon [img]="X" [size]="18"></lucide-icon>
+        </button>
+      </header>
+
+      <div class="modal-body">
+        <div *ngIf="!pullInProgress">
+          <label for="pull-name">Nom du modele</label>
+          <input id="pull-name" type="text" [(ngModel)]="pullModelName"
+                 placeholder="ex: gemma4:e4b" autocomplete="off"
+                 (keydown.enter)="startPull()">
+
+          <p class="hint">Suggestions :</p>
+          <div class="suggestions">
+            <button type="button" *ngFor="let s of pullSuggestions"
+                    class="suggestion-chip" (click)="selectSuggestion(s)">{{ s }}</button>
+          </div>
+          <p class="hint" style="margin-top: 0.75rem;">
+            La liste complete est sur <a href="https://ollama.com/library" target="_blank" rel="noopener">ollama.com/library</a>.
+          </p>
+        </div>
+
+        <div *ngIf="pullInProgress" class="pull-progress">
+          <div class="pull-status">{{ pullStatus }}</div>
+          <div class="progress-bar" *ngIf="pullTotal > 0">
+            <div class="progress-fill" [style.width.%]="pullPercent"></div>
+          </div>
+          <div class="progress-text" *ngIf="pullTotal > 0">
+            {{ formatBytes(pullCompleted) }} / {{ formatBytes(pullTotal) }} ({{ pullPercent }}%)
+          </div>
+          <div class="progress-text" *ngIf="pullTotal === 0">
+            Preparation...
+          </div>
+        </div>
+      </div>
+
+      <footer class="modal-footer">
+        <button type="button" class="btn-secondary"
+                (click)="cancelPull()" *ngIf="pullInProgress">
+          Annuler
+        </button>
+        <button type="button" class="btn-secondary"
+                (click)="closePullDialog()" *ngIf="!pullInProgress">
+          Fermer
+        </button>
+        <button type="button" class="btn-primary"
+                (click)="startPull()"
+                [disabled]="pullInProgress || !pullModelName.trim()" *ngIf="!pullInProgress">
+          <lucide-icon [img]="Download" [size]="14"></lucide-icon>
+          <span>Telecharger</span>
+        </button>
+      </footer>
+    </div>
+  </div>
+
  <!-- Bloc 1min.ai -->
  <section class="card" *ngIf="settings && settings.llm_provider === 'onemin'">
    <h2>Configuration 1min.ai</h2>
--- a/web/src/app/settings/settings.component.scss
+++ b/web/src/app/settings/settings.component.scss
@@ -5,6 +5,173 @@
  color: var(--color-text, #e8e8e8);
 }

+// --- Liste des modeles installes -----------------------------------------
+.installed-models {
+  list-style: none;
+  margin: 6px 0 0;
+  padding: 0;
+  display: flex;
+  flex-direction: column;
+  gap: 4px;
+
+  li {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    padding: 6px 12px;
+    background: rgba(255, 255, 255, 0.03);
+    border: 1px solid rgba(255, 255, 255, 0.06);
+    border-radius: 6px;
+    font-size: 0.9rem;
+  }
+
+  .model-name {
+    font-family: ui-monospace, SFMono-Regular, monospace;
+    color: #c4b8e0;
+  }
+}
+
+.btn-icon {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  width: 28px;
+  height: 28px;
+  background: transparent;
+  border: 1px solid rgba(255, 255, 255, 0.1);
+  border-radius: 5px;
+  color: inherit;
+  cursor: pointer;
+  &:hover { background: rgba(255, 255, 255, 0.06); }
+  &:disabled { opacity: 0.5; cursor: not-allowed; }
+}
+
+.btn-icon.btn-danger:hover {
+  background: rgba(239, 68, 68, 0.12);
+  border-color: rgba(239, 68, 68, 0.4);
+  color: #fca5a5;
+}
+
+// --- Modal de pull -------------------------------------------------------
+.modal-overlay {
+  position: fixed;
+  inset: 0;
+  background: rgba(0, 0, 0, 0.6);
+  backdrop-filter: blur(4px);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  z-index: 1000;
+}
+
+.modal-content {
+  background: #1f1a2e;
+  border: 1px solid rgba(255, 255, 255, 0.1);
+  border-radius: 10px;
+  width: min(520px, 92vw);
+  max-height: 90vh;
+  display: flex;
+  flex-direction: column;
+}
+
+.modal-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 16px 20px;
+  border-bottom: 1px solid rgba(255, 255, 255, 0.08);
+
+  h3 {
+    margin: 0;
+    font-size: 1.05rem;
+    font-weight: 600;
+  }
+}
+
+.modal-body {
+  padding: 20px;
+  overflow-y: auto;
+
+  label {
+    display: block;
+    margin-bottom: 6px;
+    font-weight: 500;
+    font-size: 0.9rem;
+  }
+
+  input[type="text"] {
+    width: 100%;
+    padding: 8px 12px;
+    background: rgba(255, 255, 255, 0.04);
+    border: 1px solid rgba(255, 255, 255, 0.12);
+    border-radius: 6px;
+    color: inherit;
+    font-family: ui-monospace, SFMono-Regular, monospace;
+    box-sizing: border-box;
+    &:focus { outline: 2px solid #b794f4; outline-offset: 0; }
+  }
+}
+
+.suggestions {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 6px;
+  margin-top: 4px;
+}
+
+.suggestion-chip {
+  padding: 4px 10px;
+  background: rgba(183, 148, 244, 0.1);
+  border: 1px solid rgba(183, 148, 244, 0.25);
+  border-radius: 14px;
+  color: #d6c5f0;
+  font-family: ui-monospace, SFMono-Regular, monospace;
+  font-size: 0.82rem;
+  cursor: pointer;
+  &:hover { background: rgba(183, 148, 244, 0.2); }
+}
+
+.modal-footer {
+  display: flex;
+  justify-content: flex-end;
+  gap: 8px;
+  padding: 14px 20px;
+  border-top: 1px solid rgba(255, 255, 255, 0.08);
+}
+
+.pull-progress {
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+  padding: 12px 0;
+}
+
+.pull-status {
+  font-size: 0.92rem;
+  color: #d6c5f0;
+  font-family: ui-monospace, SFMono-Regular, monospace;
+}
+
+.progress-bar {
+  width: 100%;
+  height: 8px;
+  background: rgba(255, 255, 255, 0.06);
+  border-radius: 4px;
+  overflow: hidden;
+}
+
+.progress-fill {
+  height: 100%;
+  background: linear-gradient(90deg, #8b5cf6, #b794f4);
+  transition: width 0.3s ease;
+}
+
+.progress-text {
+  font-size: 0.82rem;
+  color: #aaa0c5;
+  font-family: ui-monospace, SFMono-Regular, monospace;
+}
+
 .page-header {
  display: flex;
  align-items: center;
--- a/web/src/app/settings/settings.component.ts
+++ b/web/src/app/settings/settings.component.ts
@@ -2,8 +2,9 @@ import { Component, OnInit } from '@angular/core';
 import { CommonModule } from '@angular/common';
 import { FormsModule } from '@angular/forms';
 import { Router } from '@angular/router';
-import { LucideAngularModule, ArrowLeft, RefreshCw, Save, Check, AlertCircle, Download } from 'lucide-angular';
-import { SettingsService, AppSettings, AppSettingsUpdate, OneMinModelGroup } from '../services/settings.service';
+import { LucideAngularModule, ArrowLeft, RefreshCw, Save, Check, AlertCircle, Download, Trash2, Plus, X } from 'lucide-angular';
+import { SettingsService, AppSettings, AppSettingsUpdate, OneMinModelGroup, OllamaPullEvent } from '../services/settings.service';
+import { Subscription } from 'rxjs';
 import { UpdatesService, UpdateStatus } from '../services/updates.service';
 import { ConfigService } from '../services/config.service';

@@ -33,6 +34,34 @@ export class SettingsComponent implements OnInit {
  readonly Check = Check;
  readonly AlertCircle = AlertCircle;
  readonly Download = Download;
+  // Icons imported from lucide-angular, stored as fields (presumably so the
+  // template can reference them -- verify against the component template).
+  readonly Trash2 = Trash2;
+  readonly Plus = Plus;
+  readonly X = X;
+
+  // --- Ollama model pull / delete ---
+  /** Whether the "add model" dialog is open. */
+  pullDialogOpen = false;
+  /** Model name typed by the user in the dialog. */
+  pullModelName = '';
+  /** Common model names offered as suggestions in the dialog. */
+  readonly pullSuggestions = [
+    'gemma4:e4b', 'gemma3:4b', 'gemma3:12b',
+    'llama3.2:3b', 'llama3.1:8b',
+    'mistral:7b', 'qwen2.5:3b', 'qwen2.5:7b'
+  ];
+  /** True while a pull is running, false otherwise. */
+  pullInProgress = false;
+  /** Current step shown to the user (e.g. "downloading", "verifying"). */
+  pullStatus = '';
+  /** Bytes downloaded so far for the current digest. */
+  pullCompleted = 0;
+  /** Total bytes of the current digest. */
+  pullTotal = 0;
+  /** Subscription to the pull stream, kept so the pull can be cancelled. */
+  private pullSubscription: Subscription | null = null;
+
+  /** Name of the model currently being deleted, used to disable its button. */
+  deletingModel: string | null = null;

  // Mises a jour conteneurs
  updateStatus: UpdateStatus | null = null;
@@ -229,6 +258,119 @@ export class SettingsComponent implements OnInit {
    });
  }

+  // --- Gestion des modeles Ollama (pull / delete) -------------------------
+
+  /** Open the "add model" dialog with an empty name and a fresh pull state. */
+  openPullDialog(): void {
+    this.resetPullState();
+    this.pullModelName = '';
+    this.pullDialogOpen = true;
+  }
+
+  /**
+   * Close the "add model" dialog.
+   * Ignored while a pull is running, so the dialog stays open during a pull.
+   */
+  closePullDialog(): void {
+    if (!this.pullInProgress) {
+      this.pullDialogOpen = false;
+    }
+  }
+
+  /**
+   * Copy a suggested model name into the dialog's name field.
+   * @param name Model name to place in `pullModelName`.
+   */
+  selectSuggestion(name: string): void {
+    this.pullModelName = name;
+  }
+
+  /**
+   * Start downloading the model named in `pullModelName`.
+   *
+   * Does nothing when the trimmed name is empty or a pull is already running.
+   * Progress events update `pullStatus`, `pullCompleted` and `pullTotal`.
+   * A failure (an event carrying `error`, or an error notification from the
+   * stream) sets `errorMessage` and ends the pull without reporting success.
+   * On success the model list is refreshed, the model becomes the selected
+   * one if none was selected, and the dialog closes after a short delay.
+   */
+  startPull(): void {
+    const name = this.pullModelName.trim();
+    if (!name || this.pullInProgress) return;
+    this.resetPullState();
+    this.pullInProgress = true;
+    this.pullStatus = 'connexion...';
+    this.errorMessage = '';
+
+    // Set once an in-band error event is seen. The stream can still emit or
+    // complete afterwards; without this flag `complete` would report success,
+    // select the model and close the dialog despite the failure.
+    let failed = false;
+
+    this.pullSubscription = this.settingsService.pullOllamaModel(name).subscribe({
+      next: (event: OllamaPullEvent) => {
+        if (failed) return;
+        if (event.error) {
+          failed = true;
+          this.errorMessage = `Echec : ${event.error}`;
+          this.pullInProgress = false;
+          // Stop listening. The field is still null when the observable
+          // emits synchronously inside subscribe(); the flag covers that case.
+          if (this.pullSubscription) {
+            this.pullSubscription.unsubscribe();
+            this.pullSubscription = null;
+          }
+          return;
+        }
+        if (event.status) this.pullStatus = event.status;
+        if (event.completed != null) this.pullCompleted = event.completed;
+        if (event.total != null) this.pullTotal = event.total;
+      },
+      error: (err) => {
+        this.errorMessage = this.extractError(err, `Echec du telechargement de ${name}.`);
+        this.pullInProgress = false;
+      },
+      complete: () => {
+        // A stream that already reported an error must not end as a success.
+        if (failed) return;
+        this.pullInProgress = false;
+        this.successMessage = `Modele ${name} telecharge.`;
+        this.refreshModels();
+        // If no model was selected yet, select the one just downloaded.
+        if (this.settings && !this.settings.llm_model) {
+          this.settings.llm_model = name;
+          this.fetchOllamaModelInfo();
+        }
+        // Short delay before closing so the user can see the final status.
+        setTimeout(() => this.closePullDialog(), 1200);
+      }
+    });
+  }
+
+  /**
+   * Cancel the running pull, if any, by dropping the stream subscription,
+   * then mark the pull as stopped with the status 'annule'.
+   */
+  cancelPull(): void {
+    const active = this.pullSubscription;
+    if (active) {
+      active.unsubscribe();
+      this.pullSubscription = null;
+    }
+    this.pullStatus = 'annule';
+    this.pullInProgress = false;
+  }
+
+  /**
+   * Reset all pull-related state: status text, byte counters, the stream
+   * subscription and the in-progress flag.
+   */
+  private resetPullState(): void {
+    this.pullStatus = '';
+    this.pullCompleted = 0;
+    this.pullTotal = 0;
+    if (this.pullSubscription) {
+      this.pullSubscription.unsubscribe();
+      this.pullSubscription = null;
+    }
+    // Once the subscription is dropped, none of its callbacks can clear the
+    // flag any more. Leaving it set would keep closePullDialog() and
+    // startPull() blocked.
+    this.pullInProgress = false;
+  }
+
+  /**
+   * Progress of the current digest as a rounded percentage, capped at 100.
+   * Returns 0 while the total is zero or negative.
+   */
+  get pullPercent(): number {
+    const total = this.pullTotal;
+    if (total <= 0) return 0;
+    const percent = Math.round((this.pullCompleted / total) * 100);
+    return Math.min(100, percent);
+  }
+
+  /**
+   * Format a byte count for display, e.g. '1.2 GB' or '450 MB'.
+   *
+   * Uses 1024-based units from B up to TB. One decimal is shown for values
+   * below 10 in KB and larger units, none otherwise. Falsy input gives '0'.
+   */
+  formatBytes(b: number): string {
+    if (!b) return '0';
+    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
+    let unitIndex = 0;
+    let value = b;
+    for (; value >= 1024 && unitIndex < units.length - 1; unitIndex++) {
+      value /= 1024;
+    }
+    const decimals = value < 10 && unitIndex > 0 ? 1 : 0;
+    return `${value.toFixed(decimals)} ${units[unitIndex]}`;
+  }
+
+  /**
+   * Delete an Ollama model after a browser confirmation prompt.
+   *
+   * While the request runs, `deletingModel` holds the model name. On success
+   * the model list is refreshed; if the deleted model was the selected one,
+   * the selection is cleared and the cached max context is reset to 0.
+   * On failure `errorMessage` is set.
+   *
+   * @param name Name of the model to delete.
+   */
+  deleteModel(name: string): void {
+    const confirmed = confirm(`Supprimer le modele '${name}' ? L'espace disque sera libere.`);
+    if (!confirmed) return;
+
+    this.deletingModel = name;
+    this.errorMessage = '';
+    this.settingsService.deleteOllamaModel(name).subscribe({
+      next: () => {
+        this.deletingModel = null;
+        this.successMessage = `Modele ${name} supprime.`;
+        this.refreshModels();
+        // The deleted model cannot stay selected: clear the selection.
+        const current = this.settings;
+        if (current && current.llm_model === name) {
+          current.llm_model = '';
+          this.ollamaModelMaxContext = 0;
+        }
+      },
+      error: (failure) => {
+        this.deletingModel = null;
+        this.errorMessage = this.extractError(failure, `Echec de la suppression de ${name}.`);
+      }
+    });
+  }
+
  /** Navigate to the '/lore' route. */
  goBack(): void {
    this.router.navigate(['/lore']);
  }