ollama: elimina GPU, fija imagen 0.20.7, reduce a qwen2.5:3b

- Imagen: ollama/ollama:latest → ollama/ollama:0.20.7
- Elimina NVIDIA_VISIBLE_DEVICES, NVIDIA_DRIVER_CAPABILITIES
- Elimina nvidia.com/gpu: "1" de resources limits
- Reduce memory: 8/20Gi → 4/8Gi (solo CPU, modelo 3b)
- Startup: auto-pull cambiado a qwen2.5:3b

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 15:34:37 +00:00
parent 792b53dee7
commit 8bab07201a
1 changed files with 4 additions and 10 deletions
@@ -42,7 +42,7 @@ spec:
        kubernetes.io/hostname: chemavx-k8
      containers:
        - name: ollama
-          image: ollama/ollama:latest
+          image: ollama/ollama:0.20.7
          command: ["/usr/bin/bash", "-c"]
          args:
            - |
@@ -53,8 +53,7 @@ spec:
                sleep 2
              done
              echo "Pulling llama3.1:8b..."
-              ollama pull llama3.1:8b
+              ollama pull qwen2.5:3b
-              ollama rm llama3.2:3b 2>/dev/null || true
              echo "Model ready."
              wait $SERVE_PID
          ports:
@@ -68,17 +67,12 @@ spec:
              value: "8192"
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
-            - name: NVIDIA_VISIBLE_DEVICES
-              value: all
-            - name: NVIDIA_DRIVER_CAPABILITIES
-              value: compute,utility
          resources:
            requests:
-              memory: 8Gi
+              memory: 4Gi
              cpu: "500m"
            limits:
-              memory: 20Gi
+              memory: 8Gi
-              nvidia.com/gpu: "1"
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama