ollama: elimina GPU, fija imagen 0.20.7, reduce a qwen2.5:3b

- Imagen: ollama/ollama:latest → ollama/ollama:0.20.7
- Elimina NVIDIA_VISIBLE_DEVICES, NVIDIA_DRIVER_CAPABILITIES
- Elimina nvidia.com/gpu: "1" de resources limits
- Reduce memoria (requests/limits): 8Gi/20Gi → 4Gi/8Gi (solo CPU, modelo 3b)
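- Startup: auto-pull cambiado de llama3.1:8b a qwen2.5:3b; elimina la limpieza `ollama rm llama3.2:3b` (nota: el mensaje de log `Pulling llama3.1:8b...` no se actualiza en este commit)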

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-24 15:34:37 +00:00
parent 792b53dee7
commit 8bab07201a
+4 -10
View File
@@ -42,7 +42,7 @@ spec:
kubernetes.io/hostname: chemavx-k8 kubernetes.io/hostname: chemavx-k8
containers: containers:
- name: ollama - name: ollama
image: ollama/ollama:latest image: ollama/ollama:0.20.7
command: ["/usr/bin/bash", "-c"] command: ["/usr/bin/bash", "-c"]
args: args:
- | - |
@@ -53,8 +53,7 @@ spec:
sleep 2 sleep 2
done done
echo "Pulling llama3.1:8b..." echo "Pulling llama3.1:8b..."
ollama pull llama3.1:8b ollama pull qwen2.5:3b
ollama rm llama3.2:3b 2>/dev/null || true
echo "Model ready." echo "Model ready."
wait $SERVE_PID wait $SERVE_PID
ports: ports:
@@ -68,17 +67,12 @@ spec:
value: "8192" value: "8192"
- name: OLLAMA_KEEP_ALIVE - name: OLLAMA_KEEP_ALIVE
value: "-1" value: "-1"
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: compute,utility
resources: resources:
requests: requests:
memory: 8Gi memory: 4Gi
cpu: "500m" cpu: "500m"
limits: limits:
memory: 20Gi memory: 8Gi
nvidia.com/gpu: "1"
volumeMounts: volumeMounts:
- name: ollama-data - name: ollama-data
mountPath: /root/.ollama mountPath: /root/.ollama