ollama: elimina GPU, fija imagen 0.20.7, reduce a qwen2.5:3b
- Imagen: ollama/ollama:latest → ollama/ollama:0.20.7
- Elimina NVIDIA_VISIBLE_DEVICES, NVIDIA_DRIVER_CAPABILITIES
- Elimina nvidia.com/gpu: "1" de resources limits
- Reduce memoria: 8/20Gi → 4/8Gi (solo CPU, modelo 3b)
- Startup: auto-pull cambiado a qwen2.5:3b

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+4
-10
@@ -42,7 +42,7 @@ spec:
|
|||||||
kubernetes.io/hostname: chemavx-k8
|
kubernetes.io/hostname: chemavx-k8
|
||||||
containers:
|
containers:
|
||||||
- name: ollama
|
- name: ollama
|
||||||
image: ollama/ollama:latest
|
image: ollama/ollama:0.20.7
|
||||||
command: ["/usr/bin/bash", "-c"]
|
command: ["/usr/bin/bash", "-c"]
|
||||||
args:
|
args:
|
||||||
- |
|
- |
|
||||||
@@ -53,8 +53,7 @@ spec:
|
|||||||
sleep 2
|
sleep 2
|
||||||
done
|
done
|
||||||
echo "Pulling llama3.1:8b..."
|
echo "Pulling llama3.1:8b..."
|
||||||
ollama pull llama3.1:8b
|
ollama pull qwen2.5:3b
|
||||||
ollama rm llama3.2:3b 2>/dev/null || true
|
|
||||||
echo "Model ready."
|
echo "Model ready."
|
||||||
wait $SERVE_PID
|
wait $SERVE_PID
|
||||||
ports:
|
ports:
|
||||||
@@ -68,17 +67,12 @@ spec:
|
|||||||
value: "8192"
|
value: "8192"
|
||||||
- name: OLLAMA_KEEP_ALIVE
|
- name: OLLAMA_KEEP_ALIVE
|
||||||
value: "-1"
|
value: "-1"
|
||||||
- name: NVIDIA_VISIBLE_DEVICES
|
|
||||||
value: all
|
|
||||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
|
||||||
value: compute,utility
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: 8Gi
|
memory: 4Gi
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
limits:
|
limits:
|
||||||
memory: 20Gi
|
memory: 8Gi
|
||||||
nvidia.com/gpu: "1"
|
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: ollama-data
|
- name: ollama-data
|
||||||
mountPath: /root/.ollama
|
mountPath: /root/.ollama
|
||||||
|
|||||||
Reference in New Issue
Block a user