Files
k8s-manifests/ollama/ollama.yaml
T
chemavx 8bab07201a ollama: elimina GPU, fija imagen 0.20.7, reduce a qwen2.5:3b
- Imagen: ollama/ollama:latest → ollama/ollama:0.20.7
- Elimina NVIDIA_VISIBLE_DEVICES, NVIDIA_DRIVER_CAPABILITIES
- Elimina nvidia.com/gpu: "1" de resources limits
- Reduce memory: 8/20Gi → 4/8Gi (solo CPU, modelo 3b)
- Startup: auto-pull cambiado a qwen2.5:3b

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 15:34:37 +00:00

142 lines
3.0 KiB
YAML

---
# Namespace referenced by the Ollama resources in this manifest.
apiVersion: v1
kind: Namespace
metadata:
  name: ollama
---
# Claim named ollama-models: 20Gi, ReadWriteOnce, requested from the
# local-path storage class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models
  namespace: ollama
spec:
  storageClassName: local-path
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
# Single-replica Ollama server pinned to node chemavx-k8. The container
# starts `ollama serve`, waits for the API to answer, pulls one model, and
# then blocks on the server process. Data under /root/.ollama lives on the
# ollama-models PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ollama
  labels:
    app: ollama
spec:
  replicas: 1
  # Recreate terminates the existing pod before creating its replacement.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      nodeSelector:
        kubernetes.io/hostname: chemavx-k8
      containers:
        - name: ollama
          image: ollama/ollama:0.20.7
          # NOTE(review): bash runs as PID 1 here and installs no TERM trap,
          # so pod shutdown may wait out the termination grace period before
          # the server is killed - confirm and consider adding a trap.
          command: ["/usr/bin/bash", "-c"]
          args:
            - |
              # Single source of truth for the model, so the log line and the
              # pull command cannot drift apart.
              MODEL="qwen2.5:3b"
              ollama serve &
              SERVE_PID=$!
              echo "Waiting for ollama to start..."
              until ollama list >/dev/null 2>&1; do
                sleep 2
              done
              echo "Pulling ${MODEL}..."
              # Best effort: a failed pull is reported but does not stop the
              # server, which keeps running as before.
              if ollama pull "${MODEL}"; then
                echo "Model ready."
              else
                echo "WARNING: pull of ${MODEL} failed; serving anyway." >&2
              fi
              wait $SERVE_PID
          ports:
            - name: http
              containerPort: 11434
              protocol: TCP
          env:
            - name: OLLAMA_MODELS
              value: /root/.ollama/models
            # NOTE(review): Ollama's documented context-size variable is
            # OLLAMA_CONTEXT_LENGTH - confirm OLLAMA_NUM_CTX is honored by
            # this image version.
            - name: OLLAMA_NUM_CTX
              value: "8192"
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
          resources:
            requests:
              memory: 4Gi
              cpu: "500m"
            limits:
              memory: 8Gi
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama
          livenessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 5
          # NOTE(review): /api/tags answers as soon as the server is up, so
          # the pod can report Ready before the model pull has finished -
          # confirm that is acceptable for clients.
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 15
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-models
---
# ClusterIP Service exposing TCP port 11434 and forwarding it to port 11434
# on pods labelled app=ollama.
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: ollama
  labels:
    app: ollama
spec:
  type: ClusterIP
  ports:
    - name: http
      protocol: TCP
      port: 11434
      targetPort: 11434
  selector:
    app: ollama
---
# Traefik Ingress for host ollama.chemavx.xyz: every path (Prefix "/") goes
# to the ollama Service on port 11434. The annotations select the websecure
# entrypoint and the letsencrypt-prod cluster issuer; the TLS section names
# the ollama-tls Secret for that host.
# NOTE(review): no authentication middleware is configured on this route -
# confirm the API is meant to be reachable without credentials.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ollama
  namespace: ollama
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
spec:
  ingressClassName: traefik
  tls:
    - secretName: ollama-tls
      hosts:
        - ollama.chemavx.xyz
  rules:
    - host: ollama.chemavx.xyz
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: ollama
                port:
                  number: 11434