8592a09bc7
RollingUpdate caused rollout deadlocks because the PVC (ReadWriteOnce) cannot be mounted by two pods simultaneously. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
148 lines
3.2 KiB
YAML
148 lines
3.2 KiB
YAML
---
# Namespace that the PVC, Deployment, Service and Ingress in this manifest
# are all created in (each of them sets `namespace: ollama`).
apiVersion: v1
kind: Namespace
metadata:
  name: ollama

---
# Persistent storage claim referenced by the ollama Deployment's
# `ollama-data` volume (claimName: ollama-models).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models
  namespace: ollama
spec:
  storageClassName: local-path
  # ReadWriteOnce: per the change note for this file, the claim cannot be
  # mounted by two pods at the same time.
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi

---
# Single-replica Ollama server. On start the container launches
# `ollama serve`, waits for it to answer, pulls llama3.1:8b and removes the
# older llama3.2:3b model, then stays attached to the server process.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ollama
  labels:
    app: ollama
spec:
  replicas: 1
  # Recreate (not RollingUpdate): the ReadWriteOnce PVC cannot be mounted by
  # the old and the new pod simultaneously, which deadlocked rolling updates.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      # Pin the pod to one specific node by hostname.
      nodeSelector:
        kubernetes.io/hostname: chemavx-k8
      containers:
        - name: ollama
          # NOTE(review): `latest` is a floating tag; consider pinning a
          # specific version so restarts do not silently change the server.
          image: ollama/ollama:latest
          command: ["/usr/bin/bash", "-c"]
          args:
            - |
              ollama serve &
              SERVE_PID=$!

              # bash is PID 1 in the container and does not forward signals
              # on its own. Relay TERM/INT to the server so the pod stops
              # promptly instead of waiting for the grace period to expire.
              shutdown() {
                kill -TERM "$SERVE_PID" 2>/dev/null
                wait "$SERVE_PID"
                exit 0
              }
              trap shutdown TERM INT

              echo "Waiting for ollama to start..."
              until ollama list >/dev/null 2>&1; do
                # Fail fast if the server died instead of polling forever.
                if ! kill -0 "$SERVE_PID" 2>/dev/null; then
                  echo "ollama serve exited during startup" >&2
                  exit 1
                fi
                sleep 2
              done

              echo "Pulling llama3.1:8b..."
              if ollama pull llama3.1:8b; then
                # Only drop the previous model once its replacement exists.
                ollama rm llama3.2:3b 2>/dev/null || true
                echo "Model ready."
              else
                echo "Pull of llama3.1:8b failed; keeping existing models." >&2
              fi

              wait "$SERVE_PID"
          ports:
            - name: http
              containerPort: 11434
              protocol: TCP
          env:
            # Model directory; lives under the /root/.ollama volume mount.
            - name: OLLAMA_MODELS
              value: /root/.ollama/models
            # NOTE(review): confirm the server honours OLLAMA_NUM_CTX; the
            # Ollama FAQ documents OLLAMA_CONTEXT_LENGTH for this setting.
            - name: OLLAMA_NUM_CTX
              value: "8192"
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          resources:
            requests:
              memory: 8Gi
              cpu: "500m"
            limits:
              memory: 20Gi
              nvidia.com/gpu: "1"
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama
          # Both probes hit the same HTTP endpoint on the server port.
          livenessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 5
          # NOTE(review): readiness only checks that the server answers, so
          # the pod may report Ready while the model pull is still running.
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 15
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-models

---
# Cluster-internal Service that forwards port 11434 to pods labelled
# `app: ollama`.
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: ollama
  labels:
    app: ollama
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - name: http
      protocol: TCP
      port: 11434
      targetPort: 11434

---
# Routes https://ollama.chemavx.xyz/ to the `ollama` Service on port 11434
# through the traefik ingress class; the TLS certificate is stored in the
# `ollama-tls` secret and requested via the annotated cluster issuer.
# NOTE(review): no authentication is configured in this manifest - confirm
# the API is meant to be reachable through this host as-is.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ollama
  namespace: ollama
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
spec:
  ingressClassName: traefik
  tls:
    - secretName: ollama-tls
      hosts:
        - ollama.chemavx.xyz
  rules:
    - host: ollama.chemavx.xyz
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: ollama
                port:
                  number: 11434