# k8s-manifests/ollama/ollama.yaml

---
# Namespace that the other resources in this file (PVC, Deployment, Service,
# Ingress) reference via `namespace: ollama`.
apiVersion: v1
kind: Namespace
metadata:
  name: ollama

---
# Persistent storage claim for Ollama data. The Deployment in this file
# mounts it at /root/.ollama.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: ollama
  name: ollama-models
spec:
  storageClassName: local-path
  # Single-node read/write access; the Deployment runs exactly one replica.
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi

---
# Single-replica Ollama server. The container starts `ollama serve`, waits
# until the CLI can reach it, pulls one model, and then blocks on the server
# process so the container lives as long as the server does.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ollama
  labels:
    app: ollama
spec:
  replicas: 1
  # Recreate stops the old pod before starting the new one; the data volume
  # (claim `ollama-models`) is requested as ReadWriteOnce.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      # Pin the pod to one specific node by hostname.
      nodeSelector:
        kubernetes.io/hostname: chemavx-k8
      containers:
        - name: ollama
          image: ollama/ollama:0.20.7
          command: ["/usr/bin/bash", "-c"]
          args:
            - |
              # Model tag is defined once so the log line and the pull
              # cannot drift apart.
              MODEL="qwen2.5:3b"
              ollama serve &
              SERVE_PID=$!
              echo "Waiting for ollama to start..."
              until ollama list >/dev/null 2>&1; do
                sleep 2
              done
              echo "Pulling ${MODEL}..."
              if ollama pull "${MODEL}"; then
                echo "Model ready."
              else
                # Keep serving even if the pull fails, but say so.
                echo "WARNING: failed to pull ${MODEL}; server is running without it." >&2
              fi
              wait $SERVE_PID
          ports:
            - name: http
              containerPort: 11434
              protocol: TCP
          env:
            # Model directory; lives under the mounted data volume below.
            - name: OLLAMA_MODELS
              value: /root/.ollama/models
            # NOTE(review): Ollama's documented variable for the default
            # context size is OLLAMA_CONTEXT_LENGTH; confirm whether
            # OLLAMA_NUM_CTX is honored by this image or should be renamed.
            - name: OLLAMA_NUM_CTX
              value: "8192"
            # Per the Ollama FAQ, a negative keep-alive keeps loaded models
            # in memory indefinitely.
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
          resources:
            requests:
              memory: 4Gi
              cpu: "500m"
            limits:
              memory: 8Gi
          volumeMounts:
            - name: ollama-data
              mountPath: /root/.ollama
          # Restart the container after 5 consecutive failed checks, 30s
          # apart, starting 60s after container start.
          livenessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 60
            periodSeconds: 30
            failureThreshold: 5
          # NOTE(review): /api/tags presumably answers as soon as the server
          # is up, so the pod may report Ready before the model pull has
          # finished; confirm that is acceptable.
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 30
            periodSeconds: 10
            failureThreshold: 15
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-models

---
# Cluster-internal Service in front of the pods labelled `app: ollama`.
# The Ingress in this file routes to it by name and port number.
apiVersion: v1
kind: Service
metadata:
  namespace: ollama
  name: ollama
  labels:
    app: ollama
spec:
  selector:
    app: ollama
  type: ClusterIP
  ports:
    # Service port and container port are both 11434.
    - name: http
      protocol: TCP
      port: 11434
      targetPort: 11434

---
# Routes ollama.chemavx.xyz through the `traefik` ingress class to the
# `ollama` Service on port 11434, with TLS from the `ollama-tls` secret.
# NOTE(review): no authentication middleware is configured in these
# annotations; confirm that exposing the API on this host is intended.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: ollama
  name: ollama
  annotations:
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: traefik
  tls:
    - secretName: ollama-tls
      hosts:
        - ollama.chemavx.xyz
  rules:
    - host: ollama.chemavx.xyz
      http:
        paths:
          # Prefix match on "/" sends every path to the backend.
          - pathType: Prefix
            path: /
            backend:
              service:
                name: ollama
                port:
                  number: 11434