ollama: pin image to 0.20.7 and pull qwen2.5:3b without GPU resources

- Pin the container image from ollama/ollama:latest to ollama/ollama:0.20.7.
- Pull qwen2.5:3b at startup instead of llama3.1:8b, and drop the
  best-effort removal of llama3.2:3b.
- Update the startup log line so it names the model that is actually pulled
  (it previously still said llama3.1:8b).
- Remove the NVIDIA_VISIBLE_DEVICES / NVIDIA_DRIVER_CAPABILITIES env vars and
  the nvidia.com/gpu limit.
- Lower memory request 8Gi -> 4Gi and memory limit 20Gi -> 8Gi.

NOTE(review): the indentation of the context lines below was reconstructed
from a whitespace-collapsed copy of this patch -- confirm it against the
target file before applying. The post-image blob id on the index line
predates the log-message fix; regenerate the patch with `git diff` if the
id matters (e.g. for `git apply --3way`).

diff --git a/ollama/ollama.yaml b/ollama/ollama.yaml
index 095dcc9..a875106 100644
--- a/ollama/ollama.yaml
+++ b/ollama/ollama.yaml
@@ -42,7 +42,7 @@ spec:
         kubernetes.io/hostname: chemavx-k8
       containers:
         - name: ollama
-          image: ollama/ollama:latest
+          image: ollama/ollama:0.20.7
           command: ["/usr/bin/bash", "-c"]
           args:
             - |
@@ -53,8 +53,7 @@ spec:
                 sleep 2
               done
-              echo "Pulling llama3.1:8b..."
-              ollama pull llama3.1:8b
-              ollama rm llama3.2:3b 2>/dev/null || true
+              echo "Pulling qwen2.5:3b..."
+              ollama pull qwen2.5:3b
               echo "Model ready."
               wait $SERVE_PID
           ports:
@@ -68,17 +67,12 @@ spec:
               value: "8192"
             - name: OLLAMA_KEEP_ALIVE
               value: "-1"
-            - name: NVIDIA_VISIBLE_DEVICES
-              value: all
-            - name: NVIDIA_DRIVER_CAPABILITIES
-              value: compute,utility
           resources:
             requests:
-              memory: 8Gi
+              memory: 4Gi
               cpu: "500m"
             limits:
-              memory: 20Gi
-              nvidia.com/gpu: "1"
+              memory: 8Gi
           volumeMounts:
             - name: ollama-data
               mountPath: /root/.ollama