---
# Deployment "app": pod template for a single GPU-backed `ollama` container
# with a persistent volume mounted at /root/.ollama.
#
# NOTE(review): there is no `spec.selector` and no `template.metadata.labels`
# here. That is fine if this file is an overlay/patch merged onto a base
# Deployment named `app`; a standalone apps/v1 Deployment needs both — confirm.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app
spec:
  template:
    spec:
      # Run the pod under the RuntimeClass named `nvidia`.
      runtimeClassName: nvidia
      # Tolerate the `gpu=true:NoSchedule` taint so the pod may be scheduled
      # onto nodes carrying it.
      tolerations:
        - key: "gpu"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"
      containers:
        - name: ollama
          image: ollama/ollama:0.6.4
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              # One unit of the `nvidia.com/gpu` extended resource.
              nvidia.com/gpu: 1
          ports:
            # Named port; the probes below refer to it by name.
            - name: ollama
              containerPort: 11434
              protocol: TCP
          volumeMounts:
            # Backed by the `ollama-data` volume declared under `volumes`.
            - mountPath: /root/.ollama
              name: ollama-data
          # Both probes issue an HTTP GET for `/` on the named `ollama` port
          # and rely on the default probe timings (none are set here).
          livenessProbe:
            httpGet:
              path: /
              port: ollama
          readinessProbe:
            httpGet:
              path: /
              port: ollama
      volumes:
        # Binds the existing PersistentVolumeClaim `ollama-pvc`.
        - name: ollama-data
          persistentVolumeClaim:
            claimName: ollama-pvc