Add Ollama and Tabby, and add HTTPS for Ollama

This commit is contained in:
JLP 2025-01-31 17:03:48 -05:00
parent 6d6580c252
commit 82ab5dd0a1
No known key found for this signature in database
GPG key ID: 414E00D1FF7519DC
11 changed files with 344 additions and 1 deletions

View file

@ -0,0 +1,7 @@
---
# Namespace hosting the AI workloads (Ollama, Tabby).
apiVersion: v1
kind: Namespace
metadata:
  labels:
    # NOTE(review): presumably consumed by the homelab gateway's allowed-routes
    # namespace selector — confirm against the Gateway definition.
    homelab-access: "true"
  name: ai

View file

@ -0,0 +1,17 @@
# Tabby model backends: all three roles are served by the in-cluster
# Ollama service (http://ollama:11434).

# Completion (fill-in-the-middle) model.
[model.completion.http]
kind = "ollama/completion"
model_name = "deepseek-r1:8b"
api_endpoint = "http://ollama:11434"
# prompt_template = "<PRE> {prefix} <SUF>{suffix} <MID>" # Example prompt template for the CodeLlama model series.
# NOTE(review): no prompt_template is set — deepseek-r1 is a reasoning/chat
# model; verify it produces usable FIM completions without one.
# Chat model
[model.chat.http]
kind = "openai/chat"
model_name = "deepseek-r1:8b"
# Ollama exposes an OpenAI-compatible chat API under /v1.
api_endpoint = "http://ollama:11434/v1"
# Embedding model
[model.embedding.http]
kind = "ollama/embedding"
model_name = "ordis/jina-embeddings-v2-base-code"
api_endpoint = "http://ollama:11434"

View file

@ -0,0 +1,102 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Inline NamespaceTransformer: places resources in the "ai" namespace, but
# only when they do not already declare one (unsetOnly: true).
transformers:
  - |-
    apiVersion: builtin
    kind: NamespaceTransformer
    metadata:
      name: notImportantHere
      namespace: ai
    unsetOnly: true

namePrefix: ollama-

# Shared generic-deployment base reused by each app overlay.
resources:
  - ../../../kustomize/deployment/

replacements:
  # Point the HTTPRoute backend at the (prefixed) Service name.
  - source:
      kind: Service
      name: svc
    targets:
      - select:
          kind: HTTPRoute
        options:
          create: true
        fieldPaths:
          - spec.rules.0.backendRefs.0.name
  # Fan the app name label out into the route hostname, secret names/paths,
  # and the Service port name/targetPort.
  - source:
      kind: Deployment
      name: app
      fieldPath: metadata.labels.[app.kubernetes.io/appName]
    targets:
      # First hostname label: <appName>.<rest-of-domain>.
      - select:
          kind: HTTPRoute
        options:
          create: true
          delimiter: "."
          index: 0
        fieldPaths:
          - spec.hostnames.0
      # First dash-segment of the managed secret name: <appName>-....
      - select:
          kind: InfisicalSecret
        options:
          delimiter: "-"
          index: 0
        fieldPaths:
          - spec.managedSecretReference.secretName
      # Third path segment of the Infisical secrets path: /x/y/<appName>.
      - select:
          kind: InfisicalSecret
        options:
          delimiter: "/"
          index: 2
        fieldPaths:
          - spec.authentication.universalAuth.secretsScope.secretsPath
      - select:
          kind: Service
        fieldPaths:
          - spec.ports.0.name
          - spec.ports.0.targetPort
  # Managed secret lands in the app's namespace.
  - source:
      kind: Deployment
      name: app
      fieldPath: metadata.labels.[app.kubernetes.io/appNamespace]
    targets:
      - select:
          kind: InfisicalSecret
        fieldPaths:
          - spec.managedSecretReference.secretNamespace
  # Keep the Service port in sync with the container port.
  - source:
      kind: Deployment
      name: app
      fieldPath: spec.template.spec.containers.0.ports.0.containerPort
    targets:
      - select:
          kind: Service
        fieldPaths:
          - spec.ports.0.port

patches:
  - path: patches/deployment.yaml
    target:
      kind: Deployment
      name: app
  - path: patches/pvc.yaml
    target:
      kind: PersistentVolumeClaim
      name: pvc
  - path: patches/httproute.yaml
    target:
      kind: HTTPRoute
      name: http
  - path: patches/httpsroute.yaml
    target:
      kind: HTTPRoute
      name: https

labels:
  - includeSelectors: true
    pairs:
      app.kubernetes.io/appName: ollama
  - pairs:
      app.kubernetes.io/appNamespace: ai

View file

@ -0,0 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app
spec:
  template:
    spec:
      # Use the NVIDIA container runtime so the GPU is visible in-container.
      runtimeClassName: nvidia
      # Allow scheduling onto the GPU node, which is tainted gpu=true:NoSchedule.
      tolerations:
        - key: "gpu"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"
      containers:
        - name: ollama
          # NOTE(review): floating "latest" tag — consider pinning a version
          # for reproducible rollouts.
          image: ollama/ollama:latest
          resources:
            limits:
              nvidia.com/gpu: 1
          ports:
            - name: ollama
              containerPort: 11434
              protocol: TCP
          volumeMounts:
            # Model store and config live under /root/.ollama.
            - mountPath: /root/.ollama
              name: ollama-data
          livenessProbe:
            httpGet:
              path: /
              port: ollama
          readinessProbe:
            httpGet:
              path: /
              port: ollama
      volumes:
        - name: ollama-data
          persistentVolumeClaim:
            # Base claim "pvc" after the kustomization's "ollama-" namePrefix.
            claimName: ollama-pvc

View file

@ -0,0 +1,13 @@
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: http
spec:
  parentRefs:
    # Attach to the shared gateway's "ollama" listener in the homelab namespace.
    - name: homelab-gateway
      sectionName: ollama
      namespace: homelab
  rules:
    - backendRefs:
        # Rewritten by the kustomization's Service-name replacement;
        # this value is the pre-replacement placeholder.
        - name: ollama
          port: 11434

View file

@ -0,0 +1,5 @@
# Strategic-merge delete: removes the base's "https" HTTPRoute for this
# overlay. NOTE(review): presumably TLS is terminated at the gateway
# listener instead (see the HTTPS listener change) — confirm.
$patch: delete
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: https

View file

@ -0,0 +1,8 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc
spec:
  resources:
    requests:
      # Sized for the Ollama model store; models can run tens of GiB each.
      storage: 100Gi

View file

@ -0,0 +1,17 @@
# Tabby model backends: all three roles are served by the in-cluster
# Ollama service (http://ollama:11434).

# Completion (fill-in-the-middle) model.
[model.completion.http]
kind = "ollama/completion"
model_name = "deepseek-r1:8b"
api_endpoint = "http://ollama:11434"
# prompt_template = "<PRE> {prefix} <SUF>{suffix} <MID>" # Example prompt template for the CodeLlama model series.
# NOTE(review): no prompt_template is set — deepseek-r1 is a reasoning/chat
# model; verify it produces usable FIM completions without one.
# Chat model
[model.chat.http]
kind = "openai/chat"
model_name = "deepseek-r1:8b"
# Ollama exposes an OpenAI-compatible chat API under /v1.
api_endpoint = "http://ollama:11434/v1"
# Embedding model
[model.embedding.http]
kind = "ollama/embedding"
model_name = "ordis/jina-embeddings-v2-base-code"
api_endpoint = "http://ollama:11434"

View file

@ -0,0 +1,86 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Inline NamespaceTransformer: places resources in the "ai" namespace, but
# only when they do not already declare one (unsetOnly: true).
transformers:
  - |-
    apiVersion: builtin
    kind: NamespaceTransformer
    metadata:
      name: notImportantHere
      namespace: ai
    unsetOnly: true

namePrefix: tabby-

# Shared generic-deployment base reused by each app overlay.
resources:
  - ../../../kustomize/deployment/

replacements:
  # Point the HTTPRoute backend at the (prefixed) Service name.
  - source:
      kind: Service
      name: svc
    targets:
      - select:
          kind: HTTPRoute
        options:
          create: true
        fieldPaths:
          - spec.rules.0.backendRefs.0.name
  # Fan the app name label out into the route hostname, secret names/paths,
  # and the Service port name/targetPort.
  - source:
      kind: Deployment
      name: app
      fieldPath: metadata.labels.[app.kubernetes.io/appName]
    targets:
      # First hostname label: <appName>.<rest-of-domain>.
      - select:
          kind: HTTPRoute
        options:
          create: true
          delimiter: "."
          index: 0
        fieldPaths:
          - spec.hostnames.0
      # First dash-segment of the managed secret name: <appName>-....
      - select:
          kind: InfisicalSecret
        options:
          delimiter: "-"
          index: 0
        fieldPaths:
          - spec.managedSecretReference.secretName
      # Third path segment of the Infisical secrets path: /x/y/<appName>.
      - select:
          kind: InfisicalSecret
        options:
          delimiter: "/"
          index: 2
        fieldPaths:
          - spec.authentication.universalAuth.secretsScope.secretsPath
      - select:
          kind: Service
        fieldPaths:
          - spec.ports.0.name
          - spec.ports.0.targetPort
  # Managed secret lands in the app's namespace.
  - source:
      kind: Deployment
      name: app
      fieldPath: metadata.labels.[app.kubernetes.io/appNamespace]
    targets:
      - select:
          kind: InfisicalSecret
        fieldPaths:
          - spec.managedSecretReference.secretNamespace

patches:
  - path: patches/deployment.yaml
    target:
      kind: Deployment
      name: app

labels:
  - includeSelectors: true
    pairs:
      app.kubernetes.io/appName: tabby
  - pairs:
      app.kubernetes.io/appNamespace: ai

# Generates the Tabby model config; becomes "tabby-config-<hash>" after
# the namePrefix and generator hash suffix are applied.
configMapGenerator:
  - name: config
    files:
      - config.toml

View file

@ -0,0 +1,44 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app
spec:
  template:
    spec:
      # Allow scheduling onto the GPU node, which is tainted gpu=true:NoSchedule.
      tolerations:
        - key: "gpu"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"
      containers:
        - name: tabby
          # NOTE(review): floating "latest" tag — consider pinning a version
          # for reproducible rollouts.
          image: tabbyml/tabby:latest
          ports:
            - name: tabby
              containerPort: 8080
              protocol: TCP
          args:
            - serve
            - --port
            - "8080"
          volumeMounts:
            # Tabby's data directory (models, index, DB).
            - mountPath: /data
              name: tabby-data
            # Project the generated config file into /data/config.toml
            # without shadowing the rest of /data.
            - name: config-volume
              mountPath: /data/config.toml
              subPath: config.toml
          livenessProbe:
            httpGet:
              path: /
              port: tabby
          readinessProbe:
            httpGet:
              path: /
              port: tabby
      volumes:
        - name: config-volume
          configMap:
            # NOTE(review): assumes kustomize's name-reference fixup rewrites
            # this to the hash-suffixed generated name ("tabby-config-<hash>")
            # — verify the rendered output.
            name: tabby-config
        - name: tabby-data
          persistentVolumeClaim:
            # Base claim "pvc" after the kustomization's "tabby-" namePrefix.
            claimName: tabby-pvc

View file

@ -56,4 +56,9 @@ spec:
homelab-access: "true"
name: ollama
port: 11434
protocol: HTTP
protocol: HTTPS
tls:
mode: Terminate
certificateRefs:
- kind: Secret
name: wildcard-leechpepin-tls