---
apiVersion: v1
kind: Namespace
metadata:
  name: llm
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-cpp-server
  namespace: llm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-cpp-server
  # Recreate: the model PVC is ReadWriteOnce, so the old pod must release
  # the volume before a replacement pod can mount it.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: llama-cpp-server
    spec:
      initContainers:
        # Download the GGUF model into the PVC on first start; subsequent
        # restarts reuse the cached file and only purge stale model files.
        - name: download-model
          image: curlimages/curl
          command:
            - /bin/sh
            - -c
            - |
              set -eu
              MODEL_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.gguf?download=true"
              MODEL_FILE="/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
              # Purge everything except the desired model file
              find /models -type f ! -name "$(basename "$MODEL_FILE")" -delete
              # Download only if missing. -f makes curl fail on HTTP errors,
              # so an error page is never saved as a (corrupt) model file;
              # with set -e a failed download fails the init container and
              # kubelet retries instead of starting with a broken model.
              if [ ! -f "$MODEL_FILE" ]; then
                curl -fL --retry 3 -o "$MODEL_FILE" "$MODEL_URL"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: /models
      containers:
        - name: llama-cpp-server
          image: ghcr.io/ggerganov/llama.cpp:server
          command:
            - /server
            - -m
            - "/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
            - --port
            - "8000"
            - --host
            - "0.0.0.0"
            # -n: cap generated tokens per request at 512
            - -n
            - "512"
          resources:
            requests:
              memory: "18Gi"
              # 100m == 0.1 CPU; quoted canonical form for resource.Quantity
              cpu: "100m"
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: llama-model-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llama-model-pvc
  namespace: llm
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
apiVersion: v1
kind: Service
metadata:
  name: llama-server-service
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: llama-cpp-server
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  # NOTE(review): this Ingress is named after the Service it fronts; a
  # distinct name (e.g. llama-server-ingress) would be less confusing.
  name: llama-server-service
  namespace: llm
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/auth-type: basic
    nginx.ingress.kubernetes.io/auth-secret: llama-auth
    nginx.ingress.kubernetes.io/auth-realm: "Authentication Required - llama webui"
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # The deprecated kubernetes.io/ingress.class annotation was dropped:
    # spec.ingressClassName below already selects the nginx controller.
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - "llama.moritzgraf.de"
      secretName: llama-moritzgraf-de
  rules:
    - host: llama.moritzgraf.de
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llama-server-service
                port:
                  number: 8000
---
apiVersion: v1
kind: Secret
metadata:
  name: llama-auth
  namespace: llm
type: Opaque
data:
  # Base64-encoded htpasswd entry for basic auth (user "fabian"),
  # generated with: htpasswd -c auth fabian
  # SECURITY(review): the previous revision recorded the plaintext password
  # in a comment here, and the hash itself is committed to the repository —
  # rotate this credential and prefer an external secret mechanism
  # (e.g. sealed-secrets / external-secrets) over plain Secrets in git.
  auth: ZmFiaWFuOiRhcHIxJHRTV3YzU3hOJHJPZEJ5WXhYdG4vbVJtSzhtaENWZy4K