# infrapuzzle/k8s/llm/llama_cpp_hosting.yaml
---
# Dedicated namespace isolating every LLM-serving resource in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: llm
---
# llama.cpp HTTP server serving a quantized Llama-3-8B-Instruct GGUF model.
# An init container downloads the model into a PVC once; restarts reuse it.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-cpp-server
  namespace: llm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-cpp-server
  strategy:
    # Recreate: the model PVC is ReadWriteOnce, so the old pod must release
    # the volume before the replacement pod can mount it.
    type: Recreate
  template:
    metadata:
      labels:
        app: llama-cpp-server
    spec:
      initContainers:
        - name: download-model
          image: curlimages/curl
          command:
            - /bin/sh
            - -c
            - |
              MODEL_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.gguf?download=true"
              MODEL_FILE="/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
              # Purge everything (including stale partial downloads) except
              # the desired model file.
              find /models -type f ! -name "$(basename "$MODEL_FILE")" -delete
              # Download only when the model is missing.
              # --fail: exit non-zero on HTTP errors instead of saving the
              # server's error page as the "model" (which the existence check
              # above would then cache forever).
              # Download to a temp name and rename atomically so an
              # interrupted download is never mistaken for a complete model.
              if [ ! -f "$MODEL_FILE" ]; then
                curl --fail -L -o "${MODEL_FILE}.tmp" "$MODEL_URL" \
                  && mv "${MODEL_FILE}.tmp" "$MODEL_FILE"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: /models
      containers:
        - name: llama-cpp-server
          image: ghcr.io/ggerganov/llama.cpp:server
          command:
            - /server
            - -m
            - "/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
            - --port
            - "8000"
            - --host
            - "0.0.0.0"
            # -n: max tokens to predict per request.
            - -n
            - "512"
          resources:
            requests:
              memory: "18Gi"
              # Quoted quantity string: an unquoted 0.1 is parsed as a YAML
              # float before Kubernetes sees it. "100m" is exactly 0.1 CPU.
              cpu: "100m"
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: llama-model-pvc
---
# Persistent storage for the downloaded GGUF model (~8.5 GB at Q8_0),
# shared between the download init container and the server container.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llama-model-pvc
  namespace: llm
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
# Cluster-internal service fronting the llama.cpp server pods on port 8000.
apiVersion: v1
kind: Service
metadata:
  name: llama-server-service
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: llama-cpp-server
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000
---
# Public HTTPS entry point for the llama.cpp server, protected by nginx
# basic auth (credentials in the llama-auth Secret) and a Let's Encrypt
# certificate issued via cert-manager.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llama-server-service
  namespace: llm
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/auth-type: basic
    nginx.ingress.kubernetes.io/auth-secret: llama-auth
    nginx.ingress.kubernetes.io/auth-realm: "Authentication Required - llama webui"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    # NOTE(review): the deprecated kubernetes.io/ingress.class annotation was
    # removed — spec.ingressClassName below is the authoritative replacement,
    # and keeping both creates two sources of truth.
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - "llama.moritzgraf.de"
      secretName: llama-moritzgraf-de
  rules:
    - host: llama.moritzgraf.de
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llama-server-service
                port:
                  number: 8000
---
# Basic-auth credentials consumed by the Ingress auth-secret annotation.
apiVersion: v1
kind: Secret
metadata:
  name: llama-auth
  namespace: llm
type: Opaque
data:
  # base64 of an htpasswd file (generated with: htpasswd -c auth <user>).
  # SECURITY(review): the original file recorded the plaintext password in a
  # comment here — never commit plaintext credentials; rotate this credential
  # since the VCS history exposed it.
  auth: ZmFiaWFuOiRhcHIxJHRTV3YzU3hOJHJPZEJ5WXhYdG4vbVJtSzhtaENWZy4K