apiVersion: v1
kind: Namespace
metadata:
  name: llm
---
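# Single-replica Deployment running the llama.cpp server. The Recreate
# strategy ensures the old pod is gone before a new one starts, so two pods
# never contend for the ReadWriteOnce model volume during a rollout.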
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-cpp-server
  namespace: llm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-cpp-server
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: llama-cpp-server
    spec:
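      # Init container: downloads the GGUF model into the persistent volume.
      # The download is skipped when the file already exists, so restarts
      # reuse the cached copy.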
      initContainers:
        - name: download-model
          image: curlimages/curl
          command:
            - /bin/sh
            - -c
            - |
              MODEL_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.gguf?download=true"
              MODEL_FILE="/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
              # Purge everything except the desired model file
              find /models -type f ! -name "$(basename "$MODEL_FILE")" -delete
              # Download the model only if it is not already present
              # (-f keeps curl from saving an HTTP error page as the model)
              if [ ! -f "$MODEL_FILE" ]; then
                curl -fL -o "$MODEL_FILE" "$MODEL_URL"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: /models
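      # Main container: llama.cpp's bundled HTTP server. -m points at the
      # downloaded model, --host 0.0.0.0 makes it reachable from outside the
      # container, and -n caps generation at 512 tokens per request. Note:
      # upstream later renamed the binary from server to llama-server, so
      # the /server path may need updating with newer image tags.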
      containers:
        - name: llama-cpp-server
          image: ghcr.io/ggerganov/llama.cpp:server
          command:
            - /server
            - -m
            - "/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
            - --port
            - "8000"
            - --host
            - "0.0.0.0"
            - -n
            - "512"
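          # Memory request sized for the roughly 8.5 GiB Q8_0 weights plus
          # KV cache and runtime overhead (a rough estimate); no limits are
          # set, so the pod may use more under load.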
          resources:
            requests:
              memory: "18Gi"
              cpu: 0.1
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: llama-model-pvc
---
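# PVC holding the downloaded model so it survives pod restarts and is only
# fetched once. 20Gi leaves headroom above the model file itself.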
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llama-model-pvc
  namespace: llm
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
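# ClusterIP service: exposes the server on port 8000 inside the cluster only;
# external traffic comes in through the Ingress below.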
apiVersion: v1
kind: Service
metadata:
  name: llama-server-service
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: llama-cpp-server
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000
---
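# Ingress: TLS via cert-manager (letsencrypt-prod) and HTTP basic auth backed
# by the llama-auth secret. The deprecated kubernetes.io/ingress.class
# annotation is kept alongside spec.ingressClassName; it is harmless, and
# older ingress-nginx versions only read the annotation.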
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llama-server-service
  namespace: llm
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    nginx.ingress.kubernetes.io/auth-realm: Authentication Required - llama webui
    nginx.ingress.kubernetes.io/auth-secret: llama-auth
    nginx.ingress.kubernetes.io/auth-type: basic
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    kubernetes.io/ingress.class: nginx
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - "llama.moritzgraf.de"
      secretName: llama-moritzgraf-de
  rules:
    - host: llama.moritzgraf.de
      http:
        paths:
          - backend:
              service:
                name: llama-server-service
                port:
                  number: 8000
            path: /
            pathType: Prefix
---
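# Basic-auth secret for the Ingress. The "auth" key holds a single htpasswd
# line, base64-encoded as required for Secret data.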
apiVersion: v1
kind: Secret
metadata:
  name: llama-auth
  namespace: llm
type: Opaque
data:
  # Credentials: fabian:stinkt
  # Generated with: $ htpasswd -c auth fabian
  # htpasswd writes the credentials to a file named "auth"; committing that
  # file to the git repo did not work (reason unknown), so its content is
  # inlined here base64-encoded.
  auth: ZmFiaWFuOiRhcHIxJHRTV3YzU3hOJHJPZEJ5WXhYdG4vbVJtSzhtaENWZy4K
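# Smoke test once everything is applied (credentials from the secret above;
# assumes the server image exposes llama.cpp's /health endpoint):
#   curl -u 'fabian:stinkt' https://llama.moritzgraf.de/health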