Compare commits
No commits in common. "0865c1f80f4ee203961ed5e680db518aeb7c88f9" and "f2f440462e2914977718db1fb0271f13b24cd539" have entirely different histories.
0865c1f80f
...
f2f440462e
|
|
@ -19,8 +19,6 @@ Infrapuzzle is the newly restructured way of implementing my private infrastruct
|
|||
|
||||
[Documentation in subfolder](./k8s/README.md). The services themselves.
|
||||
|
||||
|
||||
|
||||
## Links used
|
||||
|
||||
* [ingress via host network](https://kubernetes.github.io/ingress-nginx/deploy/baremetal/#via-the-host-network)
|
||||
|
|
|
|||
|
|
@ -767,20 +767,3 @@ oci://8gears.container-registry.com/library/n8n \
|
|||
--namespace n8n-fabi --values n8n-fabi/n8n-fabi.secret.yml --version 1.0.15
|
||||
```
|
||||
|
||||
|
||||
## LLM (Ollama)
|
||||
Hosts a local LLM (Llama 3 8B) for inference.
|
||||
* **Deploy**: `kubectl apply -f llm/ollama.yaml`
|
||||
* **Verification**: Check pods in `llm` namespace.
|
||||
|
||||
## OpenClaw
|
||||
An autonomous AI agent platform.
|
||||
1. **Create Namespace**: `kubectl apply -f openclaw/namespace.yaml`
|
||||
2. **Configure Secrets**:
|
||||
* Edit `openclaw/openclaw.secret.yaml`.
|
||||
* **Gemini**: Replace `change-me` with your Gemini API Key.
|
||||
* **Telegram**: Replace `telegram-bot-token` with your Bot Token.
|
||||
* **Gateway**: Token is pre-filled (randomly generated). Change if desired.
|
||||
* **Encrypt**: Ensure the file is encrypted with `git-crypt` before committing!
|
||||
3. **Deploy**: `kubectl apply -f openclaw/openclaw.secret.yaml`
|
||||
4. **Access**: `https://openclaw.haumdaucher.de`
|
||||
|
|
|
|||
|
|
@ -0,0 +1,135 @@
|
|||
# Namespace for the LLM workload; the manifests for the llama server
# reference it through `namespace: llm`.
kind: Namespace
apiVersion: v1
metadata:
  name: llm
|
||||
|
||||
---
|
||||
# Runs a single llama.cpp HTTP server pod. An init container first makes sure
# the GGUF model file is present on the shared volume; the server container
# then loads that file from /models and listens on port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-cpp-server
  namespace: llm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-cpp-server
  strategy:
    # Recreate stops the old pod before the replacement is started.
    type: Recreate
  template:
    metadata:
      labels:
        app: llama-cpp-server
    spec:
      initContainers:
        - name: download-model
          image: curlimages/curl
          command:
            - /bin/sh
            - -c
            - |
              # Abort on the first failing command or unset variable so that a
              # broken download fails the init container instead of passing.
              set -eu
              MODEL_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.gguf?download=true"
              MODEL_FILE="/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
              # Purge everything except the desired model file. This also
              # removes a leftover .part file from an interrupted download.
              find /models -type f ! -name "$(basename "$MODEL_FILE")" -delete
              # Download only when the model file is missing. --fail makes curl
              # exit non-zero on HTTP errors instead of saving the error page,
              # and the temporary name keeps a partial file from being mistaken
              # for a complete model on the next start.
              if [ ! -f "$MODEL_FILE" ]; then
                curl --fail --location --output "${MODEL_FILE}.part" "$MODEL_URL"
                mv "${MODEL_FILE}.part" "$MODEL_FILE"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: /models
      containers:
        - name: llama-cpp-server
          # NOTE(review): the tag is not pinned to a release; confirm that the
          # image pulled for `:server` still ships the /server binary.
          image: ghcr.io/ggerganov/llama.cpp:server
          command:
            - /server
            - -m
            - "/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
            - --port
            - "8000"
            - --host
            - "0.0.0.0"
            - -n
            - "512"
          resources:
            requests:
              memory: "18Gi"
              # 100m is the same quantity as 0.1 CPU.
              cpu: "100m"
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        # Shared between the init container and the server container.
        - name: model-storage
          persistentVolumeClaim:
            claimName: llama-model-pvc
|
||||
|
||||
---
|
||||
|
||||
# Claim for the volume mounted at /models by the llama-cpp-server pod.
# No storageClassName is set in this manifest.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: llama-model-pvc
  namespace: llm
spec:
  resources:
    requests:
      storage: 20Gi
  accessModes:
    - ReadWriteOnce
|
||||
---
|
||||
# Cluster-internal Service that forwards port 8000 to the pods labelled
# app=llama-cpp-server.
kind: Service
apiVersion: v1
metadata:
  name: llama-server-service
  namespace: llm
spec:
  type: ClusterIP
  ports:
    - port: 8000
      targetPort: 8000
      protocol: TCP
  selector:
    app: llama-cpp-server
|
||||
---
|
||||
# Publishes llama-server-service on llama.moritzgraf.de via the nginx ingress
# class, with forced HTTPS, HTTP basic auth and a cert-manager cluster issuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llama-server-service
  namespace: llm
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    # Basic auth: credentials are read from the Secret named in auth-secret.
    nginx.ingress.kubernetes.io/auth-type: basic
    nginx.ingress.kubernetes.io/auth-secret: llama-auth
    nginx.ingress.kubernetes.io/auth-realm: "Authentication Required - llama webui"
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
spec:
  # The ingress class is selected only through this field; the deprecated
  # kubernetes.io/ingress.class annotation is deliberately not set alongside it.
  ingressClassName: nginx
  tls:
    - hosts:
        - "llama.moritzgraf.de"
      secretName: llama-moritzgraf-de
  rules:
    - host: llama.moritzgraf.de
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llama-server-service
                port:
                  number: 8000
|
||||
---
|
||||
# Basic-auth credentials for the llama ingress, which references this Secret
# by name (llama-auth) in its auth-secret annotation.
apiVersion: v1
kind: Secret
metadata:
  name: llama-auth
  namespace: llm
type: Opaque
data:
  # Base64 of a single htpasswd line ("user:hash"). To regenerate, run
  # `htpasswd -c auth <user>` and base64-encode the resulting `auth` file.
  # Never write the plaintext password into this file.
  # NOTE(review): an earlier revision of this comment appears to have contained
  # the plaintext credential; treat it as leaked and rotate the password.
  auth: ZmFiaWFuOiRhcHIxJHRTV3YzU3hOJHJPZEJ5WXhYdG4vbVJtSzhtaENWZy4K
|
||||
|
|
@ -1,109 +0,0 @@
|
|||
# Ollama server Deployment. The container starts `ollama serve`, pulls the
# llama3.1 8B instruct (q8_0) model once the API answers, and then stays in
# the foreground for as long as the server process runs.
#
# There is no init container: init containers run before the Ollama container
# exists, so they cannot trigger a model pull against it.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: llm
  labels:
    app: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          # NOTE(review): `latest` is a moving tag; consider pinning a version.
          image: ollama/ollama:latest
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Start Ollama in background
              /bin/ollama serve &
              PID=$!
              echo "Waiting for Ollama..."
              # Poll the API instead of sleeping a fixed time, so the pull
              # neither starts too early nor waits longer than needed.
              until ollama list >/dev/null 2>&1; do
                # Give up if the server process has already exited.
                kill -0 "$PID" 2>/dev/null || exit 1
                sleep 1
              done
              echo "Pulling model..."
              ollama pull llama3.1:8b-instruct-q8_0
              echo "Model pulled. Keeping container alive."
              wait $PID
          env:
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            - name: OLLAMA_HOST
              value: "0.0.0.0"
          ports:
            - containerPort: 11434
              name: http
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
            limits:
              memory: "12Gi"
              cpu: "4"
          volumeMounts:
            - name: ollama-storage
              mountPath: /root/.ollama
          # Probes target the server root: Ollama's documented API does not
          # list an /api/health route, so probing it would keep the pod
          # unready and let the liveness probe restart the container.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 30
            periodSeconds: 5
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-storage
|
||||
---
|
||||
# Cluster-internal Service that forwards port 11434 to the pods labelled
# app=ollama.
kind: Service
apiVersion: v1
metadata:
  name: ollama
  namespace: llm
spec:
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
  selector:
    app: ollama
|
||||
---
|
||||
# Claim for the volume the ollama pod mounts at /root/.ollama, provisioned
# through the openebs-hostpath storage class.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: ollama-storage
  namespace: llm
spec:
  storageClassName: openebs-hostpath
  resources:
    requests:
      storage: 50Gi
  accessModes:
    - ReadWriteOnce
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
# k8s/openclaw/AGENTS.md
|
||||
|
||||
> [!NOTE]
|
||||
> This directory contains the deployment configuration for **OpenClaw**, an open-source AI agent platform.
|
||||
|
||||
## Overview
|
||||
* **Namespace**: `openclaw`
|
||||
* **Workload**: `openclaw` (Deployment)
|
||||
* **Dependencies**:
|
||||
* **LLM**: Connects to `ollama` in `llm` namespace.
|
||||
* **Secrets**: Requires `GEMINI_API_KEY`.
|
||||
|
||||
## Deployment
|
||||
1. **Apply Namespace**: `kubectl apply -f namespace.yaml`
|
||||
2. **Secrets**:
|
||||
* Edit `openclaw.secret.yaml` to set `api-key`.
|
||||
* Ensure `openclaw.secret.yaml` is encrypted with `git-crypt`.
|
||||
3. **Apply Workload**: `kubectl apply -f openclaw.secret.yaml`
|
||||
|
||||
## Configuration
|
||||
* **LLM Provider**: `ollama`
|
||||
* **Ollama URL**: `http://ollama.llm.svc.cluster.local:11434`
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
# Namespace for the OpenClaw deployment.
kind: Namespace
apiVersion: v1
metadata:
  name: openclaw
|
||||
Binary file not shown.
Loading…
Reference in New Issue