First draft of openclaw.
This commit is contained in:
parent
f2f440462e
commit
77d02481be
19
README.md
19
README.md
|
|
@ -19,6 +19,25 @@ Infrapuzzle is the newly restructured way of implementing my private infrastruct
|
||||||
|
|
||||||
[Documentation in subfolder](./k8s/README.md). The services themselves.
|
[Documentation in subfolder](./k8s/README.md). The services themselves.
|
||||||
|
|
||||||
|
## AI & Agents
|
||||||
|
|
||||||
|
The cluster hosts local AI capabilities and agents.
|
||||||
|
|
||||||
|
### LLM (Ollama)
|
||||||
|
Hosts a local LLM (Llama 3.1 8B Instruct, Q8_0 quantization) for inference.
|
||||||
|
* **Deploy**: `kubectl apply -f k8s/llm/ollama.yaml`
|
||||||
|
* **Verification**: Check pods in the `llm` namespace (`kubectl get pods -n llm`).
|
||||||
|
|
||||||
|
### OpenClaw
|
||||||
|
An autonomous AI agent platform.
|
||||||
|
1. **Create Namespace**: `kubectl apply -f k8s/openclaw/namespace.yaml`
|
||||||
|
2. **Configure Secrets**:
|
||||||
|
* Edit `k8s/openclaw/openclaw.secret.yaml`.
|
||||||
|
* Replace `change-me` with your Gemini API Key.
|
||||||
|
* **Encrypt**: Ensure the file is encrypted with `git-crypt` before committing!
|
||||||
|
3. **Deploy**: `kubectl apply -f k8s/openclaw/openclaw.secret.yaml`
|
||||||
|
4. **Access**: `https://openclaw.haumdaucher.de`
|
||||||
|
|
||||||
## Links used
|
## Links used
|
||||||
|
|
||||||
* [ingress via host network](https://kubernetes.github.io/ingress-nginx/deploy/baremetal/#via-the-host-network)
|
* [ingress via host network](https://kubernetes.github.io/ingress-nginx/deploy/baremetal/#via-the-host-network)
|
||||||
|
|
|
||||||
|
|
@ -1,135 +0,0 @@
|
||||||
# Namespace that groups the LLM-serving resources (all of them set
# `namespace: llm`).
apiVersion: v1
kind: Namespace
metadata:
  name: llm
|
|
||||||
|
|
||||||
---
|
|
||||||
# llama.cpp HTTP server serving the Meta-Llama-3-8B-Instruct Q8_0 GGUF model.
# An init container downloads the model onto the shared PVC; the server
# container then loads it from /models and listens on port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-cpp-server
  namespace: llm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-cpp-server
  # Recreate: the old pod is terminated before its replacement is started.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: llama-cpp-server
    spec:
      initContainers:
        # Ensures the model file exists on the volume before the server starts.
        - name: download-model
          image: curlimages/curl
          command:
            - /bin/sh
            - -c
            - |
              # Abort on the first failing command so a broken download fails
              # the init container instead of letting the server start.
              set -e
              MODEL_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.gguf?download=true"
              MODEL_FILE="/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
              # Purge everything except the desired model file (this also
              # removes a stale .part file left by an interrupted download).
              find /models -type f ! -name "$(basename "$MODEL_FILE")" -delete
              # Download only when the model file is missing.
              # -f makes curl fail on HTTP errors instead of saving the error
              # page; writing to a .part file and renaming afterwards ensures
              # a partial download is never mistaken for the finished model.
              if [ ! -f "$MODEL_FILE" ]; then
                curl -fL -o "$MODEL_FILE.part" "$MODEL_URL"
                mv "$MODEL_FILE.part" "$MODEL_FILE"
              fi
          volumeMounts:
            - name: model-storage
              mountPath: /models
      containers:
        - name: llama-cpp-server
          image: ghcr.io/ggerganov/llama.cpp:server
          command:
            - /server
            - -m
            - "/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf"
            - --port
            - "8000"
            - --host
            - "0.0.0.0"
            - -n
            - "512"
          resources:
            requests:
              memory: "18Gi"
              cpu: 0.1
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        # Shared between the init container (writer) and the server (reader).
        - name: model-storage
          persistentVolumeClaim:
            claimName: llama-model-pvc
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
# Storage for the downloaded GGUF model; mounted at /models by the
# llama-cpp-server Deployment via the claim name `llama-model-pvc`.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llama-model-pvc
  namespace: llm
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
|
|
||||||
---
|
|
||||||
# Cluster-internal Service in front of the pods labelled
# `app: llama-cpp-server`, forwarding port 8000 to container port 8000.
apiVersion: v1
kind: Service
metadata:
  name: llama-server-service
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: llama-cpp-server
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000
|
|
||||||
---
|
|
||||||
# Exposes llama-server-service on llama.moritzgraf.de through the nginx
# ingress class, with TLS and HTTP basic auth configured via annotations.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: llama-server-service
  namespace: llm
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
    # Basic auth: credentials are read from the `llama-auth` Secret.
    nginx.ingress.kubernetes.io/auth-type: basic
    nginx.ingress.kubernetes.io/auth-secret: llama-auth
    nginx.ingress.kubernetes.io/auth-realm: Authentication Required - llama webui
    # Names the cert-manager ClusterIssuer for the TLS secret below.
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    kubernetes.io/ingress.class: nginx
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - "llama.moritzgraf.de"
      secretName: llama-moritzgraf-de
  rules:
    - host: llama.moritzgraf.de
      http:
        paths:
          # Every path under / goes to the llama server on port 8000.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: llama-server-service
                port:
                  number: 8000
|
|
||||||
---
|
|
||||||
# Basic-auth credentials for the llama web UI. The `auth` key holds a
# base64-encoded htpasswd entry.
apiVersion: v1
kind: Secret
metadata:
  name: llama-auth
  namespace: llm
type: Opaque
data:
  # Generate with:
  #   htpasswd -c auth <username>
  # -> Creates a file named `auth` containing the credentials; base64-encode
  #    its content for the value below. (Original note: this did not work
  #    when run inside the git repo, cause unknown.)
  # SECURITY: never record the plaintext password in this file. The one that
  # used to be noted here remains in git history and should be rotated.
  auth: ZmFiaWFuOiRhcHIxJHRTV3YzU3hOJHJPZEJ5WXhYdG4vbVJtSzhtaENWZy4K
|
|
||||||
|
|
@ -0,0 +1,101 @@
|
||||||
|
# Ollama inference server. Model data is persisted on the `ollama-storage`
# PVC (mounted at /root/.ollama) and the model is pulled by a postStart hook
# once the server process is running.
# NOTE(review): nothing in this manifest declares the `llm` Namespace —
# confirm it is created elsewhere before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: llm
  labels:
    app: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      # No init container: init containers finish before the main container
      # starts, so they cannot talk to the Ollama server to pre-pull a model.
      # The pull is performed by the postStart hook below instead.
      containers:
        - name: ollama
          image: ollama/ollama:latest
          env:
            # Quoted so the values stay strings rather than YAML numbers.
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            - name: OLLAMA_HOST
              value: "0.0.0.0"
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
            limits:
              memory: "12Gi"
              cpu: "4"
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            - name: ollama-storage
              mountPath: /root/.ollama
          # Ollama exposes no /api/health route; the root path answers
          # 200 "Ollama is running" and is the endpoint to probe.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 30
            periodSeconds: 5
          lifecycle:
            postStart:
              exec:
                # Give the server a moment to come up, then fetch the model
                # into the persistent volume.
                command: ["/bin/sh", "-c", "sleep 10; ollama pull llama3.1:8b-instruct-q8_0"]
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-storage
|
||||||
|
---
|
||||||
|
# Cluster-internal Service for the pods labelled `app: ollama`,
# forwarding TCP port 11434 to container port 11434.
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
|
||||||
|
---
|
||||||
|
# Persistent storage for Ollama's data directory; the ollama Deployment
# mounts this claim at /root/.ollama.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-storage
  namespace: llm
spec:
  storageClassName: openebs-hostpath
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
# k8s/openclaw/AGENTS.md
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> This directory contains the deployment configuration for **OpenClaw**, an open-source AI agent platform.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
* **Namespace**: `openclaw`
|
||||||
|
* **Workload**: `openclaw` (Deployment)
|
||||||
|
* **Dependencies**:
|
||||||
|
* **LLM**: Connects to `ollama` in `llm` namespace.
|
||||||
|
* **Secrets**: Requires `GEMINI_API_KEY`.
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
1. **Apply Namespace**: `kubectl apply -f namespace.yaml`
|
||||||
|
2. **Secrets**:
|
||||||
|
* Edit `openclaw.secret.yaml` to set `api-key`.
|
||||||
|
* Ensure `openclaw.secret.yaml` is encrypted with `git-crypt`.
|
||||||
|
3. **Apply Workload**: `kubectl apply -f openclaw.secret.yaml`
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
* **LLM Provider**: `ollama`
|
||||||
|
* **Ollama URL**: `http://ollama.llm.svc.cluster.local:11434`
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Namespace for the OpenClaw agent platform.
apiVersion: v1
kind: Namespace
metadata:
  name: openclaw
|
||||||
Binary file not shown.
Loading…
Reference in New Issue