diff --git a/README.md b/README.md index 63cae4e..84c9607 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,25 @@ Infrapuzzle is the newly restructured way of implementing my private infrastruct [Documentation in subfolder](./k8s/README.md). The services themselfes. +## AI & Agents + +The cluster hosts local AI capabilities and agents. + +### LLM (Ollama) +Hosts a local LLM (Llama 3 8B) for inference. +* **Deploy**: `kubectl apply -f k8s/llm/ollama.yaml` +* **Verification**: Check pods in `llm` namespace. + +### OpenClaw +An autonomous AI agent platform. +1. **Create Namespace**: `kubectl apply -f k8s/openclaw/namespace.yaml` +2. **Configure Secrets**: + * Edit `k8s/openclaw/openclaw.secret.yaml`. + * Replace `change-me` with your Gemini API Key. + * **Encrypt**: Ensure the file is encrypted with `git-crypt` before committing! +3. **Apply Secret**: `kubectl apply -f k8s/openclaw/openclaw.secret.yaml` +4. **Access**: `https://openclaw.haumdaucher.de` + ## Links used * [ingress via host network](https://kubernetes.github.io/ingress-nginx/deploy/baremetal/#via-the-host-network) diff --git a/k8s/llm/llama_cpp_hosting.yaml b/k8s/llm/llama_cpp_hosting.yaml deleted file mode 100644 index 8d4f4af..0000000 --- a/k8s/llm/llama_cpp_hosting.yaml +++ /dev/null @@ -1,135 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: llm - ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llama-cpp-server - namespace: llm -spec: - replicas: 1 - selector: - matchLabels: - app: llama-cpp-server - strategy: - type: Recreate - template: - metadata: - labels: - app: llama-cpp-server - spec: - initContainers: - - name: download-model - image: curlimages/curl - command: - - /bin/sh - - -c - - | - MODEL_URL="https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q8_0.gguf?download=true" - MODEL_FILE="/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf" - # Purge everything except the desired model file - find /models -type f !
-name "$(basename $MODEL_FILE)" -delete - # Check if the model file does not exist and then download it - if [ ! -f $MODEL_FILE ]; then - curl -L -o $MODEL_FILE $MODEL_URL - fi - volumeMounts: - - name: model-storage - mountPath: /models - containers: - - name: llama-cpp-server - image: ghcr.io/ggerganov/llama.cpp:server - command: - - /server - - -m - - "/models/Meta-Llama-3-8B-Instruct.Q8_0.gguf" - - --port - - "8000" - - --host - - "0.0.0.0" - - -n - - "512" - resources: - requests: - memory: "18Gi" - cpu: 0.1 - volumeMounts: - - name: model-storage - mountPath: /models - volumes: - - name: model-storage - persistentVolumeClaim: - claimName: llama-model-pvc - ---- - -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: llama-model-pvc - namespace: llm -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi ---- -apiVersion: v1 -kind: Service -metadata: - name: llama-server-service - namespace: llm -spec: - type: ClusterIP - selector: - app: llama-cpp-server - ports: - - protocol: TCP - port: 8000 - targetPort: 8000 ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: llama-server-service - namespace: llm - annotations: - nginx.ingress.kubernetes.io/force-ssl-redirect: "true" - nginx.ingress.kubernetes.io/auth-realm: Authentication Required - llama webui - nginx.ingress.kubernetes.io/auth-secret: llama-auth - nginx.ingress.kubernetes.io/auth-type: basic - cert-manager.io/cluster-issuer: "letsencrypt-prod" - kubernetes.io/ingress.class: nginx -spec: - ingressClassName: nginx - tls: - - hosts: - - "llama.moritzgraf.de" - secretName: llama-moritzgraf-de - rules: - - host: llama.moritzgraf.de - http: - paths: - - backend: - service: - name: llama-server-service - port: - number: 8000 - path: / - pathType: Prefix ---- -apiVersion: v1 -data: - # fabian:stinkt - # $htpasswd -c auth fabian - # -> Creates file auth with creds, does not work in git repo. unkn why. 
- auth: ZmFiaWFuOiRhcHIxJHRTV3YzU3hOJHJPZEJ5WXhYdG4vbVJtSzhtaENWZy4K -kind: Secret -metadata: - name: llama-auth - namespace: llm -type: Opaque diff --git a/k8s/llm/ollama.yaml b/k8s/llm/ollama.yaml new file mode 100644 index 0000000..00f3fd5 --- /dev/null +++ b/k8s/llm/ollama.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: llm + labels: + app: ollama +spec: + replicas: 1 + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + initContainers: + - name: pull-model + image: curlimages/curl + command: ["/bin/sh", "-c"] + args: + - | + echo "Waiting for Ollama service..." + # Simple wait loop (naive check, better to use readiness probe/postStart but init runs before app) + # Actually, init container runs BEFORE the main container, so it can't interact with the main container's localhost. + # We need to perform the model pull *after* Ollama starts. + # Changing strategy: Use a postStart hook or sidecar. + # Or simpler: Just let it start, and rely on user/execution time pull, or use an entrypoint script wrapper in main container. + # Best approach for k8s simplicity: Use a command wrapper. + echo "Init container cannot pull because main container is not up. Skipping pre-pull in init." + echo "Model pull will require manual trigger or standard entrypoint behavior." + # To automate: We can run a sidecar that waits for port 11434 and then pulls. 
+ containers: + - name: ollama + image: ollama/ollama:latest + env: + - name: OLLAMA_KEEP_ALIVE + value: "-1" + - name: OLLAMA_HOST + value: "0.0.0.0" + resources: + requests: + memory: "8Gi" + cpu: "2" + limits: + memory: "12Gi" + cpu: "4" + ports: + - containerPort: 11434 + name: http + volumeMounts: + - name: ollama-storage + mountPath: /root/.ollama + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 60 + periodSeconds: 10 + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + lifecycle: + postStart: + exec: + command: ["/bin/sh", "-c", "sleep 10; ollama pull llama3.1:8b-instruct-q8_0"] + volumes: + - name: ollama-storage + persistentVolumeClaim: + claimName: ollama-storage +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: llm +spec: + type: ClusterIP + selector: + app: ollama + ports: + - port: 11434 + targetPort: 11434 + protocol: TCP +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ollama-storage + namespace: llm +spec: + accessModes: + - ReadWriteOnce + storageClassName: openebs-hostpath + resources: + requests: + storage: 50Gi diff --git a/k8s/openclaw/AGENTS.md b/k8s/openclaw/AGENTS.md new file mode 100644 index 0000000..16f3ef3 --- /dev/null +++ b/k8s/openclaw/AGENTS.md @@ -0,0 +1,22 @@ +# k8s/openclaw/AGENTS.md + +> [!NOTE] +> This directory contains the deployment configuration for **OpenClaw**, an open-source AI agent platform. + +## Overview +* **Namespace**: `openclaw` +* **Workload**: `openclaw` (Deployment) +* **Dependencies**: + * **LLM**: Connects to `ollama` in `llm` namespace. + * **Secrets**: Requires `GEMINI_API_KEY`. + +## Deployment +1. **Apply Namespace**: `kubectl apply -f namespace.yaml` +2. **Secrets**: + * Edit `openclaw.secret.yaml` to set `api-key`. + * Ensure `openclaw.secret.yaml` is encrypted with `git-crypt`. +3.
**Apply Secret**: `kubectl apply -f openclaw.secret.yaml` + +## Configuration +* **LLM Provider**: `ollama` +* **Ollama URL**: `http://ollama.llm.svc.cluster.local:11434` diff --git a/k8s/openclaw/namespace.yaml b/k8s/openclaw/namespace.yaml new file mode 100644 index 0000000..394432e --- /dev/null +++ b/k8s/openclaw/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: openclaw diff --git a/k8s/openclaw/openclaw.secret.yaml b/k8s/openclaw/openclaw.secret.yaml new file mode 100644 index 0000000..0eae2c0 Binary files /dev/null and b/k8s/openclaw/openclaw.secret.yaml differ