---
# Ollama LLM server: single replica backed by a PersistentVolumeClaim so
# pulled models survive pod restarts (see the PVC "ollama-storage" below).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: llm
  labels:
    app: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          # NOTE(review): ':latest' is a mutable tag — pin a specific
          # version for reproducible rollouts. TODO confirm desired version.
          image: ollama/ollama:latest
          env:
            # -1 keeps loaded models resident indefinitely (never unload).
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            # Bind on all interfaces so the Service can reach the pod.
            - name: OLLAMA_HOST
              value: "0.0.0.0"
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
            limits:
              memory: "12Gi"
              cpu: "4"
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            # Ollama persists pulled models under /root/.ollama.
            - name: ollama-storage
              mountPath: /root/.ollama
          livenessProbe:
            httpGet:
              path: /
              port: http
            # Generous delay: first start may spend minutes pulling the model.
            initialDelaySeconds: 300
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 30
            periodSeconds: 5
          # Override the entrypoint so we can pre-pull the model on startup.
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Start Ollama in background
              /bin/ollama serve &
              PID=$!
              echo "Waiting for Ollama..."
              # Poll until the API answers instead of a fixed sleep:
              # a fixed 10s sleep raced slow cold starts, making the
              # subsequent pull fail before the server was listening.
              until ollama list >/dev/null 2>&1; do
                sleep 2
              done
              echo "Pulling model..."
              ollama pull llama3.1:8b-instruct-q8_0
              echo "Model pulled. Keeping container alive."
              wait $PID
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-storage
---
# In-cluster endpoint for the Ollama API (port 11434 on matching pods).
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llm
spec:
  selector:
    app: ollama
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
---
# 50Gi volume for the Ollama model cache, provisioned by openebs-hostpath.
# ReadWriteOnce matches the single-replica Deployment that mounts it.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-storage
  namespace: llm
spec:
  storageClassName: openebs-hostpath
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi