---
# Ollama LLM server: Deployment + ClusterIP Service + PVC (namespace: llm).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: llm
  labels:
    app: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          # NOTE(review): ':latest' is unpinned — consider a fixed version tag
          # for reproducible rollouts.
          image: ollama/ollama:latest
          env:
            # -1 keeps loaded models resident in memory indefinitely
            # (no idle unload).
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            # Bind on all interfaces so the Service can reach the pod.
            - name: OLLAMA_HOST
              value: "0.0.0.0"
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
            limits:
              memory: "12Gi"
              cpu: "4"
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            # Model store; persisted via the PVC so pulls survive restarts.
            - name: ollama-storage
              mountPath: /root/.ollama
          livenessProbe:
            httpGet:
              path: /
              port: http
            # Long delay: first start may spend minutes pulling the model
            # before the server is steady.
            initialDelaySeconds: 300
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 30
            periodSeconds: 5
          # Override the image entrypoint to serve in the background,
          # pre-pull the model, then wait on the server process.
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Start Ollama in background
              /bin/ollama serve &
              PID=$!
              echo "Waiting for Ollama..."
              sleep 10
              echo "Pulling model..."
              ollama pull llama3.1:8b-instruct-q8_0
              echo "Model pulled. Keeping container alive."
              wait $PID
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-storage
---
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - port: 11434
      targetPort: 11434
      protocol: TCP
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-storage
  namespace: llm
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: openebs-hostpath
  resources:
    requests:
      storage: 50Gi