---
# Ollama LLM server: Deployment + ClusterIP Service + PVC for model storage.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: llm
  labels:
    app: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      # NOTE(review): the original manifest carried a no-op initContainer whose
      # own comments explained it could not pre-pull the model (init containers
      # run before the main container, so there is no Ollama API to talk to).
      # It has been removed; the model pull happens in the entrypoint wrapper
      # of the main container below.
      containers:
        - name: ollama
          # TODO(review): pin a concrete version instead of :latest for
          # reproducible rollouts.
          image: ollama/ollama:latest
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Start the server in the background, wait until its API answers,
              # pull the model once (cached on the PVC afterwards), then block
              # on the server process so the container stays alive and the pod
              # restarts if the server dies.
              /bin/ollama serve &
              PID=$!
              echo "Waiting for Ollama API..."
              until ollama list >/dev/null 2>&1; do
                sleep 1
              done
              echo "Pulling model..."
              ollama pull llama3.1:8b-instruct-q8_0
              echo "Model pulled. Keeping container alive."
              wait $PID
          env:
            # Keep loaded models resident indefinitely (-1 = never unload).
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            # Bind all interfaces so the Service can reach the pod.
            - name: OLLAMA_HOST
              value: "0.0.0.0"
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
            limits:
              memory: "12Gi"
              cpu: "4"
          ports:
            - containerPort: 11434
              name: http
          volumeMounts:
            - name: ollama-storage
              mountPath: /root/.ollama
          # BUG FIX: the original probes used /api/health, which Ollama does
          # not serve (404 -> pod never Ready, liveness kept killing it).
          # "/" returns HTTP 200 ("Ollama is running") and is the
          # conventional health check.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 30
            periodSeconds: 5
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-storage
---
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: llm
spec:
  type: ClusterIP
  selector:
    app: ollama
  ports:
    - port: 11434
      targetPort: 11434
      protocol: TCP
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-storage
  namespace: llm
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: openebs-hostpath
  resources:
    requests:
      storage: 50Gi