# infrapuzzle/k8s/llm/ollama.yaml

# Ollama LLM server: one replica that serves HTTP on port 11434 and pulls
# llama3.1:8b-instruct-q8_0 at startup, with /root/.ollama on a PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: llm
  labels:
    app: ollama
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      containers:
        - name: ollama
          # NOTE(review): `latest` is a floating tag; consider pinning a
          # release so rollouts are reproducible.
          image: ollama/ollama:latest
          # Wrapper script: run the server in the background, pull the model
          # once the server answers, then block on the server process.
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Start Ollama in background
              /bin/ollama serve &
              PID=$!
              echo "Waiting for Ollama..."
              # Poll the API instead of sleeping a fixed time, so the pull
              # neither races a slow start nor waits longer than needed.
              until ollama list >/dev/null 2>&1; do
                # Stop instead of polling forever if the server has died.
                if ! kill -0 "$PID" 2>/dev/null; then
                  echo "Ollama server exited during startup." >&2
                  exit 1
                fi
                sleep 1
              done
              echo "Pulling model..."
              if ollama pull llama3.1:8b-instruct-q8_0; then
                echo "Model pulled. Keeping container alive."
              else
                # Report the failure but keep the server running.
                echo "Model pull failed. Keeping container alive." >&2
              fi
              # The container lives exactly as long as the server process.
              wait "$PID"
          env:
            # Negative keep-alive: per the Ollama FAQ, a loaded model then
            # stays in memory instead of being unloaded after idling.
            - name: OLLAMA_KEEP_ALIVE
              value: "-1"
            # Bind address for the server (all interfaces).
            - name: OLLAMA_HOST
              value: "0.0.0.0"
          ports:
            # Named so the probes below can refer to it as `http`.
            - containerPort: 11434
              name: http
          resources:
            requests:
              memory: "8Gi"
              cpu: "2"
            limits:
              memory: "12Gi"
              cpu: "4"
          volumeMounts:
            # Presumably Ollama's data/model directory for root; keeping it
            # on the PVC avoids re-downloading after a restart — confirm.
            - name: ollama-storage
              mountPath: /root/.ollama
          # Restart the container if GET / stops answering; first check
          # after 300s, then every 10s.
          livenessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 300
            periodSeconds: 10
          # Gate Service traffic on GET /; first check after 30s, then
          # every 5s.
          # NOTE(review): if `/` answers as soon as the server is up, the pod
          # can turn Ready while the model is still downloading — confirm
          # that is acceptable.
          readinessProbe:
            httpGet:
              path: /
              port: http
            initialDelaySeconds: 30
            periodSeconds: 5
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-storage
---
# Cluster-internal endpoint for the Ollama pods: forwards TCP 11434 to
# port 11434 on every pod labelled app=ollama in the llm namespace.
apiVersion: v1
kind: Service
metadata:
  namespace: llm
  name: ollama
spec:
  selector:
    app: ollama
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
---
# Storage claim referenced by the ollama Deployment: 50Gi from the
# openebs-hostpath storage class, ReadWriteOnce access.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: llm
  name: ollama-storage
spec:
  storageClassName: openebs-hostpath
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi