Add GPU support and improve Docker deployment

- Add GPU deployment support with NVIDIA runtime
- Update Dockerfile.allinone with GPU environment variables
- Add comprehensive GPU_DEPLOYMENT.md guide
- Make port 11434 (Ollama) optional for security
- Update DEPLOYMENT.md with CPU and GPU deployment options
- Simplify default docker run commands
- Update healthcheck to only check web application
- Add memory requirements documentation
- Create MEMORY_REQUIREMENTS.md with model comparison
- Add build-8b.sh script for lower memory usage
- Document OOM troubleshooting steps
- Improve Docker build process
- Add BUILD_TROUBLESHOOTING.md for common issues
- Add DISTRIBUTION.md for image distribution methods
- Update .gitignore to exclude large binary files
- Improve docker-entrypoint.sh with better diagnostics
- Update .dockerignore to include ollama-linux-amd64.tgz
- Add backup file exclusions to .gitignore
2025-12-08 17:08:45 +08:00
parent 6426b73a5e
commit 0b5319b31c
7 changed files with 387 additions and 20 deletions
--- a/Dockerfile.allinone
+++ b/Dockerfile.allinone
@@ -1,16 +1,20 @@
-FROM python:3.10-slim
+FROM --platform=linux/amd64 python:3.10-slim

 # Set working directory
 WORKDIR /app

-# Install system dependencies including curl for Ollama
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

-# Install Ollama
-RUN curl -fsSL https://ollama.com/install.sh | sh
+# Install Ollama manually for amd64
+# Copy pre-downloaded Ollama binary to avoid slow downloads during build
+# Using v0.13.1 (latest stable as of Dec 2025)
+COPY ollama-linux-amd64.tgz /tmp/ollama-linux-amd64.tgz
+RUN tar -C /usr -xzf /tmp/ollama-linux-amd64.tgz \
+    && rm /tmp/ollama-linux-amd64.tgz

 # Copy requirements file
 COPY requirements.txt .
@@ -36,14 +40,18 @@ EXPOSE 8010 11434
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 ENV OLLAMA_HOST=http://localhost:11434
+# Expose all NVIDIA GPUs to the container; these variables are read by the NVIDIA container runtime when the container is started with GPU access
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

 # Copy startup script
 COPY docker-entrypoint.sh /docker-entrypoint.sh
 RUN chmod +x /docker-entrypoint.sh

 # Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
-    CMD curl -f http://localhost:8010/health && curl -f http://localhost:11434/api/tags || exit 1
+# Only check the web application, not Ollama (internal service)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:8010/health || exit 1

 # Run the startup script
 ENTRYPOINT ["/docker-entrypoint.sh"]