# syntax=docker/dockerfile:1

# Platform is deliberately pinned: the vendored Ollama tarball below is
# amd64-only, so building this image for any other arch would break it.
FROM --platform=linux/amd64 python:3.10-slim

# All application files live under /app (WORKDIR creates it if missing).
WORKDIR /app

# System dependencies:
#   ca-certificates - TLS trust store for outbound HTTPS
#   curl            - used by the HEALTHCHECK below
# --no-install-recommends keeps the layer minimal; the apt list cache is
# removed in the same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama from a pre-downloaded amd64 tarball to avoid slow downloads
# during build. Using v0.13.1 (latest stable as of Dec 2024).
# Extract and delete in the same layer so the archive doesn't bloat the image.
COPY ollama-linux-amd64.tgz /tmp/ollama-linux-amd64.tgz
RUN tar -C /usr -xzf /tmp/ollama-linux-amd64.tgz \
    && rm /tmp/ollama-linux-amd64.tgz

# Python dependencies first: this layer stays cached until requirements.txt
# itself changes, so source edits below don't trigger a reinstall.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code (copied after deps to preserve the pip cache layer).
COPY _qwen_xinference_demo/ ./_qwen_xinference_demo/
COPY frontend/ ./frontend/
COPY config.py .

# Runtime directories: app output dir and Ollama's model store.
# NOTE(review): the image runs as root and the model store lives under
# /root/.ollama — moving to a non-root USER would require relocating the
# store (e.g. via OLLAMA_MODELS); confirm against the entrypoint script
# before hardening.
RUN mkdir -p outputs /root/.ollama

# Pre-downloaded Ollama models (qwen3:14b and qwen3-embedding:4b) are baked
# into the image so the container starts without pulling at runtime.
COPY ollama-models/ /root/.ollama/

# Documentation only (ports are published at `docker run`):
#   8010  - web application
#   11434 - Ollama API (internal service)
EXPOSE 8010 11434

# PYTHONUNBUFFERED=1 - stream Python stdout/stderr straight to container logs
# OLLAMA_HOST        - where the app reaches the co-located Ollama server
# NVIDIA_*           - let the NVIDIA container runtime expose GPUs to Ollama
#                      (GPU is auto-detected if available)
ENV PYTHONUNBUFFERED=1 \
    OLLAMA_HOST=http://localhost:11434 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Startup script (launches Ollama and the web app). --chmod sets the execute
# bit at copy time, avoiding the extra layer a follow-up `RUN chmod` creates.
COPY --chmod=755 docker-entrypoint.sh /docker-entrypoint.sh

# Probe only the web application; Ollama is internal and supervised by the
# entrypoint. The generous start period allows for model loading.
# -f: fail on HTTP errors; -sS: quiet progress but still show real errors.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS http://localhost:8010/health || exit 1

# Exec form so the script runs as PID 1 and receives SIGTERM on `docker stop`.
ENTRYPOINT ["/docker-entrypoint.sh"]