- Add GPU deployment support with NVIDIA runtime
- Update Dockerfile.allinone with GPU environment variables
- Add comprehensive GPU_DEPLOYMENT.md guide
- Make port 11434 (Ollama) optional for security
- Update DEPLOYMENT.md with CPU and GPU deployment options
- Simplify default docker run commands
- Update healthcheck to only check web application
- Add memory requirements documentation
- Create MEMORY_REQUIREMENTS.md with model comparison
- Add build-8b.sh script for lower memory usage
- Document OOM troubleshooting steps
- Improve Docker build process
- Add BUILD_TROUBLESHOOTING.md for common issues
- Add DISTRIBUTION.md for image distribution methods
- Update .gitignore to exclude large binary files
- Improve docker-entrypoint.sh with better diagnostics
- Update .dockerignore to include ollama-linux-amd64.tgz
- Add backup file exclusions to .gitignore
59 lines
1.6 KiB
Docker
59 lines
1.6 KiB
Docker
# syntax=docker/dockerfile:1
# All-in-one image: Python web app (port 8010) plus a bundled Ollama server
# (port 11434) with pre-loaded models. Pinned to linux/amd64 because the
# vendored Ollama binary below is amd64-only.
FROM --platform=linux/amd64 python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the image minimal; the apt list cache is
# removed in the same layer so it never ships in the image.
# curl is required at runtime by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama manually for amd64
# Copy pre-downloaded Ollama binary to avoid slow downloads during build
# Using v0.13.1 (latest stable as of Dec 2024)
COPY ollama-linux-amd64.tgz /tmp/ollama-linux-amd64.tgz
RUN tar -C /usr -xzf /tmp/ollama-linux-amd64.tgz \
    && rm /tmp/ollama-linux-amd64.tgz

# Copy the requirements file before the application code so the Python
# dependency layer stays cached when only source files change
COPY requirements.txt .

# Install Python dependencies (--no-cache-dir: don't ship pip's wheel cache)
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY _qwen_xinference_demo/ ./_qwen_xinference_demo/
COPY frontend/ ./frontend/
COPY config.py .

# Create necessary directories.
# NOTE(review): the container runs as root because Ollama's model store lives
# at /root/.ollama; moving to a non-root user would require relocating the
# models and setting OLLAMA_MODELS accordingly.
RUN mkdir -p outputs /root/.ollama

# Copy pre-downloaded Ollama models
# This includes qwen3:14b and qwen3-embedding:4b
COPY ollama-models/ /root/.ollama/

# Expose ports (documentation only — nothing is published until `docker run -p`).
# 11434 (Ollama) is an internal service and does not need to be published.
EXPOSE 8010 11434

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV OLLAMA_HOST=http://localhost:11434
# Enable GPU support for Ollama (will auto-detect NVIDIA GPU if available)
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Copy startup script. --chmod sets the execute bit in the same layer,
# avoiding the extra layer (and duplicated file bytes) of a follow-up
# `RUN chmod +x`.
COPY --chmod=755 docker-entrypoint.sh /docker-entrypoint.sh

# Health check
# Only check the web application, not Ollama (internal service).
# start-period=60s gives the entrypoint time to boot Ollama and the app.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8010/health || exit 1

# Run the startup script (exec form: the script is PID 1's parent process
# chain, so it must `exec` the app to forward SIGTERM correctly)
ENTRYPOINT ["/docker-entrypoint.sh"]