#!/bin/bash
# Build all-in-one Docker image with Ollama and models.
# This creates a complete offline-deployable image.
#
# Preflight section: prints what will be built, verifies that the exported
# Ollama models and the Ollama release tarball are present in the working
# directory, reports free disk space, and asks for interactive confirmation
# before starting the (long, large) build.
#
# Exits non-zero if prerequisites are missing or the user declines.

# Strict mode: abort on errors, unset variables, and mid-pipeline failures.
set -euo pipefail

readonly IMAGE_NAME="system-prompt-optimizer"
readonly IMAGE_TAG="allinone"
readonly EXPORT_FILE="${IMAGE_NAME}-${IMAGE_TAG}.tar"

echo "=========================================="
echo "Building All-in-One Docker Image"
echo "=========================================="
echo ""
echo "This will create a Docker image containing:"
echo " - Python application"
echo " - Ollama service (v0.13.1)"
echo " - qwen3:14b model"
echo " - qwen3-embedding:4b model"
echo ""
echo "Target platform: linux/amd64 (x86_64)"
echo ""
echo "WARNING: The final image will be 10-20GB in size!"
echo ""
echo "NOTE: If you're building on Apple Silicon (M1/M2/M3),"
echo " Docker will use emulation which may be slower."
echo " The image will still work on x86_64 servers."
echo ""

# The model blobs must already be exported locally; the Dockerfile COPYs them.
if [[ ! -d "ollama-models" ]]; then
  echo "ERROR: ollama-models directory not found!"
  echo ""
  echo "Please run ./export-ollama-models.sh first to export the models."
  exit 1
fi
echo "✓ Found ollama-models directory"
echo ""

# The Ollama server binary is vendored into the image rather than downloaded
# at build time, so the build works fully offline.
if [[ ! -f "ollama-linux-amd64.tgz" ]]; then
  echo "ERROR: ollama-linux-amd64.tgz not found!"
  echo ""
  echo "Please download it first:"
  echo " curl -L -o ollama-linux-amd64.tgz https://github.com/ollama/ollama/releases/download/v0.13.1/ollama-linux-amd64.tgz"
  echo ""
  exit 1
fi
echo "✓ Found ollama-linux-amd64.tgz"
echo ""

# Informational only — we report free space but do not enforce a minimum.
# -P (POSIX output) keeps each filesystem on one line so 'NR==2' is reliable
# even when the device name is long enough to wrap -h output.
AVAILABLE_SPACE=$(df -Ph . | awk 'NR==2 {print $4}')
echo "Available disk space: $AVAILABLE_SPACE"
echo "Required: ~20GB for build process"
echo ""

# Single-keystroke confirmation; anything other than y/Y cancels.
read -p "Continue with build? (y/n) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
  echo "Build cancelled."
  exit 1
fi

echo ""
echo "=========================================="
echo "Building Docker image..."
echo "=========================================="
echo "Platform: linux/amd64 (x86_64)"
echo "This may take 20-40 minutes depending on your machine..."
echo ""

# Build for amd64 platform explicitly so the image runs on x86_64 servers
# even when built on Apple Silicon (Docker falls back to emulation).
docker build --platform linux/amd64 -f Dockerfile.allinone -t "${IMAGE_NAME}:${IMAGE_TAG}" .

echo ""
echo "=========================================="
echo "Build complete!"
echo "=========================================="
# List only this repository's images. Filtering via 'docker images <repo>'
# instead of '| grep' avoids substring matches against unrelated repos and
# cannot trip 'set -e' when nothing matches (grep exits 1 on no match).
docker images "${IMAGE_NAME}"

echo ""
echo "=========================================="
echo "Exporting image to ${EXPORT_FILE}..."
echo "=========================================="
echo "This will take several minutes..."

# Serialize the image to a tarball for offline transfer to the target host.
docker save -o "${EXPORT_FILE}" "${IMAGE_NAME}:${IMAGE_TAG}"

echo ""
echo "=========================================="
echo "Export complete!"
echo "=========================================="
ls -lh "${EXPORT_FILE}"

echo ""
echo "=========================================="
echo "Deployment Instructions"
echo "=========================================="
echo ""
echo "1. Transfer ${EXPORT_FILE} to target server:"
echo " scp ${EXPORT_FILE} user@server:/path/"
echo ""
echo "2. On target server, load the image:"
echo " docker load -i ${EXPORT_FILE}"
echo ""
echo "3. Run the container:"
echo ""
echo " CPU mode:"
echo " docker run -d \\"
echo " --name system-prompt-optimizer \\"
echo " -p 8010:8010 \\"
echo " --restart unless-stopped \\"
echo " ${IMAGE_NAME}:${IMAGE_TAG}"
echo ""
echo " GPU mode (recommended if NVIDIA GPU available):"
echo " docker run -d \\"
echo " --name system-prompt-optimizer \\"
echo " --gpus all \\"
echo " -p 8010:8010 \\"
echo " --restart unless-stopped \\"
echo " ${IMAGE_NAME}:${IMAGE_TAG}"
echo ""
echo " Note: Port 11434 (Ollama) is optional and only needed for debugging."
echo " GPU mode provides 5-10x faster inference. See GPU_DEPLOYMENT.md for details."
echo ""
echo "4. Access the application:"
# Restored the host placeholder — the original printed the broken URL
# "http://:8010/..." with no hostname.
echo " http://<server-ip>:8010/ui/opro.html"
echo ""
echo "See DEPLOYMENT.md for more details."