# opro_demo/build-allinone.sh (134 lines, 3.9 KiB, Bash)
# NOTE(review): the lines above the shebang were web-UI paste residue
# ("Files / Raw / Normal View / History"); kept as a comment so the file
# parses as shell. The shebang below should really be line 1 of the file.
#!/bin/bash
# Build all-in-one Docker image with Ollama and models
# This creates a complete offline-deployable image
#
# Strict mode: exit on command failure (-e), on use of unset variables
# (-u), and on a failure anywhere in a pipeline (pipefail) — stronger
# than the bare `set -e` this script previously relied on.
set -euo pipefail

# Image coordinates and the tarball name used for offline transfer.
# readonly guards against accidental reassignment later in the script.
readonly IMAGE_NAME="system-prompt-optimizer"
readonly IMAGE_TAG="allinone"
readonly EXPORT_FILE="${IMAGE_NAME}-${IMAGE_TAG}.tar"

echo "=========================================="
echo "Building All-in-One Docker Image"
echo "=========================================="
echo ""
echo "This will create a Docker image containing:"
echo " - Python application"
echo " - Ollama service (v0.13.1)"
echo " - qwen3:14b model"
echo " - qwen3-embedding:4b model"
echo ""
echo "Target platform: linux/amd64 (x86_64)"
echo ""
echo "WARNING: The final image will be 10-20GB in size!"
echo ""
echo "NOTE: If you're building on Apple Silicon (M1/M2/M3),"
echo " Docker will use emulation which may be slower."
echo " The image will still work on x86_64 servers."
echo ""
# Preflight: the build context must already contain the exported model
# blobs and the Linux Ollama binary. Fail fast with instructions if not.
if [ -d "ollama-models" ]; then
  echo "✓ Found ollama-models directory"
  echo ""
else
  echo "ERROR: ollama-models directory not found!"
  echo ""
  echo "Please run ./export-ollama-models.sh first to export the models."
  exit 1
fi

if [ -f "ollama-linux-amd64.tgz" ]; then
  echo "✓ Found ollama-linux-amd64.tgz"
  echo ""
else
  echo "ERROR: ollama-linux-amd64.tgz not found!"
  echo ""
  echo "Please download it first:"
  echo " curl -L -o ollama-linux-amd64.tgz https://github.com/ollama/ollama/releases/download/v0.13.1/ollama-linux-amd64.tgz"
  echo ""
  exit 1
fi
# Show free space in the current directory so the user can judge whether
# the ~20GB build footprint fits before committing to a long build.
disk_free=$(df -h . | awk 'NR==2 {print $4}')
echo "Available disk space: ${disk_free}"
echo "Required: ~20GB for build process"
echo ""
# Single-keystroke confirmation: only 'y' or 'Y' proceeds.
read -p "Continue with build? (y/n) " -n 1 -r
echo
case "$REPLY" in
  [Yy]) ;;
  *)
    echo "Build cancelled."
    exit 1
    ;;
esac
echo ""
echo "=========================================="
echo "Building Docker image..."
echo "=========================================="
echo "Platform: linux/amd64 (x86_64)"
echo "This may take 20-40 minutes depending on your machine..."
echo ""
# Build for amd64 platform explicitly
docker build --platform linux/amd64 -f Dockerfile.allinone -t ${IMAGE_NAME}:${IMAGE_TAG} .
echo ""
echo "=========================================="
echo "Build complete!"
echo "=========================================="
docker images | grep ${IMAGE_NAME}
echo ""
echo "=========================================="
echo "Exporting image to ${EXPORT_FILE}..."
echo "=========================================="
echo "This will take several minutes..."
docker save -o ${EXPORT_FILE} ${IMAGE_NAME}:${IMAGE_TAG}
echo ""
echo "=========================================="
echo "Export complete!"
echo "=========================================="
ls -lh ${EXPORT_FILE}
echo ""
echo "=========================================="
echo "Deployment Instructions"
echo "=========================================="
echo ""
echo "1. Transfer ${EXPORT_FILE} to target server:"
echo " scp ${EXPORT_FILE} user@server:/path/"
echo ""
echo "2. On target server, load the image:"
echo " docker load -i ${EXPORT_FILE}"
echo ""
echo "3. Run the container:"
echo ""
echo " CPU mode:"
echo " docker run -d \\"
echo " --name system-prompt-optimizer \\"
echo " -p 8010:8010 \\"
echo " --restart unless-stopped \\"
echo " ${IMAGE_NAME}:${IMAGE_TAG}"
echo ""
echo " GPU mode (recommended if NVIDIA GPU available):"
echo " docker run -d \\"
echo " --name system-prompt-optimizer \\"
echo " --gpus all \\"
echo " -p 8010:8010 \\"
echo " --restart unless-stopped \\"
echo " ${IMAGE_NAME}:${IMAGE_TAG}"
echo ""
echo " Note: Port 11434 (Ollama) is optional and only needed for debugging."
echo " GPU mode provides 5-10x faster inference. See GPU_DEPLOYMENT.md for details."
echo ""
echo "4. Access the application:"
echo " http://<server-ip>:8010/ui/opro.html"
echo ""
echo "See DEPLOYMENT.md for more details."