#!/bin/bash
# Quick build script for qwen3:8b (lower memory usage)
# Use this if your server has less than 12GB RAM

set -e

echo "=========================================="
echo "Building with qwen3:8b (Lower Memory)"
echo "=========================================="
echo ""
echo "Memory requirements:"
echo "  - qwen3:8b:  ~5GB RAM"
echo "  - qwen3:14b: ~10GB RAM"
echo ""

# Check if the 8b model is available locally
if ! ollama list | grep -q "qwen3:8b"; then
    echo "ERROR: qwen3:8b model not found!"
    echo ""
    echo "Please download it first:"
    echo "  ollama pull qwen3:8b"
    echo ""
    exit 1
fi

# Clean up previous build artifacts
echo "Cleaning up previous builds..."
rm -rf ollama-models/
docker rmi system-prompt-optimizer:allinone 2>/dev/null || true

# Export the 8b model from the local Ollama store
echo ""
echo "Exporting qwen3:8b model..."
mkdir -p ollama-models/models/{manifests/registry.ollama.ai/library,blobs}

# Extract the unique blob hashes referenced by a model manifest
get_blobs_from_manifest() {
    local manifest_file=$1
    grep -o 'sha256:[a-f0-9]\{64\}' "$manifest_file" | sed 's/sha256://' | sort -u
}

# Copy a model's manifest and its blobs into ollama-models/
copy_model() {
    local model_name=$1
    local model_tag=$2
    local manifest_dir="$HOME/.ollama/models/manifests/registry.ollama.ai/library/$model_name"

    if [ ! -d "$manifest_dir" ]; then
        echo "ERROR: Model manifest not found: $manifest_dir"
        return 1
    fi

    echo "  Copying $model_name:$model_tag manifest..."
    mkdir -p "ollama-models/models/manifests/registry.ollama.ai/library/$model_name"

    if [ -f "$manifest_dir/$model_tag" ]; then
        cp "$manifest_dir/$model_tag" "ollama-models/models/manifests/registry.ollama.ai/library/$model_name/"

        echo "  Finding blob files for $model_name:$model_tag..."
        # Declare and assign separately so the function's exit status isn't masked
        local blob_hashes
        blob_hashes=$(get_blobs_from_manifest "$manifest_dir/$model_tag")
        local blob_count=0

        for blob_hash in $blob_hashes; do
            local blob_file="$HOME/.ollama/models/blobs/sha256-$blob_hash"
            if [ -f "$blob_file" ]; then
                # Don't suppress cp errors: with `set -e` a failed copy aborts
                # the script, and a hidden error message makes that undebuggable
                cp "$blob_file" "ollama-models/models/blobs/"
                blob_count=$((blob_count + 1))
            fi
        done

        echo "  ✓ $model_name:$model_tag copied ($blob_count blobs)"
    else
        echo "ERROR: Manifest file not found: $manifest_dir/$model_tag"
        return 1
    fi
}

# Copy models
copy_model "qwen3" "8b" || exit 1
copy_model "qwen3-embedding" "4b" || exit 1

echo ""
echo "✓ Models exported successfully"
echo ""

# Switch config.py to the 8b model (keep a .bak backup to restore later)
echo "Updating config.py to use qwen3:8b..."
sed -i.bak 's/DEFAULT_CHAT_MODEL = "qwen3:14b"/DEFAULT_CHAT_MODEL = "qwen3:8b"/' config.py

# Switch docker-entrypoint.sh to check for the 8b model
echo "Updating docker-entrypoint.sh to check for qwen3:8b..."
sed -i.bak 's/qwen3:14b/qwen3:8b/g' docker-entrypoint.sh

# Build image. With `set -e`, a plain `docker build` followed by a `$?` check
# would never reach the check (the script exits on failure first), so test the
# command directly and restore the backups before exiting.
echo ""
echo "Building Docker image..."
if ! docker build --platform linux/amd64 \
    -f Dockerfile.allinone \
    -t system-prompt-optimizer:allinone .; then
    echo ""
    echo "Build failed!"
    # Restore backups
    mv config.py.bak config.py
    mv docker-entrypoint.sh.bak docker-entrypoint.sh
    exit 1
fi

# Export image
echo ""
echo "Exporting Docker image..."
docker save -o system-prompt-optimizer-allinone.tar system-prompt-optimizer:allinone

# Restore original files
mv config.py.bak config.py
mv docker-entrypoint.sh.bak docker-entrypoint.sh

echo ""
echo "=========================================="
echo "Build Complete!"
echo "==========================================" ls -lh system-prompt-optimizer-allinone.tar echo "" echo "This image uses qwen3:8b (~5GB RAM required)" echo "" echo "Transfer to server and run:" echo "" echo " CPU mode:" echo " docker load -i system-prompt-optimizer-allinone.tar" echo " docker run -d -p 8010:8010 --restart unless-stopped system-prompt-optimizer:allinone" echo "" echo " GPU mode (recommended):" echo " docker load -i system-prompt-optimizer-allinone.tar" echo " docker run -d --gpus all -p 8010:8010 --restart unless-stopped system-prompt-optimizer:allinone" echo "" echo "Note: GPU mode provides 5-10x faster inference." echo " See GPU_DEPLOYMENT.md for GPU setup instructions." echo ""