- Add GPU deployment support with NVIDIA runtime
- Update Dockerfile.allinone with GPU environment variables
- Add comprehensive GPU_DEPLOYMENT.md guide
- Make port 11434 (Ollama) optional for security (opt-in example below)
- Update DEPLOYMENT.md with CPU and GPU deployment options
- Simplify default docker run commands
- Update healthcheck to only check the web application
- Add memory requirements documentation
- Create MEMORY_REQUIREMENTS.md with model comparison
- Add build-8b.sh script for lower memory usage
- Document OOM troubleshooting steps
- Improve Docker build process
- Add BUILD_TROUBLESHOOTING.md for common issues
- Add DISTRIBUTION.md for image distribution methods
- Update .gitignore to exclude large binary files
- Improve docker-entrypoint.sh with better diagnostics
- Update .dockerignore to include ollama-linux-amd64.tgz
- Add backup file exclusions to .gitignore
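The Ollama port stays unpublished by default; a sketch of opting back in, reusing the image name and web port from the run commands at the end of this script:

    docker run -d -p 8010:8010 -p 11434:11434 --restart unless-stopped system-prompt-optimizer:allinone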
142 lines · 4.2 KiB · Bash · Executable File

#!/bin/bash
# Quick build script for qwen3:8b (lower memory usage)
# Use this if your server has less than 12GB RAM

set -e

echo "=========================================="
echo "Building with qwen3:8b (Lower Memory)"
echo "=========================================="
echo ""
echo "Memory requirements:"
echo "  - qwen3:8b:  ~5GB RAM"
echo "  - qwen3:14b: ~10GB RAM"
echo ""

# Check if 8b model is available
if ! ollama list | grep -q "qwen3:8b"; then
    echo "ERROR: qwen3:8b model not found!"
    echo ""
    echo "Please download it first:"
    echo "  ollama pull qwen3:8b"
    echo ""
    exit 1
fi

# Clean up
echo "Cleaning up previous builds..."
rm -rf ollama-models/
docker rmi system-prompt-optimizer:allinone 2>/dev/null || true
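# ("|| true" keeps "set -e" from aborting when there is no previous image to remove)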

# Export 8b model
echo ""
echo "Exporting qwen3:8b model..."
mkdir -p ollama-models/models/{manifests/registry.ollama.ai/library,blobs}
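# This mirrors the layout of the local Ollama store (~/.ollama/models):
#   manifests/registry.ollama.ai/library/<model>/<tag>  - manifest file
#   blobs/sha256-<hash>                                 - model layer data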

# Function to get blob hashes from manifest
get_blobs_from_manifest() {
    local manifest_file=$1
    grep -o 'sha256:[a-f0-9]\{64\}' "$manifest_file" | sed 's/sha256://' | sort -u
}
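# A manifest references its blobs by digests of the form "sha256:<64 hex chars>";
# the pipeline above extracts each unique hash so the matching blob files can be copied.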

# Function to copy model files
copy_model() {
    local model_name=$1
    local model_tag=$2
    local manifest_dir="$HOME/.ollama/models/manifests/registry.ollama.ai/library/$model_name"

    if [ ! -d "$manifest_dir" ]; then
        echo "ERROR: Model manifest not found: $manifest_dir"
        return 1
    fi

    echo "  Copying $model_name:$model_tag manifest..."
    mkdir -p "ollama-models/models/manifests/registry.ollama.ai/library/$model_name"

    if [ -f "$manifest_dir/$model_tag" ]; then
        cp "$manifest_dir/$model_tag" "ollama-models/models/manifests/registry.ollama.ai/library/$model_name/"

        echo "  Finding blob files for $model_name:$model_tag..."
        # Declare and assign separately so the assignment cannot mask a failure
        local blob_hashes
        blob_hashes=$(get_blobs_from_manifest "$manifest_dir/$model_tag")
        local blob_count=0

        for blob_hash in $blob_hashes; do
            local blob_file="$HOME/.ollama/models/blobs/sha256-$blob_hash"
            if [ -f "$blob_file" ]; then
                cp "$blob_file" "ollama-models/models/blobs/" 2>/dev/null
                blob_count=$((blob_count + 1))
            fi
        done

        echo "  ✓ $model_name:$model_tag copied ($blob_count blobs)"
    else
        echo "ERROR: Manifest file not found: $manifest_dir/$model_tag"
        return 1
    fi
}

# Copy models
copy_model "qwen3" "8b" || exit 1
copy_model "qwen3-embedding" "4b" || exit 1

echo ""
echo "✓ Models exported successfully"
echo ""

# Update config.py to use 8b
echo "Updating config.py to use qwen3:8b..."
sed -i.bak 's/DEFAULT_CHAT_MODEL = "qwen3:14b"/DEFAULT_CHAT_MODEL = "qwen3:8b"/' config.py
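# (-i.bak leaves a config.py.bak backup so the original can be restored after the build)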

# Update docker-entrypoint.sh to check for 8b
echo "Updating docker-entrypoint.sh to check for qwen3:8b..."
sed -i.bak 's/qwen3:14b/qwen3:8b/g' docker-entrypoint.sh

# Build image
echo ""
echo "Building Docker image..."
# Run the build inside "if !" so a failure is handled here rather than
# tripping "set -e" before the backups can be restored
if ! docker build --platform linux/amd64 \
    -f Dockerfile.allinone \
    -t system-prompt-optimizer:allinone .; then
    echo ""
    echo "Build failed!"
    # Restore backups
    mv config.py.bak config.py
    mv docker-entrypoint.sh.bak docker-entrypoint.sh
    exit 1
fi

# Export image
echo ""
echo "Exporting Docker image..."
docker save -o system-prompt-optimizer-allinone.tar system-prompt-optimizer:allinone
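# Optional: the tar is large because it bundles the model blobs; piping the save
# through gzip can shrink the transfer, e.g.:
#   docker save system-prompt-optimizer:allinone | gzip > system-prompt-optimizer-allinone.tar.gz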

# Restore original files
mv config.py.bak config.py
mv docker-entrypoint.sh.bak docker-entrypoint.sh

echo ""
echo "=========================================="
echo "Build Complete!"
echo "=========================================="
ls -lh system-prompt-optimizer-allinone.tar
echo ""
echo "This image uses qwen3:8b (~5GB RAM required)"
echo ""
echo "Transfer to server and run:"
echo ""
echo "  CPU mode:"
echo "    docker load -i system-prompt-optimizer-allinone.tar"
echo "    docker run -d -p 8010:8010 --restart unless-stopped system-prompt-optimizer:allinone"
echo ""
echo "  GPU mode (recommended):"
echo "    docker load -i system-prompt-optimizer-allinone.tar"
echo "    docker run -d --gpus all -p 8010:8010 --restart unless-stopped system-prompt-optimizer:allinone"
echo ""
echo "Note: GPU mode provides 5-10x faster inference."
echo "      See GPU_DEPLOYMENT.md for GPU setup instructions."
echo ""