Add GPU support and improve Docker deployment

- Add GPU deployment support with NVIDIA runtime
  - Update Dockerfile.allinone with GPU environment variables
  - Add comprehensive GPU_DEPLOYMENT.md guide
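
  With the NVIDIA Container Toolkit installed on the host, GPU deployment comes down to passing a GPU flag to docker run. A minimal sketch (the image name and tag are illustrative; GPU_DEPLOYMENT.md has the full procedure):

    docker run -d --gpus all -p 8010:8010 system-prompt-optimizer:allinone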

- Make port 11434 (Ollama) optional for security
  - Update DEPLOYMENT.md with CPU and GPU deployment options
  - Simplify default docker run commands
  - Update healthcheck to only check web application
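
  Because 11434 is no longer published by default, the standard run command exposes only the web application, and the Ollama API becomes an explicit opt-in. A sketch (image name illustrative):

    # Default: only the web app is reachable from the host
    docker run -d -p 8010:8010 system-prompt-optimizer:allinone

    # Opt in to exposing the Ollama API as well
    docker run -d -p 8010:8010 -p 11434:11434 system-prompt-optimizer:allinone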

- Add memory requirements documentation
  - Create MEMORY_REQUIREMENTS.md with model comparison
  - Add build-8b.sh script for lower memory usage
  - Document OOM troubleshooting steps
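
  When diagnosing OOM kills it helps to watch live memory usage, and an explicit limit makes failures reproducible rather than host-dependent. A sketch (the container name matches the one used in the entrypoint; the 24g figure is an assumption, not a measured requirement — see MEMORY_REQUIREMENTS.md):

    docker stats system-prompt-optimizer
    docker run -d --memory=24g -p 8010:8010 system-prompt-optimizer:allinone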

- Improve Docker build process
  - Add BUILD_TROUBLESHOOTING.md for common issues
  - Add DISTRIBUTION.md for image distribution methods
  - Update .gitignore to exclude large binary files
  - Improve docker-entrypoint.sh with better diagnostics
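
  For reference, a representative build of the all-in-one image from Dockerfile.allinone (tag illustrative; BUILD_TROUBLESHOOTING.md covers common failures):

    docker build -f Dockerfile.allinone -t system-prompt-optimizer:allinone .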

- Update .dockerignore to include ollama-linux-amd64.tgz
- Add backup file exclusions to .gitignore
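
The two ignore files move in opposite directions: .dockerignore must keep the bundled Ollama tarball in the build context, while .gitignore excludes it from version control along with backup files. Illustrative entries (the exact patterns in the commit may differ):

    # .dockerignore: re-include the tarball in the build context
    !ollama-linux-amd64.tgz

    # .gitignore: exclude large binaries and backup files
    ollama-linux-amd64.tgz
    *.bak
    *~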
commit 0b5319b31c, parent 6426b73a5e, 2025-12-08 17:08:45 +08:00
7 changed files with 387 additions and 20 deletions

docker-entrypoint.sh

@@ -2,34 +2,102 @@
set -e
echo "=========================================="
echo "System Prompt Optimizer - Starting Up"
echo "=========================================="
echo ""
# Check if Ollama binary exists
if ! command -v ollama &> /dev/null; then
    echo "ERROR: Ollama binary not found!"
    echo "Expected location: /usr/bin/ollama or /usr/local/bin/ollama"
    ls -la /usr/bin/ollama* 2>/dev/null || echo "No ollama in /usr/bin/"
    ls -la /usr/local/bin/ollama* 2>/dev/null || echo "No ollama in /usr/local/bin/"
    exit 1
fi
echo "✓ Ollama binary found: $(which ollama)"
echo ""
# Check if model files exist
echo "Checking model files..."
if [ ! -d "/root/.ollama/models" ]; then
    echo "ERROR: /root/.ollama/models directory not found!"
    exit 1
fi
MANIFEST_COUNT=$(find /root/.ollama/models/manifests -type f 2>/dev/null | wc -l)
BLOB_COUNT=$(find /root/.ollama/models/blobs -type f 2>/dev/null | wc -l)
echo "✓ Found $MANIFEST_COUNT manifest files"
echo "✓ Found $BLOB_COUNT blob files"
if [ "$BLOB_COUNT" -lt 10 ]; then
echo "WARNING: Very few blob files found. Models may not be complete."
fi
echo ""
echo "Starting Ollama service..."
ollama serve > /tmp/ollama.log 2>&1 &
OLLAMA_PID=$!
# Wait for Ollama to be ready
echo "Waiting for Ollama to start..."
OLLAMA_READY=false
for i in {1..60}; do
    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
        echo "Ollama is ready!"
        OLLAMA_READY=true
        break
    fi
    echo "Waiting for Ollama... ($i/60)"
    sleep 3
done
if [ "$OLLAMA_READY" = false ]; then
echo ""
echo "ERROR: Ollama failed to start within 3 minutes!"
echo ""
echo "Ollama logs:"
cat /tmp/ollama.log
echo ""
echo "Check full logs with: docker logs system-prompt-optimizer"
exit 1
fi
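
# Tip (illustrative): since 11434 may not be published to the host, the same
# readiness probe can be run inside a running container:
#   docker exec system-prompt-optimizer curl -s http://localhost:11434/api/tags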
# Check that the required models exist
echo ""
echo "Checking for models..."
ollama list
echo ""
if ! ollama list | grep -q "qwen3:14b"; then
    echo "ERROR: qwen3:14b model not found!"
    echo "The application requires qwen3:14b to function properly."
    echo ""
    echo "Available models:"
    ollama list
    echo ""
    exit 1
fi
if ! ollama list | grep -q "qwen3-embedding"; then
    echo "WARNING: qwen3-embedding model not found!"
    echo "The application requires qwen3-embedding:4b for embeddings."
    echo "Continuing anyway, but embeddings may not work."
fi
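
# Recovery path (illustrative; requires network access, whereas this image is
# meant to ship with models baked in):
#   ollama pull qwen3:14b
#   ollama pull qwen3-embedding:4b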
echo ""
echo "✓ All required models are available"
echo ""
echo "=========================================="
echo "Starting FastAPI application..."
echo "=========================================="
echo "Application will be available at:"
echo " - Web UI: http://localhost:8010/ui/opro.html"
echo " - API Docs: http://localhost:8010/docs"
echo " - Ollama: http://localhost:11434"
echo ""
exec uvicorn _qwen_xinference_demo.api:app --host 0.0.0.0 --port 8010