Major additions:
- All-in-One Docker image with Ollama + models bundled
- Separate deployment option for existing Ollama installations
- Changed default model from qwen3:8b to qwen3:14b
- Comprehensive deployment documentation

Files added:
- Dockerfile: Basic app-only image
- Dockerfile.allinone: Complete image with Ollama + models
- docker-compose.yml: Easy deployment configuration
- docker-entrypoint.sh: Startup script for all-in-one image
- requirements.txt: Python dependencies
- .dockerignore: Exclude unnecessary files from image

Scripts:
- export-ollama-models.sh: Export models from local Ollama
- build-allinone.sh: Build complete offline-deployable image
- build-and-export.sh: Build and export basic image

Documentation:
- DEPLOYMENT.md: Comprehensive deployment guide
- QUICK_START.md: Quick reference for common tasks

Configuration:
- Updated config.py: DEFAULT_CHAT_MODEL = qwen3:14b
- Updated frontend/opro.html: Page title to 系统提示词优化
config.py · 21 lines · 754 B · Python
APP_TITLE = "OPRO Prompt Optimizer API"
APP_DESCRIPTION = "提供提示优化、候选生成、会话聊天与模型管理的接口"  # "Endpoints for prompt optimization, candidate generation, session chat, and model management"
APP_VERSION = "0.1.0"
APP_CONTACT = {"name": "OPRO Team", "url": "http://127.0.0.1:8010/ui/"}

# Ollama endpoints
OLLAMA_HOST = "http://127.0.0.1:11434"
OLLAMA_GENERATE_URL = f"{OLLAMA_HOST}/api/generate"
OLLAMA_TAGS_URL = f"{OLLAMA_HOST}/api/tags"
DEFAULT_CHAT_MODEL = "qwen3:14b"
DEFAULT_EMBED_MODEL = "qwen3-embedding:4b"

# Xinference
XINFERENCE_EMBED_URL = "http://127.0.0.1:9997/models/bge-base-zh/embed"

# Clustering/selection
GENERATION_POOL_SIZE = 10  # Generate this many candidates before clustering
TOP_K = 5  # Return this many diverse candidates to user
CLUSTER_DISTANCE_THRESHOLD = 0.15
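For reference, a minimal sketch of how the Ollama settings above could be exercised, assuming the stock Ollama REST API (GET /api/tags for installed models, POST /api/generate with stream: false); the requests dependency and the helper names are illustrative, not the application's actual code:

import requests

from config import DEFAULT_CHAT_MODEL, OLLAMA_GENERATE_URL, OLLAMA_TAGS_URL


def list_models() -> list[str]:
    # /api/tags returns the models installed in the local Ollama instance
    resp = requests.get(OLLAMA_TAGS_URL, timeout=10)
    resp.raise_for_status()
    return [m["name"] for m in resp.json().get("models", [])]


def generate(prompt: str, model: str = DEFAULT_CHAT_MODEL) -> str:
    # Non-streaming generation; the full completion arrives in the "response" field
    resp = requests.post(
        OLLAMA_GENERATE_URL,
        json={"model": model, "prompt": prompt, "stream": False},
        timeout=300,
    )
    resp.raise_for_status()
    return resp.json()["response"]


if __name__ == "__main__":
    print(list_models())
    print(generate("Rewrite this system prompt to be more specific: ..."))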
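The clustering/selection block implies a generate-then-diversify step: embed the GENERATION_POOL_SIZE candidates, merge near-duplicates under the distance threshold, and return at most TOP_K representatives. A rough sketch of that selection logic follows, assuming scikit-learn (>= 1.2) agglomerative clustering over cosine distance and Ollama's /api/embeddings endpoint with DEFAULT_EMBED_MODEL; both the embedding route (the project may use XINFERENCE_EMBED_URL instead) and the function names are assumptions, not the repository's implementation:

import numpy as np
import requests
from sklearn.cluster import AgglomerativeClustering

from config import CLUSTER_DISTANCE_THRESHOLD, DEFAULT_EMBED_MODEL, OLLAMA_HOST, TOP_K


def embed(text: str) -> list[float]:
    # Assumption: embeddings come from Ollama's /api/embeddings endpoint;
    # config.py defines no Ollama embeddings URL, so this path is inferred.
    resp = requests.post(
        f"{OLLAMA_HOST}/api/embeddings",
        json={"model": DEFAULT_EMBED_MODEL, "prompt": text},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["embedding"]


def select_diverse(candidates: list[str]) -> list[str]:
    # `candidates` would typically hold GENERATION_POOL_SIZE prompts.
    # Group near-duplicates, then keep one prompt per cluster, up to TOP_K.
    vectors = np.array([embed(c) for c in candidates])
    labels = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=CLUSTER_DISTANCE_THRESHOLD,
        metric="cosine",
        linkage="average",
    ).fit_predict(vectors)
    picked, seen = [], set()
    for cand, label in zip(candidates, labels):
        if label not in seen:
            seen.add(label)
            picked.append(cand)
        if len(picked) == TOP_K:
            break
    return picked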