opro_demo/_qwen_xinference_demo/api.py
leehwui 65cdcf29dc refactor: replace OPRO with simple iterative refinement
Major changes:
- Remove fake OPRO evaluation (no more fake 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (selected + rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with a role definition (你是一个... / "You are a...")
- Update README to reflect new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py
- Updated frontend handleContinueOptimize() to use new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added role definition requirement as first item in all prompt templates
2025-12-08 09:43:20 +08:00
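
The new refinement flow replaces scoring with an explicit user choice. A minimal client sketch of that round-trip is shown below; it assumes the API is served locally on port 8000 and uses the requests library, and the base URL and task text are illustrative, not taken from the repo:

import requests

BASE = "http://localhost:8000"  # assumed local dev address

run = requests.post(f"{BASE}/opro/create", json={
    "task_description": "Rewrite casual questions into formal written queries"
}).json()["data"]

cands = requests.post(f"{BASE}/opro/generate_and_evaluate", json={
    "run_id": run["run_id"]
}).json()["data"]["candidates"]

# The user keeps one candidate and rejects the rest; /opro/refine proposes new ones around it.
refined = requests.post(f"{BASE}/opro/refine", json={
    "run_id": run["run_id"],
    "selected_instruction": cands[0]["instruction"],
    "rejected_instructions": [c["instruction"] for c in cands[1:]],
}).json()["data"]["candidates"]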


from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from typing import List, Tuple, Optional
import config
# Legacy session management (query rewriting)
from .opro.session_state import create_session, get_session, update_session_add_candidates, log_user_choice
from .opro.session_state import log_user_reject
from .opro.session_state import set_selected_prompt, log_chat_message
from .opro.session_state import set_session_model
from .opro.session_state import USER_FEEDBACK_LOG
# True OPRO session management
from .opro.session_state import (
    create_opro_session, get_opro_session, list_opro_sessions,
    create_opro_run, get_opro_run, update_opro_iteration,
    add_opro_evaluation, get_opro_trajectory, set_opro_test_cases,
    complete_opro_run, list_opro_runs
)
# Optimization functions
from .opro.user_prompt_optimizer import generate_candidates
from .opro.user_prompt_optimizer import (
    generate_system_instruction_candidates,
    evaluate_system_instruction,
    refine_instruction_candidates
)
from .opro.ollama_client import call_qwen
from .opro.ollama_client import list_models
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI(
    title=config.APP_TITLE,
    description=config.APP_DESCRIPTION,
    version=config.APP_VERSION,
    contact=config.APP_CONTACT,
    openapi_tags=[
        {"name": "health", "description": "健康检查"},
        {"name": "models", "description": "模型列表与设置"},
        {"name": "sessions", "description": "会话管理(旧版查询重写)"},
        {"name": "opro-legacy", "description": "旧版提示优化(查询重写)"},
        {"name": "opro-true", "description": "真正的OPRO系统指令优化"},
        {"name": "chat", "description": "会话聊天"},
        {"name": "ui", "description": "静态页面"}
    ]
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
MAX_ROUNDS = 3
def ok(data=None):
    return JSONResponse({"success": True, "data": data})
class AppException(HTTPException):
    def __init__(self, status_code: int, detail: str, error_code: str):
        super().__init__(status_code=status_code, detail=detail)
        self.error_code = error_code
@app.exception_handler(AppException)
def _app_exc_handler(request: Request, exc: AppException):
    return JSONResponse(status_code=exc.status_code, content={
        "success": False,
        "error": str(exc.detail),
        "error_code": exc.error_code
    })
@app.exception_handler(HTTPException)
def _http_exc_handler(request: Request, exc: HTTPException):
    return JSONResponse(status_code=exc.status_code, content={
        "success": False,
        "error": str(exc.detail),
        "error_code": "HTTP_ERROR"
    })
@app.exception_handler(Exception)
def _generic_exc_handler(request: Request, exc: Exception):
    return JSONResponse(status_code=500, content={
        "success": False,
        "error": "internal error",
        "error_code": "INTERNAL_ERROR"
    })
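# Every endpoint below returns the same envelope: {"success": true, "data": {...}} on success,
# or {"success": false, "error": "...", "error_code": "..."} when one of the handlers above fires.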
class StartReq(BaseModel):
    query: str
class NextReq(BaseModel):
    session_id: str
class SelectReq(BaseModel):
    session_id: str
    choice: str
class RejectReq(BaseModel):
    session_id: str
    candidate: str
    reason: str | None = None
class SetModelReq(BaseModel):
    session_id: str
    model_name: str
# ============================================================================
# TRUE OPRO REQUEST MODELS
# ============================================================================
class TestCase(BaseModel):
    """A single test case for OPRO evaluation."""
    input: str
    expected_output: str
class CreateOPRORunReq(BaseModel):
    """Request to create a new OPRO optimization run."""
    task_description: str
    test_cases: Optional[List[TestCase]] = None
    model_name: Optional[str] = None
    session_id: Optional[str] = None  # Optional session to associate with
class OPROIterateReq(BaseModel):
    """Request to run one OPRO iteration."""
    run_id: str
    top_k: Optional[int] = None
class OPROEvaluateReq(BaseModel):
    """Request to evaluate a system instruction."""
    run_id: str
    instruction: str
class OPROAddTestCasesReq(BaseModel):
    """Request to add test cases to an OPRO run."""
    run_id: str
    test_cases: List[TestCase]
class OPROGenerateAndEvaluateReq(BaseModel):
    """Request to generate and auto-evaluate candidates (for chat-like UX)."""
    run_id: str
    top_k: Optional[int] = None
    pool_size: Optional[int] = None
    auto_evaluate: Optional[bool] = True  # If False, use diversity-based selection only
class OPROExecuteReq(BaseModel):
    """Request to execute a system instruction with user input."""
    instruction: str
    user_input: str
    model_name: Optional[str] = None
class OPRORefineReq(BaseModel):
    """Request to refine based on selected instruction (simple iterative refinement, NOT OPRO)."""
    run_id: str
    selected_instruction: str
    rejected_instructions: List[str]
    top_k: Optional[int] = None
    pool_size: Optional[int] = None
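# Example body for POST /opro/refine (values are illustrative):
# {
#     "run_id": "<run_id from /opro/create>",
#     "selected_instruction": "<the candidate the user kept>",
#     "rejected_instructions": ["<candidates the user passed over>"],
#     "top_k": 3
# }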
# ============================================================================
# LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO)
# ============================================================================
@app.post("/start", tags=["opro-legacy"])
def start(req: StartReq):
sid = create_session(req.query)
cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
update_session_add_candidates(sid, cands)
return ok({"session_id": sid, "round": 0, "candidates": cands})
@app.post("/next", tags=["opro-legacy"])
def next_round(req: NextReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
if s["round"] >= MAX_ROUNDS:
ans = call_qwen(s["original_query"], temperature=0.3, max_tokens=512)
return ok({"final": True, "answer": ans})
cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
update_session_add_candidates(req.session_id, cands)
return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
@app.post("/select", tags=["opro-legacy"])
def select(req: SelectReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
log_user_choice(req.session_id, req.choice)
set_selected_prompt(req.session_id, req.choice)
log_chat_message(req.session_id, "system", req.choice)
try:
ans = call_qwen(req.choice, temperature=0.2, max_tokens=1024, model_name=s.get("model_name"))
except Exception as e:
raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
log_chat_message(req.session_id, "assistant", ans)
try:
import os, json
os.makedirs("outputs", exist_ok=True)
with open("outputs/user_feedback.jsonl", "a", encoding="utf-8") as f:
f.write(json.dumps({
"session_id": req.session_id,
"round": s["round"],
"choice": req.choice,
"answer": ans
}, ensure_ascii=False) + "\n")
except Exception:
pass
return ok({"prompt": req.choice, "answer": ans})
@app.post("/reject", tags=["opro-legacy"])
def reject(req: RejectReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
log_user_reject(req.session_id, req.candidate, req.reason)
cands = generate_candidates(s["original_query"], s["history_candidates"] + [req.candidate], model_name=s.get("model_name"))
update_session_add_candidates(req.session_id, cands)
return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
class QueryReq(BaseModel):
query: str
session_id: str | None = None
@app.post("/query", tags=["opro-legacy"])
def query(req: QueryReq):
if req.session_id:
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
update_session_add_candidates(req.session_id, cands)
return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
else:
sid = create_session(req.query)
log_chat_message(sid, "user", req.query)
cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
update_session_add_candidates(sid, cands)
return ok({"session_id": sid, "round": 0, "candidates": cands})
app.mount("/ui", StaticFiles(directory="frontend", html=True), name="static")
@app.get("/", tags=["ui"])
def root():
return RedirectResponse(url="/ui/")
@app.get("/health", tags=["health"])
def health():
return ok({"status": "ok", "version": config.APP_VERSION})
@app.get("/version", tags=["health"])
def version():
return ok({"version": config.APP_VERSION})
# @app.get("/ui/react", tags=["ui"])
# def ui_react():
# return FileResponse("frontend/react/index.html")
# @app.get("/ui/offline", tags=["ui"])
# def ui_offline():
# return FileResponse("frontend/ui_offline.html")
@app.get("/react", tags=["ui"])
def react_root():
return FileResponse("frontend/react/index.html")
@app.get("/sessions", tags=["sessions"])
def sessions():
from .opro.session_state import SESSIONS
return ok({"sessions": [{
"session_id": sid,
"round": s.get("round", 0),
"selected_prompt": s.get("selected_prompt"),
"original_query": s.get("original_query")
} for sid, s in SESSIONS.items()]})
@app.get("/session/{sid}", tags=["sessions"])
def session_detail(sid: str):
s = get_session(sid)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
return ok({
"session_id": sid,
"round": s["round"],
"original_query": s["original_query"],
"selected_prompt": s["selected_prompt"],
"candidates": s["history_candidates"],
"user_feedback": s["user_feedback"],
"rejected": s["rejected"],
"history": s["chat_history"],
})
class MessageReq(BaseModel):
session_id: str
message: str
@app.post("/message", tags=["chat"])
def message(req: MessageReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
log_chat_message(req.session_id, "user", req.message)
base_prompt = s.get("selected_prompt") or s["original_query"]
full_prompt = base_prompt + "\n\n" + req.message
try:
ans = call_qwen(full_prompt, temperature=0.3, max_tokens=1024, model_name=s.get("model_name"))
except Exception as e:
raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
log_chat_message(req.session_id, "assistant", ans)
return ok({"session_id": req.session_id, "answer": ans, "history": s["chat_history"]})
class QueryFromMsgReq(BaseModel):
    session_id: str
@app.post("/query_from_message", tags=["opro-legacy"])
def query_from_message(req: QueryFromMsgReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    last_user = None
    for m in reversed(s.get("chat_history", [])):
        if m.get("role") == "user" and m.get("content"):
            last_user = m["content"]
            break
    base = last_user or s["original_query"]
    cands = generate_candidates(base, s["history_candidates"], model_name=s.get("model_name"))
    update_session_add_candidates(req.session_id, cands)
    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
class AnswerReq(BaseModel):
    query: str
@app.post("/answer", tags=["opro-legacy"])
def answer(req: AnswerReq):
    sid = create_session(req.query)
    log_chat_message(sid, "user", req.query)
    ans = call_qwen(req.query, temperature=0.2, max_tokens=1024)
    log_chat_message(sid, "assistant", ans)
    cands = generate_candidates(req.query, [])
    update_session_add_candidates(sid, cands)
    return ok({"session_id": sid, "answer": ans, "candidates": cands})
@app.get("/models", tags=["models"])
def models():
    return ok({"models": list_models()})
@app.post("/set_model", tags=["models"])
def set_model(req: SetModelReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    avail = set(list_models() or [])
    if req.model_name not in avail:
        raise AppException(400, f"model not available: {req.model_name}", "MODEL_NOT_AVAILABLE")
    set_session_model(req.session_id, req.model_name)
    return ok({"session_id": req.session_id, "model_name": req.model_name})
# ============================================================================
# TRUE OPRO ENDPOINTS (System Instruction Optimization)
# ============================================================================
# Session Management
@app.post("/opro/session/create", tags=["opro-true"])
def opro_create_session(session_name: Optional[str] = None):
    """
    Create a new OPRO session that can contain multiple runs.
    """
    session_id = create_opro_session(session_name=session_name)
    session = get_opro_session(session_id)
    return ok({
        "session_id": session_id,
        "session_name": session["session_name"],
        "num_runs": len(session["run_ids"])
    })
@app.get("/opro/sessions", tags=["opro-true"])
def opro_list_sessions():
"""
List all OPRO sessions.
"""
sessions = list_opro_sessions()
return ok({"sessions": sessions})
@app.get("/opro/session/{session_id}", tags=["opro-true"])
def opro_get_session(session_id: str):
"""
Get detailed information about an OPRO session.
"""
session = get_opro_session(session_id)
if not session:
raise AppException(404, "Session not found", "SESSION_NOT_FOUND")
# Get all runs in this session
runs = list_opro_runs(session_id=session_id)
return ok({
"session_id": session_id,
"session_name": session["session_name"],
"created_at": session["created_at"],
"num_runs": len(session["run_ids"]),
"runs": runs
})
# Run Management
@app.post("/opro/create", tags=["opro-true"])
def opro_create_run(req: CreateOPRORunReq):
"""
Create a new OPRO optimization run.
This starts a new system instruction optimization process for a given task.
Optionally can be associated with a session.
"""
# Convert test cases from Pydantic models to tuples
test_cases = None
if req.test_cases:
test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]
run_id = create_opro_run(
task_description=req.task_description,
test_cases=test_cases,
model_name=req.model_name,
session_id=req.session_id
)
run = get_opro_run(run_id)
return ok({
"run_id": run_id,
"task_description": run["task_description"],
"num_test_cases": len(run["test_cases"]),
"iteration": run["iteration"],
"status": run["status"],
"session_id": run.get("session_id")
})
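# Example body for POST /opro/create (all values illustrative):
# {
#     "task_description": "Rewrite casual questions into formal written queries",
#     "test_cases": [{"input": "how do i use this thing", "expected_output": "How should this product be used?"}],
#     "model_name": "<a name returned by GET /models>"
# }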
@app.post("/opro/iterate", tags=["opro-true"])
def opro_iterate(req: OPROIterateReq):
"""
Run one OPRO iteration: generate new system instruction candidates.
This generates optimized system instructions based on the performance trajectory.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Get trajectory for optimization
trajectory = get_opro_trajectory(req.run_id)
# Generate candidates
top_k = req.top_k or config.TOP_K
try:
candidates = generate_system_instruction_candidates(
task_description=run["task_description"],
trajectory=trajectory if trajectory else None,
top_k=top_k,
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")
# Update run with new candidates
update_opro_iteration(req.run_id, candidates)
return ok({
"run_id": req.run_id,
"iteration": run["iteration"] + 1,
"candidates": candidates,
"num_candidates": len(candidates),
"best_score": run["best_score"]
})
@app.post("/opro/evaluate", tags=["opro-true"])
def opro_evaluate(req: OPROEvaluateReq):
"""
Evaluate a system instruction on the test cases.
This scores the instruction and updates the performance trajectory.
If no test cases are defined, uses a default score of 0.5 to indicate user selection.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Evaluate the instruction if test cases exist
if run["test_cases"] and len(run["test_cases"]) > 0:
try:
score = evaluate_system_instruction(
system_instruction=req.instruction,
test_cases=run["test_cases"],
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
else:
# No test cases - use default score to indicate user selection
# This allows the trajectory to track which instructions the user preferred
score = 0.5
# Add to trajectory
add_opro_evaluation(req.run_id, req.instruction, score)
# Get updated run info
run = get_opro_run(req.run_id)
return ok({
"run_id": req.run_id,
"instruction": req.instruction,
"score": score,
"best_score": run["best_score"],
"is_new_best": score == run["best_score"] and score > 0,
"has_test_cases": len(run["test_cases"]) > 0
})
@app.get("/opro/runs", tags=["opro-true"])
def opro_list_runs():
"""
List all OPRO optimization runs.
"""
runs = list_opro_runs()
return ok({"runs": runs, "total": len(runs)})
@app.get("/opro/run/{run_id}", tags=["opro-true"])
def opro_get_run(run_id: str):
"""
Get detailed information about an OPRO run.
"""
run = get_opro_run(run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Get sorted trajectory
trajectory = get_opro_trajectory(run_id)
return ok({
"run_id": run_id,
"task_description": run["task_description"],
"iteration": run["iteration"],
"status": run["status"],
"best_score": run["best_score"],
"best_instruction": run["best_instruction"],
"num_test_cases": len(run["test_cases"]),
"test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in run["test_cases"]],
"trajectory": [{"instruction": inst, "score": score} for inst, score in trajectory[:10]], # Top 10
"current_candidates": run["current_candidates"]
})
@app.post("/opro/test_cases", tags=["opro-true"])
def opro_add_test_cases(req: OPROAddTestCasesReq):
"""
Add or update test cases for an OPRO run.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Convert test cases
test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]
# Update test cases
set_opro_test_cases(req.run_id, test_cases)
return ok({
"run_id": req.run_id,
"num_test_cases": len(test_cases),
"test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in test_cases]
})
@app.post("/opro/generate_and_evaluate", tags=["opro-true"])
def opro_generate_and_evaluate(req: OPROGenerateAndEvaluateReq):
"""
Generate candidates and auto-evaluate them (for chat-like UX).
This is the main endpoint for the chat interface. It:
1. Generates candidates based on trajectory
2. Auto-evaluates them (if test cases exist and auto_evaluate=True)
3. Returns top-k sorted by score (or diversity if no evaluation)
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
top_k = req.top_k or config.TOP_K
pool_size = req.pool_size or config.GENERATION_POOL_SIZE
# Get trajectory for optimization
trajectory = get_opro_trajectory(req.run_id)
# Generate candidates
try:
candidates = generate_system_instruction_candidates(
task_description=run["task_description"],
trajectory=trajectory if trajectory else None,
top_k=pool_size, # Generate pool_size candidates first
pool_size=pool_size,
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")
# Decide whether to evaluate
should_evaluate = req.auto_evaluate and len(run["test_cases"]) > 0
if should_evaluate:
# Auto-evaluate all candidates
scored_candidates = []
for candidate in candidates:
try:
score = evaluate_system_instruction(
system_instruction=candidate,
test_cases=run["test_cases"],
model_name=run["model_name"]
)
scored_candidates.append({"instruction": candidate, "score": score})
# Add to trajectory
add_opro_evaluation(req.run_id, candidate, score)
except Exception as e:
# If evaluation fails, assign score 0
scored_candidates.append({"instruction": candidate, "score": 0.0})
# Sort by score (highest first)
scored_candidates.sort(key=lambda x: x["score"], reverse=True)
# Return top-k
top_candidates = scored_candidates[:top_k]
# Update iteration
update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])
return ok({
"run_id": req.run_id,
"candidates": top_candidates,
"iteration": run["iteration"] + 1,
"evaluated": True,
"best_score": run["best_score"]
})
else:
# No evaluation - use diversity-based selection (already done by clustering)
# Just return the candidates without scores
top_candidates = [
{"instruction": candidate, "score": None}
for candidate in candidates[:top_k]
]
# Update iteration
update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])
return ok({
"run_id": req.run_id,
"candidates": top_candidates,
"iteration": run["iteration"] + 1,
"evaluated": False,
"best_score": run["best_score"]
})
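# Typical chat-UX sequence (a sketch; the exact wiring lives in the frontend's handleContinueOptimize()):
#   POST /opro/create                -> run_id
#   POST /opro/generate_and_evaluate -> top-k candidates (scored only if test cases exist)
#   POST /opro/execute               -> try the chosen instruction on a real input
#   POST /opro/refine                -> new candidates built around the user's selection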
@app.post("/opro/execute", tags=["opro-true"])
def opro_execute(req: OPROExecuteReq):
"""
Execute a system instruction with user input.
This uses the selected instruction as a system prompt and calls the LLM.
"""
try:
# Construct full prompt with system instruction
full_prompt = f"{req.instruction}\n\n{req.user_input}"
# Call LLM
response = call_qwen(
full_prompt,
temperature=0.2,
max_tokens=1024,
model_name=req.model_name
)
return ok({
"instruction": req.instruction,
"user_input": req.user_input,
"response": response
})
except Exception as e:
raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")
@app.post("/opro/refine", tags=["opro-true"])
def opro_refine(req: OPRORefineReq):
"""
Simple iterative refinement based on user selection (NOT OPRO).
This generates new candidates based on the selected instruction while avoiding rejected ones.
No scoring, no trajectory - just straightforward refinement based on user preference.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
top_k = req.top_k or config.TOP_K
pool_size = req.pool_size or config.GENERATION_POOL_SIZE
try:
candidates = refine_instruction_candidates(
task_description=run["task_description"],
selected_instruction=req.selected_instruction,
rejected_instructions=req.rejected_instructions,
top_k=top_k,
pool_size=pool_size,
model_name=run["model_name"]
)
# Update iteration counter
update_opro_iteration(req.run_id, candidates)
# Get updated run info
run = get_opro_run(req.run_id)
return ok({
"run_id": req.run_id,
"iteration": run["iteration"],
"candidates": [{"instruction": c, "score": None} for c in candidates],
"task_description": run["task_description"]
})
except Exception as e:
raise AppException(500, f"Refinement failed: {e}", "REFINEMENT_ERROR")