from fastapi import FastAPI, HTTPException, Request from fastapi.responses import RedirectResponse, FileResponse, JSONResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel from typing import List, Tuple, Optional import config # Legacy session management (query rewriting) from .opro.session_state import create_session, get_session, update_session_add_candidates, log_user_choice from .opro.session_state import log_user_reject from .opro.session_state import set_selected_prompt, log_chat_message from .opro.session_state import set_session_model from .opro.session_state import USER_FEEDBACK_LOG # True OPRO session management from .opro.session_state import ( create_opro_session, get_opro_session, list_opro_sessions, create_opro_run, get_opro_run, update_opro_iteration, add_opro_evaluation, get_opro_trajectory, set_opro_test_cases, complete_opro_run, list_opro_runs ) # Optimization functions from .opro.user_prompt_optimizer import generate_candidates from .opro.user_prompt_optimizer import ( generate_system_instruction_candidates, evaluate_system_instruction ) from .opro.ollama_client import call_qwen from .opro.ollama_client import list_models from fastapi.middleware.cors import CORSMiddleware app = FastAPI( title=config.APP_TITLE, description=config.APP_DESCRIPTION, version=config.APP_VERSION, contact=config.APP_CONTACT, openapi_tags=[ {"name": "health", "description": "健康检查"}, {"name": "models", "description": "模型列表与设置"}, {"name": "sessions", "description": "会话管理(旧版查询重写)"}, {"name": "opro-legacy", "description": "旧版提示优化(查询重写)"}, {"name": "opro-true", "description": "真正的OPRO(系统指令优化)"}, {"name": "chat", "description": "会话聊天"}, {"name": "ui", "description": "静态页面"} ] ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) MAX_ROUNDS = 3 def ok(data=None): return JSONResponse({"success": True, "data": data}) class AppException(HTTPException): def __init__(self, status_code: int, detail: str, error_code: str): super().__init__(status_code=status_code, detail=detail) self.error_code = error_code @app.exception_handler(AppException) def _app_exc_handler(request: Request, exc: AppException): return JSONResponse(status_code=exc.status_code, content={ "success": False, "error": str(exc.detail), "error_code": exc.error_code }) @app.exception_handler(HTTPException) def _http_exc_handler(request: Request, exc: HTTPException): return JSONResponse(status_code=exc.status_code, content={ "success": False, "error": str(exc.detail), "error_code": "HTTP_ERROR" }) @app.exception_handler(Exception) def _generic_exc_handler(request: Request, exc: Exception): return JSONResponse(status_code=500, content={ "success": False, "error": "internal error", "error_code": "INTERNAL_ERROR" }) class StartReq(BaseModel): query: str class NextReq(BaseModel): session_id: str class SelectReq(BaseModel): session_id: str choice: str class RejectReq(BaseModel): session_id: str candidate: str reason: str | None = None class SetModelReq(BaseModel): session_id: str model_name: str # ============================================================================ # TRUE OPRO REQUEST MODELS # ============================================================================ class TestCase(BaseModel): """A single test case for OPRO evaluation.""" input: str expected_output: str class CreateOPRORunReq(BaseModel): """Request to create a new OPRO optimization run.""" task_description: str test_cases: Optional[List[TestCase]] = None model_name: Optional[str] = None session_id: Optional[str] = None # Optional session to associate with class OPROIterateReq(BaseModel): """Request to run one OPRO iteration.""" run_id: str top_k: Optional[int] = None class OPROEvaluateReq(BaseModel): """Request to evaluate a system instruction.""" run_id: str instruction: str class OPROAddTestCasesReq(BaseModel): """Request to add test cases to an OPRO run.""" run_id: str test_cases: List[TestCase] class OPROGenerateAndEvaluateReq(BaseModel): """Request to generate and auto-evaluate candidates (for chat-like UX).""" run_id: str top_k: Optional[int] = None pool_size: Optional[int] = None auto_evaluate: Optional[bool] = True # If False, use diversity-based selection only class OPROExecuteReq(BaseModel): """Request to execute a system instruction with user input.""" instruction: str user_input: str model_name: Optional[str] = None # ============================================================================ # LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO) # ============================================================================ @app.post("/start", tags=["opro-legacy"]) def start(req: StartReq): sid = create_session(req.query) cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name")) update_session_add_candidates(sid, cands) return ok({"session_id": sid, "round": 0, "candidates": cands}) @app.post("/next", tags=["opro-legacy"]) def next_round(req: NextReq): s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") if s["round"] >= MAX_ROUNDS: ans = call_qwen(s["original_query"], temperature=0.3, max_tokens=512) return ok({"final": True, "answer": ans}) cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name")) update_session_add_candidates(req.session_id, cands) return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands}) @app.post("/select", tags=["opro-legacy"]) def select(req: SelectReq): s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") log_user_choice(req.session_id, req.choice) set_selected_prompt(req.session_id, req.choice) log_chat_message(req.session_id, "system", req.choice) try: ans = call_qwen(req.choice, temperature=0.2, max_tokens=1024, model_name=s.get("model_name")) except Exception as e: raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR") log_chat_message(req.session_id, "assistant", ans) try: import os, json os.makedirs("outputs", exist_ok=True) with open("outputs/user_feedback.jsonl", "a", encoding="utf-8") as f: f.write(json.dumps({ "session_id": req.session_id, "round": s["round"], "choice": req.choice, "answer": ans }, ensure_ascii=False) + "\n") except Exception: pass return ok({"prompt": req.choice, "answer": ans}) @app.post("/reject", tags=["opro-legacy"]) def reject(req: RejectReq): s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") log_user_reject(req.session_id, req.candidate, req.reason) cands = generate_candidates(s["original_query"], s["history_candidates"] + [req.candidate], model_name=s.get("model_name")) update_session_add_candidates(req.session_id, cands) return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands}) class QueryReq(BaseModel): query: str session_id: str | None = None @app.post("/query", tags=["opro-legacy"]) def query(req: QueryReq): if req.session_id: s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name")) update_session_add_candidates(req.session_id, cands) return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands}) else: sid = create_session(req.query) log_chat_message(sid, "user", req.query) cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name")) update_session_add_candidates(sid, cands) return ok({"session_id": sid, "round": 0, "candidates": cands}) app.mount("/ui", StaticFiles(directory="frontend", html=True), name="static") @app.get("/", tags=["ui"]) def root(): return RedirectResponse(url="/ui/") @app.get("/health", tags=["health"]) def health(): return ok({"status": "ok", "version": config.APP_VERSION}) @app.get("/version", tags=["health"]) def version(): return ok({"version": config.APP_VERSION}) # @app.get("/ui/react", tags=["ui"]) # def ui_react(): # return FileResponse("frontend/react/index.html") # @app.get("/ui/offline", tags=["ui"]) # def ui_offline(): # return FileResponse("frontend/ui_offline.html") @app.get("/react", tags=["ui"]) def react_root(): return FileResponse("frontend/react/index.html") @app.get("/sessions", tags=["sessions"]) def sessions(): from .opro.session_state import SESSIONS return ok({"sessions": [{ "session_id": sid, "round": s.get("round", 0), "selected_prompt": s.get("selected_prompt"), "original_query": s.get("original_query") } for sid, s in SESSIONS.items()]}) @app.get("/session/{sid}", tags=["sessions"]) def session_detail(sid: str): s = get_session(sid) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") return ok({ "session_id": sid, "round": s["round"], "original_query": s["original_query"], "selected_prompt": s["selected_prompt"], "candidates": s["history_candidates"], "user_feedback": s["user_feedback"], "rejected": s["rejected"], "history": s["chat_history"], }) class MessageReq(BaseModel): session_id: str message: str @app.post("/message", tags=["chat"]) def message(req: MessageReq): s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") log_chat_message(req.session_id, "user", req.message) base_prompt = s.get("selected_prompt") or s["original_query"] full_prompt = base_prompt + "\n\n" + req.message try: ans = call_qwen(full_prompt, temperature=0.3, max_tokens=1024, model_name=s.get("model_name")) except Exception as e: raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR") log_chat_message(req.session_id, "assistant", ans) return ok({"session_id": req.session_id, "answer": ans, "history": s["chat_history"]}) class QueryFromMsgReq(BaseModel): session_id: str @app.post("/query_from_message", tags=["opro-legacy"]) def query_from_message(req: QueryFromMsgReq): s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") last_user = None for m in reversed(s.get("chat_history", [])): if m.get("role") == "user" and m.get("content"): last_user = m["content"] break base = last_user or s["original_query"] cands = generate_candidates(base, s["history_candidates"], model_name=s.get("model_name")) update_session_add_candidates(req.session_id, cands) return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands}) class AnswerReq(BaseModel): query: str @app.post("/answer", tags=["opro-legacy"]) def answer(req: AnswerReq): sid = create_session(req.query) log_chat_message(sid, "user", req.query) ans = call_qwen(req.query, temperature=0.2, max_tokens=1024) log_chat_message(sid, "assistant", ans) cands = generate_candidates(req.query, []) update_session_add_candidates(sid, cands) return ok({"session_id": sid, "answer": ans, "candidates": cands}) @app.get("/models", tags=["models"]) def models(): return ok({"models": list_models()}) @app.post("/set_model", tags=["models"]) def set_model(req: SetModelReq): s = get_session(req.session_id) if not s: raise AppException(404, "session not found", "SESSION_NOT_FOUND") avail = set(list_models() or []) if req.model_name not in avail: raise AppException(400, f"model not available: {req.model_name}", "MODEL_NOT_AVAILABLE") set_session_model(req.session_id, req.model_name) return ok({"session_id": req.session_id, "model_name": req.model_name}) # ============================================================================ # TRUE OPRO ENDPOINTS (System Instruction Optimization) # ============================================================================ # Session Management @app.post("/opro/session/create", tags=["opro-true"]) def opro_create_session(session_name: str = None): """ Create a new OPRO session that can contain multiple runs. """ session_id = create_opro_session(session_name=session_name) session = get_opro_session(session_id) return ok({ "session_id": session_id, "session_name": session["session_name"], "num_runs": len(session["run_ids"]) }) @app.get("/opro/sessions", tags=["opro-true"]) def opro_list_sessions(): """ List all OPRO sessions. """ sessions = list_opro_sessions() return ok({"sessions": sessions}) @app.get("/opro/session/{session_id}", tags=["opro-true"]) def opro_get_session(session_id: str): """ Get detailed information about an OPRO session. """ session = get_opro_session(session_id) if not session: raise AppException(404, "Session not found", "SESSION_NOT_FOUND") # Get all runs in this session runs = list_opro_runs(session_id=session_id) return ok({ "session_id": session_id, "session_name": session["session_name"], "created_at": session["created_at"], "num_runs": len(session["run_ids"]), "runs": runs }) # Run Management @app.post("/opro/create", tags=["opro-true"]) def opro_create_run(req: CreateOPRORunReq): """ Create a new OPRO optimization run. This starts a new system instruction optimization process for a given task. Optionally can be associated with a session. """ # Convert test cases from Pydantic models to tuples test_cases = None if req.test_cases: test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases] run_id = create_opro_run( task_description=req.task_description, test_cases=test_cases, model_name=req.model_name, session_id=req.session_id ) run = get_opro_run(run_id) return ok({ "run_id": run_id, "task_description": run["task_description"], "num_test_cases": len(run["test_cases"]), "iteration": run["iteration"], "status": run["status"], "session_id": run.get("session_id") }) @app.post("/opro/iterate", tags=["opro-true"]) def opro_iterate(req: OPROIterateReq): """ Run one OPRO iteration: generate new system instruction candidates. This generates optimized system instructions based on the performance trajectory. """ run = get_opro_run(req.run_id) if not run: raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND") # Get trajectory for optimization trajectory = get_opro_trajectory(req.run_id) # Generate candidates top_k = req.top_k or config.TOP_K try: candidates = generate_system_instruction_candidates( task_description=run["task_description"], trajectory=trajectory if trajectory else None, top_k=top_k, model_name=run["model_name"] ) except Exception as e: raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR") # Update run with new candidates update_opro_iteration(req.run_id, candidates) return ok({ "run_id": req.run_id, "iteration": run["iteration"] + 1, "candidates": candidates, "num_candidates": len(candidates), "best_score": run["best_score"] }) @app.post("/opro/evaluate", tags=["opro-true"]) def opro_evaluate(req: OPROEvaluateReq): """ Evaluate a system instruction on the test cases. This scores the instruction and updates the performance trajectory. """ run = get_opro_run(req.run_id) if not run: raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND") if not run["test_cases"]: raise AppException(400, "No test cases defined for this run", "NO_TEST_CASES") # Evaluate the instruction try: score = evaluate_system_instruction( system_instruction=req.instruction, test_cases=run["test_cases"], model_name=run["model_name"] ) except Exception as e: raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR") # Add to trajectory add_opro_evaluation(req.run_id, req.instruction, score) # Get updated run info run = get_opro_run(req.run_id) return ok({ "run_id": req.run_id, "instruction": req.instruction, "score": score, "best_score": run["best_score"], "is_new_best": score == run["best_score"] and score > 0 }) @app.get("/opro/runs", tags=["opro-true"]) def opro_list_runs(): """ List all OPRO optimization runs. """ runs = list_opro_runs() return ok({"runs": runs, "total": len(runs)}) @app.get("/opro/run/{run_id}", tags=["opro-true"]) def opro_get_run(run_id: str): """ Get detailed information about an OPRO run. """ run = get_opro_run(run_id) if not run: raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND") # Get sorted trajectory trajectory = get_opro_trajectory(run_id) return ok({ "run_id": run_id, "task_description": run["task_description"], "iteration": run["iteration"], "status": run["status"], "best_score": run["best_score"], "best_instruction": run["best_instruction"], "num_test_cases": len(run["test_cases"]), "test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in run["test_cases"]], "trajectory": [{"instruction": inst, "score": score} for inst, score in trajectory[:10]], # Top 10 "current_candidates": run["current_candidates"] }) @app.post("/opro/test_cases", tags=["opro-true"]) def opro_add_test_cases(req: OPROAddTestCasesReq): """ Add or update test cases for an OPRO run. """ run = get_opro_run(req.run_id) if not run: raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND") # Convert test cases test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases] # Update test cases set_opro_test_cases(req.run_id, test_cases) return ok({ "run_id": req.run_id, "num_test_cases": len(test_cases), "test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in test_cases] }) @app.post("/opro/generate_and_evaluate", tags=["opro-true"]) def opro_generate_and_evaluate(req: OPROGenerateAndEvaluateReq): """ Generate candidates and auto-evaluate them (for chat-like UX). This is the main endpoint for the chat interface. It: 1. Generates candidates based on trajectory 2. Auto-evaluates them (if test cases exist and auto_evaluate=True) 3. Returns top-k sorted by score (or diversity if no evaluation) """ run = get_opro_run(req.run_id) if not run: raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND") top_k = req.top_k or config.TOP_K pool_size = req.pool_size or config.GENERATION_POOL_SIZE # Get trajectory for optimization trajectory = get_opro_trajectory(req.run_id) # Generate candidates try: candidates = generate_system_instruction_candidates( task_description=run["task_description"], trajectory=trajectory if trajectory else None, top_k=pool_size, # Generate pool_size candidates first pool_size=pool_size, model_name=run["model_name"] ) except Exception as e: raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR") # Decide whether to evaluate should_evaluate = req.auto_evaluate and len(run["test_cases"]) > 0 if should_evaluate: # Auto-evaluate all candidates scored_candidates = [] for candidate in candidates: try: score = evaluate_system_instruction( system_instruction=candidate, test_cases=run["test_cases"], model_name=run["model_name"] ) scored_candidates.append({"instruction": candidate, "score": score}) # Add to trajectory add_opro_evaluation(req.run_id, candidate, score) except Exception as e: # If evaluation fails, assign score 0 scored_candidates.append({"instruction": candidate, "score": 0.0}) # Sort by score (highest first) scored_candidates.sort(key=lambda x: x["score"], reverse=True) # Return top-k top_candidates = scored_candidates[:top_k] # Update iteration update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates]) return ok({ "run_id": req.run_id, "candidates": top_candidates, "iteration": run["iteration"] + 1, "evaluated": True, "best_score": run["best_score"] }) else: # No evaluation - use diversity-based selection (already done by clustering) # Just return the candidates without scores top_candidates = [ {"instruction": candidate, "score": None} for candidate in candidates[:top_k] ] # Update iteration update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates]) return ok({ "run_id": req.run_id, "candidates": top_candidates, "iteration": run["iteration"] + 1, "evaluated": False, "best_score": run["best_score"] }) @app.post("/opro/execute", tags=["opro-true"]) def opro_execute(req: OPROExecuteReq): """ Execute a system instruction with user input. This uses the selected instruction as a system prompt and calls the LLM. """ try: # Construct full prompt with system instruction full_prompt = f"{req.instruction}\n\n{req.user_input}" # Call LLM response = call_qwen( full_prompt, temperature=0.2, max_tokens=1024, model_name=req.model_name ) return ok({ "instruction": req.instruction, "user_input": req.user_input, "response": response }) except Exception as e: raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")