opro_demo/_qwen_xinference_demo/api.py

from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from typing import List, Tuple, Optional
import config

# Legacy session management (query rewriting)
from .opro.session_state import create_session, get_session, update_session_add_candidates, log_user_choice
from .opro.session_state import log_user_reject
from .opro.session_state import set_selected_prompt, log_chat_message
from .opro.session_state import set_session_model
from .opro.session_state import USER_FEEDBACK_LOG

# True OPRO session management
from .opro.session_state import (
    create_opro_run, get_opro_run, update_opro_iteration,
    add_opro_evaluation, get_opro_trajectory, set_opro_test_cases,
    complete_opro_run, list_opro_runs
)

# Optimization functions
from .opro.user_prompt_optimizer import generate_candidates
from .opro.user_prompt_optimizer import (
    generate_system_instruction_candidates,
    evaluate_system_instruction
)

from .opro.ollama_client import call_qwen
from .opro.ollama_client import list_models

from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(
    title=config.APP_TITLE,
    description=config.APP_DESCRIPTION,
    version=config.APP_VERSION,
    contact=config.APP_CONTACT,
    openapi_tags=[
        {"name": "health", "description": "健康检查"},
        {"name": "models", "description": "模型列表与设置"},
        {"name": "sessions", "description": "会话管理（旧版查询重写）"},
        {"name": "opro-legacy", "description": "旧版提示优化（查询重写）"},
        {"name": "opro-true", "description": "真正的OPRO（系统指令优化）"},
        {"name": "chat", "description": "会话聊天"},
        {"name": "ui", "description": "静态页面"}
    ]
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
MAX_ROUNDS = 3

def ok(data=None):
    return JSONResponse({"success": True, "data": data})

class AppException(HTTPException):
    def __init__(self, status_code: int, detail: str, error_code: str):
        super().__init__(status_code=status_code, detail=detail)
        self.error_code = error_code

@app.exception_handler(AppException)
def _app_exc_handler(request: Request, exc: AppException):
    return JSONResponse(status_code=exc.status_code, content={
        "success": False,
        "error": str(exc.detail),
        "error_code": exc.error_code
    })

@app.exception_handler(HTTPException)
def _http_exc_handler(request: Request, exc: HTTPException):
    return JSONResponse(status_code=exc.status_code, content={
        "success": False,
        "error": str(exc.detail),
        "error_code": "HTTP_ERROR"
    })

@app.exception_handler(Exception)
def _generic_exc_handler(request: Request, exc: Exception):
    return JSONResponse(status_code=500, content={
        "success": False,
        "error": "internal error",
        "error_code": "INTERNAL_ERROR"
    })

class StartReq(BaseModel):
    query: str

class NextReq(BaseModel):
    session_id: str

class SelectReq(BaseModel):
    session_id: str
    choice: str

class RejectReq(BaseModel):
    session_id: str
    candidate: str
    reason: str | None = None

class SetModelReq(BaseModel):
    session_id: str
    model_name: str


# ============================================================================
# TRUE OPRO REQUEST MODELS
# ============================================================================

class TestCase(BaseModel):
    """A single test case for OPRO evaluation."""
    input: str
    expected_output: str


class CreateOPRORunReq(BaseModel):
    """Request to create a new OPRO optimization run."""
    task_description: str
    test_cases: Optional[List[TestCase]] = None
    model_name: Optional[str] = None


class OPROIterateReq(BaseModel):
    """Request to run one OPRO iteration."""
    run_id: str
    top_k: Optional[int] = None


class OPROEvaluateReq(BaseModel):
    """Request to evaluate a system instruction."""
    run_id: str
    instruction: str


class OPROAddTestCasesReq(BaseModel):
    """Request to add test cases to an OPRO run."""
    run_id: str
    test_cases: List[TestCase]


class OPROGenerateAndEvaluateReq(BaseModel):
    """Request to generate and auto-evaluate candidates (for chat-like UX)."""
    run_id: str
    top_k: Optional[int] = None
    pool_size: Optional[int] = None
    auto_evaluate: Optional[bool] = True  # If False, use diversity-based selection only


class OPROExecuteReq(BaseModel):
    """Request to execute a system instruction with user input."""
    instruction: str
    user_input: str
    model_name: Optional[str] = None


# ============================================================================
# LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO)
# ============================================================================

@app.post("/start", tags=["opro-legacy"])
def start(req: StartReq):
    sid = create_session(req.query)
    cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
    update_session_add_candidates(sid, cands)
    return ok({"session_id": sid, "round": 0, "candidates": cands})

@app.post("/next", tags=["opro-legacy"])
def next_round(req: NextReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")

    if s["round"] >= MAX_ROUNDS:
        ans = call_qwen(s["original_query"], temperature=0.3, max_tokens=512)
        return ok({"final": True, "answer": ans})

    cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
    update_session_add_candidates(req.session_id, cands)
    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})

@app.post("/select", tags=["opro-legacy"])
def select(req: SelectReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")

    log_user_choice(req.session_id, req.choice)
    set_selected_prompt(req.session_id, req.choice)
    log_chat_message(req.session_id, "system", req.choice)
    try:
        ans = call_qwen(req.choice, temperature=0.2, max_tokens=1024, model_name=s.get("model_name"))
    except Exception as e:
        raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
    log_chat_message(req.session_id, "assistant", ans)
    try:
        import os, json
        os.makedirs("outputs", exist_ok=True)
        with open("outputs/user_feedback.jsonl", "a", encoding="utf-8") as f:
            f.write(json.dumps({
                "session_id": req.session_id,
                "round": s["round"],
                "choice": req.choice,
                "answer": ans
            }, ensure_ascii=False) + "\n")
    except Exception:
        pass
    return ok({"prompt": req.choice, "answer": ans})

@app.post("/reject", tags=["opro-legacy"])
def reject(req: RejectReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    log_user_reject(req.session_id, req.candidate, req.reason)
    cands = generate_candidates(s["original_query"], s["history_candidates"] + [req.candidate], model_name=s.get("model_name"))
    update_session_add_candidates(req.session_id, cands)
    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
class QueryReq(BaseModel):
    query: str
    session_id: str | None = None

@app.post("/query", tags=["opro-legacy"])
def query(req: QueryReq):
    if req.session_id:
        s = get_session(req.session_id)
        if not s:
            raise AppException(404, "session not found", "SESSION_NOT_FOUND")
        cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
        update_session_add_candidates(req.session_id, cands)
        return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
    else:
        sid = create_session(req.query)
        log_chat_message(sid, "user", req.query)
        cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
        update_session_add_candidates(sid, cands)
        return ok({"session_id": sid, "round": 0, "candidates": cands})
app.mount("/ui", StaticFiles(directory="frontend", html=True), name="static")

@app.get("/", tags=["ui"])
def root():
    return RedirectResponse(url="/ui/")

@app.get("/health", tags=["health"])
def health():
    return ok({"status": "ok", "version": config.APP_VERSION})

@app.get("/version", tags=["health"])
def version():
    return ok({"version": config.APP_VERSION})

# @app.get("/ui/react", tags=["ui"])
# def ui_react():
#     return FileResponse("frontend/react/index.html")

# @app.get("/ui/offline", tags=["ui"])
# def ui_offline():
#     return FileResponse("frontend/ui_offline.html")

@app.get("/react", tags=["ui"])
def react_root():
    return FileResponse("frontend/react/index.html")

@app.get("/sessions", tags=["sessions"])
def sessions():
    from .opro.session_state import SESSIONS
    return ok({"sessions": [{
        "session_id": sid,
        "round": s.get("round", 0),
        "selected_prompt": s.get("selected_prompt"),
        "original_query": s.get("original_query")
    } for sid, s in SESSIONS.items()]})

@app.get("/session/{sid}", tags=["sessions"])
def session_detail(sid: str):
    s = get_session(sid)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    return ok({
        "session_id": sid,
        "round": s["round"],
        "original_query": s["original_query"],
        "selected_prompt": s["selected_prompt"],
        "candidates": s["history_candidates"],
        "user_feedback": s["user_feedback"],
        "rejected": s["rejected"],
        "history": s["chat_history"],
    })

class MessageReq(BaseModel):
    session_id: str
    message: str

@app.post("/message", tags=["chat"])
def message(req: MessageReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    log_chat_message(req.session_id, "user", req.message)
    base_prompt = s.get("selected_prompt") or s["original_query"]
    full_prompt = base_prompt + "\n\n" + req.message
    try:
        ans = call_qwen(full_prompt, temperature=0.3, max_tokens=1024, model_name=s.get("model_name"))
    except Exception as e:
        raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
    log_chat_message(req.session_id, "assistant", ans)
    return ok({"session_id": req.session_id, "answer": ans, "history": s["chat_history"]})

class QueryFromMsgReq(BaseModel):
    session_id: str

@app.post("/query_from_message", tags=["opro-legacy"])
def query_from_message(req: QueryFromMsgReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    last_user = None
    for m in reversed(s.get("chat_history", [])):
        if m.get("role") == "user" and m.get("content"):
            last_user = m["content"]
            break
    base = last_user or s["original_query"]
    cands = generate_candidates(base, s["history_candidates"], model_name=s.get("model_name"))
    update_session_add_candidates(req.session_id, cands)
    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})

class AnswerReq(BaseModel):
    query: str

@app.post("/answer", tags=["opro-legacy"])
def answer(req: AnswerReq):
    sid = create_session(req.query)
    log_chat_message(sid, "user", req.query)
    ans = call_qwen(req.query, temperature=0.2, max_tokens=1024)
    log_chat_message(sid, "assistant", ans)
    cands = generate_candidates(req.query, [])
    update_session_add_candidates(sid, cands)
    return ok({"session_id": sid, "answer": ans, "candidates": cands})

@app.get("/models", tags=["models"])
def models():
    return ok({"models": list_models()})

@app.post("/set_model", tags=["models"])
def set_model(req: SetModelReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    avail = set(list_models() or [])
    if req.model_name not in avail:
        raise AppException(400, f"model not available: {req.model_name}", "MODEL_NOT_AVAILABLE")
    set_session_model(req.session_id, req.model_name)
    return ok({"session_id": req.session_id, "model_name": req.model_name})


# ============================================================================
# TRUE OPRO ENDPOINTS (System Instruction Optimization)
# ============================================================================

@app.post("/opro/create", tags=["opro-true"])
def opro_create_run(req: CreateOPRORunReq):
    """
    Create a new OPRO optimization run.

    This starts a new system instruction optimization process for a given task.
    """
    # Convert test cases from Pydantic models to tuples
    test_cases = None
    if req.test_cases:
        test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]

    run_id = create_opro_run(
        task_description=req.task_description,
        test_cases=test_cases,
        model_name=req.model_name
    )

    run = get_opro_run(run_id)

    return ok({
        "run_id": run_id,
        "task_description": run["task_description"],
        "num_test_cases": len(run["test_cases"]),
        "iteration": run["iteration"],
        "status": run["status"]
    })


@app.post("/opro/iterate", tags=["opro-true"])
def opro_iterate(req: OPROIterateReq):
    """
    Run one OPRO iteration: generate new system instruction candidates.

    This generates optimized system instructions based on the performance trajectory.
    """
    run = get_opro_run(req.run_id)
    if not run:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    # Get trajectory for optimization
    trajectory = get_opro_trajectory(req.run_id)

    # Generate candidates
    top_k = req.top_k or config.TOP_K
    try:
        candidates = generate_system_instruction_candidates(
            task_description=run["task_description"],
            trajectory=trajectory if trajectory else None,
            top_k=top_k,
            model_name=run["model_name"]
        )
    except Exception as e:
        raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")

    # Update run with new candidates
    update_opro_iteration(req.run_id, candidates)

    return ok({
        "run_id": req.run_id,
        "iteration": run["iteration"] + 1,
        "candidates": candidates,
        "num_candidates": len(candidates),
        "best_score": run["best_score"]
    })


@app.post("/opro/evaluate", tags=["opro-true"])
def opro_evaluate(req: OPROEvaluateReq):
    """
    Evaluate a system instruction on the test cases.

    This scores the instruction and updates the performance trajectory.
    """
    run = get_opro_run(req.run_id)
    if not run:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    if not run["test_cases"]:
        raise AppException(400, "No test cases defined for this run", "NO_TEST_CASES")

    # Evaluate the instruction
    try:
        score = evaluate_system_instruction(
            system_instruction=req.instruction,
            test_cases=run["test_cases"],
            model_name=run["model_name"]
        )
    except Exception as e:
        raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")

    # Add to trajectory
    add_opro_evaluation(req.run_id, req.instruction, score)

    # Get updated run info
    run = get_opro_run(req.run_id)

    return ok({
        "run_id": req.run_id,
        "instruction": req.instruction,
        "score": score,
        "best_score": run["best_score"],
        "is_new_best": score == run["best_score"] and score > 0
    })


@app.get("/opro/runs", tags=["opro-true"])
def opro_list_runs():
    """
    List all OPRO optimization runs.
    """
    runs = list_opro_runs()
    return ok({"runs": runs, "total": len(runs)})


@app.get("/opro/run/{run_id}", tags=["opro-true"])
def opro_get_run(run_id: str):
    """
    Get detailed information about an OPRO run.
    """
    run = get_opro_run(run_id)
    if not run:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    # Get sorted trajectory
    trajectory = get_opro_trajectory(run_id)

    return ok({
        "run_id": run_id,
        "task_description": run["task_description"],
        "iteration": run["iteration"],
        "status": run["status"],
        "best_score": run["best_score"],
        "best_instruction": run["best_instruction"],
        "num_test_cases": len(run["test_cases"]),
        "test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in run["test_cases"]],
        "trajectory": [{"instruction": inst, "score": score} for inst, score in trajectory[:10]],  # Top 10
        "current_candidates": run["current_candidates"]
    })


@app.post("/opro/test_cases", tags=["opro-true"])
def opro_add_test_cases(req: OPROAddTestCasesReq):
    """
    Add or update test cases for an OPRO run.
    """
    run = get_opro_run(req.run_id)
    if not run:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    # Convert test cases
    test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]

    # Update test cases
    set_opro_test_cases(req.run_id, test_cases)

    return ok({
        "run_id": req.run_id,
        "num_test_cases": len(test_cases),
        "test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in test_cases]
    })


@app.post("/opro/generate_and_evaluate", tags=["opro-true"])
def opro_generate_and_evaluate(req: OPROGenerateAndEvaluateReq):
    """
    Generate candidates and auto-evaluate them (for chat-like UX).

    This is the main endpoint for the chat interface. It:
    1. Generates candidates based on trajectory
    2. Auto-evaluates them (if test cases exist and auto_evaluate=True)
    3. Returns top-k sorted by score (or diversity if no evaluation)
    """
    run = get_opro_run(req.run_id)
    if not run:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    top_k = req.top_k or config.TOP_K
    pool_size = req.pool_size or config.GENERATION_POOL_SIZE

    # Get trajectory for optimization
    trajectory = get_opro_trajectory(req.run_id)

    # Generate candidates
    try:
        candidates = generate_system_instruction_candidates(
            task_description=run["task_description"],
            trajectory=trajectory if trajectory else None,
            top_k=pool_size,  # Generate pool_size candidates first
            pool_size=pool_size,
            model_name=run["model_name"]
        )
    except Exception as e:
        raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")

    # Decide whether to evaluate
    should_evaluate = req.auto_evaluate and len(run["test_cases"]) > 0

    if should_evaluate:
        # Auto-evaluate all candidates
        scored_candidates = []
        for candidate in candidates:
            try:
                score = evaluate_system_instruction(
                    system_instruction=candidate,
                    test_cases=run["test_cases"],
                    model_name=run["model_name"]
                )
                scored_candidates.append({"instruction": candidate, "score": score})

                # Add to trajectory
                add_opro_evaluation(req.run_id, candidate, score)
            except Exception as e:
                # If evaluation fails, assign score 0
                scored_candidates.append({"instruction": candidate, "score": 0.0})

        # Sort by score (highest first)
        scored_candidates.sort(key=lambda x: x["score"], reverse=True)

        # Return top-k
        top_candidates = scored_candidates[:top_k]

        # Update iteration
        update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])

        return ok({
            "run_id": req.run_id,
            "candidates": top_candidates,
            "iteration": run["iteration"] + 1,
            "evaluated": True,
            "best_score": run["best_score"]
        })
    else:
        # No evaluation - use diversity-based selection (already done by clustering)
        # Just return the candidates without scores
        top_candidates = [
            {"instruction": candidate, "score": None}
            for candidate in candidates[:top_k]
        ]

        # Update iteration
        update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])

        return ok({
            "run_id": req.run_id,
            "candidates": top_candidates,
            "iteration": run["iteration"] + 1,
            "evaluated": False,
            "best_score": run["best_score"]
        })


@app.post("/opro/execute", tags=["opro-true"])
def opro_execute(req: OPROExecuteReq):
    """
    Execute a system instruction with user input.

    This uses the selected instruction as a system prompt and calls the LLM.
    """
    try:
        # Construct full prompt with system instruction
        full_prompt = f"{req.instruction}\n\n{req.user_input}"

        # Call LLM
        response = call_qwen(
            full_prompt,
            temperature=0.2,
            max_tokens=1024,
            model_name=req.model_name
        )

        return ok({
            "instruction": req.instruction,
            "user_input": req.user_input,
            "response": response
        })
    except Exception as e:
        raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								from fastapi import FastAPI, HTTPException, Request
 								from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
 								from fastapi.staticfiles import StaticFiles
 								from pydantic import BaseModel
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								from typing import List, Tuple, Optional
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								import config
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								# Legacy session management (query rewriting)
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								from .opro.session_state import create_session, get_session, update_session_add_candidates, log_user_choice
 								from .opro.session_state import log_user_reject
 								from .opro.session_state import set_selected_prompt, log_chat_message
 								from .opro.session_state import set_session_model
 								from .opro.session_state import USER_FEEDBACK_LOG
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
 								# True OPRO session management
 								from .opro.session_state import (
 								    create_opro_run, get_opro_run, update_opro_iteration,
 								    add_opro_evaluation, get_opro_trajectory, set_opro_test_cases,
 								    complete_opro_run, list_opro_runs
 								)
 								# Optimization functions
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								from .opro.user_prompt_optimizer import generate_candidates
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								from .opro.user_prompt_optimizer import (
 								    generate_system_instruction_candidates,
 								    evaluate_system_instruction
 								)
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								from .opro.ollama_client import call_qwen
 								from .opro.ollama_client import list_models
 								from fastapi.middleware.cors import CORSMiddleware
 								app = FastAPI(
 								    title=config.APP_TITLE,
 								    description=config.APP_DESCRIPTION,
 								    version=config.APP_VERSION,
 								    contact=config.APP_CONTACT,
 								    openapi_tags=[
 								        {"name": "health", "description": "健康检查"},
 								        {"name": "models", "description": "模型列表与设置"},
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								        {"name": "sessions", "description": "会话管理（旧版查询重写）"},
 								        {"name": "opro-legacy", "description": "旧版提示优化（查询重写）"},
 								        {"name": "opro-true", "description": "真正的OPRO（系统指令优化）"},
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								        {"name": "chat", "description": "会话聊天"},
 								        {"name": "ui", "description": "静态页面"}
 								    ]
 								)
 								app.add_middleware(
 								    CORSMiddleware,
 								    allow_origins=["*"],
 								    allow_credentials=True,
 								    allow_methods=["*"],
 								    allow_headers=["*"],
 								)
 								MAX_ROUNDS = 3
 								def ok(data=None):
 								    return JSONResponse({"success": True, "data": data})
 								class AppException(HTTPException):
 								    def __init__(self, status_code: int, detail: str, error_code: str):
 								        super().__init__(status_code=status_code, detail=detail)
 								        self.error_code = error_code
 								@app.exception_handler(AppException)
 								def _app_exc_handler(request: Request, exc: AppException):
 								    return JSONResponse(status_code=exc.status_code, content={
 								        "success": False,
 								        "error": str(exc.detail),
 								        "error_code": exc.error_code
 								    })
 								@app.exception_handler(HTTPException)
 								def _http_exc_handler(request: Request, exc: HTTPException):
 								    return JSONResponse(status_code=exc.status_code, content={
 								        "success": False,
 								        "error": str(exc.detail),
 								        "error_code": "HTTP_ERROR"
 								    })
 								@app.exception_handler(Exception)
 								def _generic_exc_handler(request: Request, exc: Exception):
 								    return JSONResponse(status_code=500, content={
 								        "success": False,
 								        "error": "internal error",
 								        "error_code": "INTERNAL_ERROR"
 								    })
 								class StartReq(BaseModel):
 								    query: str
 								class NextReq(BaseModel):
 								    session_id: str
 								class SelectReq(BaseModel):
 								    session_id: str
 								    choice: str
 								class RejectReq(BaseModel):
 								    session_id: str
 								    candidate: str
 								    reason: str | None = None
 								class SetModelReq(BaseModel):
 								    session_id: str
 								    model_name: str
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
 								# ============================================================================
 								# TRUE OPRO REQUEST MODELS
 								# ============================================================================
 								class TestCase(BaseModel):
 								    """A single test case for OPRO evaluation."""
 								    input: str
 								    expected_output: str
 								class CreateOPRORunReq(BaseModel):
 								    """Request to create a new OPRO optimization run."""
 								    task_description: str
 								    test_cases: Optional[List[TestCase]] = None
 								    model_name: Optional[str] = None
 								class OPROIterateReq(BaseModel):
 								    """Request to run one OPRO iteration."""
 								    run_id: str
 								    top_k: Optional[int] = None
 								class OPROEvaluateReq(BaseModel):
 								    """Request to evaluate a system instruction."""
 								    run_id: str
 								    instruction: str
 								class OPROAddTestCasesReq(BaseModel):
 								    """Request to add test cases to an OPRO run."""
 								    run_id: str
 								    test_cases: List[TestCase]
 								class OPROGenerateAndEvaluateReq(BaseModel):
 								    """Request to generate and auto-evaluate candidates (for chat-like UX)."""
 								    run_id: str
 								    top_k: Optional[int] = None
 								    pool_size: Optional[int] = None
 								    auto_evaluate: Optional[bool] = True  # If False, use diversity-based selection only
 								class OPROExecuteReq(BaseModel):
 								    """Request to execute a system instruction with user input."""
 								    instruction: str
 								    user_input: str
 								    model_name: Optional[str] = None
 								# ============================================================================
 								# LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO)
 								# ============================================================================
 								@app.post("/start", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def start(req: StartReq):
 								    sid = create_session(req.query)
 								    cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
 								    update_session_add_candidates(sid, cands)
 								    return ok({"session_id": sid, "round": 0, "candidates": cands})
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								@app.post("/next", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def next_round(req: NextReq):
 								    s = get_session(req.session_id)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    if s["round"] >= MAX_ROUNDS:
 								        ans = call_qwen(s["original_query"], temperature=0.3, max_tokens=512)
 								        return ok({"final": True, "answer": ans})
 								    cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
 								    update_session_add_candidates(req.session_id, cands)
 								    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								@app.post("/select", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def select(req: SelectReq):
 								    s = get_session(req.session_id)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    log_user_choice(req.session_id, req.choice)
 								    set_selected_prompt(req.session_id, req.choice)
 								    log_chat_message(req.session_id, "system", req.choice)
 								    try:
 								        ans = call_qwen(req.choice, temperature=0.2, max_tokens=1024, model_name=s.get("model_name"))
 								    except Exception as e:
 								        raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
 								    log_chat_message(req.session_id, "assistant", ans)
 								    try:
 								        import os, json
 								        os.makedirs("outputs", exist_ok=True)
 								        with open("outputs/user_feedback.jsonl", "a", encoding="utf-8") as f:
 								            f.write(json.dumps({
 								                "session_id": req.session_id,
 								                "round": s["round"],
 								                "choice": req.choice,
 								                "answer": ans
 								            }, ensure_ascii=False) + "\n")
 								    except Exception:
 								        pass
 								    return ok({"prompt": req.choice, "answer": ans})
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								@app.post("/reject", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def reject(req: RejectReq):
 								    s = get_session(req.session_id)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    log_user_reject(req.session_id, req.candidate, req.reason)
 								    cands = generate_candidates(s["original_query"], s["history_candidates"] + [req.candidate], model_name=s.get("model_name"))
 								    update_session_add_candidates(req.session_id, cands)
 								    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
 								class QueryReq(BaseModel):
 								    query: str
 								    session_id: str | None = None
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								@app.post("/query", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def query(req: QueryReq):
 								    if req.session_id:
 								        s = get_session(req.session_id)
 								        if not s:
 								            raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								        cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
 								        update_session_add_candidates(req.session_id, cands)
 								        return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
 								    else:
 								        sid = create_session(req.query)
 								        log_chat_message(sid, "user", req.query)
 								        cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
 								        update_session_add_candidates(sid, cands)
 								        return ok({"session_id": sid, "round": 0, "candidates": cands})
 								app.mount("/ui", StaticFiles(directory="frontend", html=True), name="static")
 								@app.get("/", tags=["ui"])
 								def root():
 								    return RedirectResponse(url="/ui/")
 								@app.get("/health", tags=["health"])
 								def health():
 								    return ok({"status": "ok", "version": config.APP_VERSION})
 								@app.get("/version", tags=["health"])
 								def version():
 								    return ok({"version": config.APP_VERSION})
 								# @app.get("/ui/react", tags=["ui"])
 								# def ui_react():
 								#     return FileResponse("frontend/react/index.html")
 								# @app.get("/ui/offline", tags=["ui"])
 								# def ui_offline():
 								#     return FileResponse("frontend/ui_offline.html")
 								@app.get("/react", tags=["ui"])
 								def react_root():
 								    return FileResponse("frontend/react/index.html")
 								@app.get("/sessions", tags=["sessions"])
 								def sessions():
 								    from .opro.session_state import SESSIONS
 								    return ok({"sessions": [{
 								        "session_id": sid,
 								        "round": s.get("round", 0),
 								        "selected_prompt": s.get("selected_prompt"),
 								        "original_query": s.get("original_query")
 								    } for sid, s in SESSIONS.items()]})
 								@app.get("/session/{sid}", tags=["sessions"])
 								def session_detail(sid: str):
 								    s = get_session(sid)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    return ok({
 								        "session_id": sid,
 								        "round": s["round"],
 								        "original_query": s["original_query"],
 								        "selected_prompt": s["selected_prompt"],
 								        "candidates": s["history_candidates"],
 								        "user_feedback": s["user_feedback"],
 								        "rejected": s["rejected"],
 								        "history": s["chat_history"],
 								    })
 								class MessageReq(BaseModel):
 								    session_id: str
 								    message: str
 								@app.post("/message", tags=["chat"])
 								def message(req: MessageReq):
 								    s = get_session(req.session_id)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    log_chat_message(req.session_id, "user", req.message)
 								    base_prompt = s.get("selected_prompt") or s["original_query"]
 								    full_prompt = base_prompt + "\n\n" + req.message
 								    try:
 								        ans = call_qwen(full_prompt, temperature=0.3, max_tokens=1024, model_name=s.get("model_name"))
 								    except Exception as e:
 								        raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
 								    log_chat_message(req.session_id, "assistant", ans)
 								    return ok({"session_id": req.session_id, "answer": ans, "history": s["chat_history"]})
 								class QueryFromMsgReq(BaseModel):
 								    session_id: str
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								@app.post("/query_from_message", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def query_from_message(req: QueryFromMsgReq):
 								    s = get_session(req.session_id)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    last_user = None
 								    for m in reversed(s.get("chat_history", [])):
 								        if m.get("role") == "user" and m.get("content"):
 								            last_user = m["content"]
 								            break
 								    base = last_user or s["original_query"]
 								    cands = generate_candidates(base, s["history_candidates"], model_name=s.get("model_name"))
 								    update_session_add_candidates(req.session_id, cands)
 								    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
 								class AnswerReq(BaseModel):
 								    query: str
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
+								@app.post("/answer", tags=["opro-legacy"])
-												原始代码

											
										
										
											2025-12-05 07:11:25 +00:00
+								def answer(req: AnswerReq):
 								    sid = create_session(req.query)
 								    log_chat_message(sid, "user", req.query)
 								    ans = call_qwen(req.query, temperature=0.2, max_tokens=1024)
 								    log_chat_message(sid, "assistant", ans)
 								    cands = generate_candidates(req.query, [])
 								    update_session_add_candidates(sid, cands)
 								    return ok({"session_id": sid, "answer": ans, "candidates": cands})
 								@app.get("/models", tags=["models"])
 								def models():
 								    return ok({"models": list_models()})
 								@app.post("/set_model", tags=["models"])
 								def set_model(req: SetModelReq):
 								    s = get_session(req.session_id)
 								    if not s:
 								        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
 								    avail = set(list_models() or [])
 								    if req.model_name not in avail:
 								        raise AppException(400, f"model not available: {req.model_name}", "MODEL_NOT_AVAILABLE")
 								    set_session_model(req.session_id, req.model_name)
 								    return ok({"session_id": req.session_id, "model_name": req.model_name})
-												feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder

											
										
										
											2025-12-06 17:24:28 +08:00
 								# ============================================================================
 								# TRUE OPRO ENDPOINTS (System Instruction Optimization)
 								# ============================================================================
 								@app.post("/opro/create", tags=["opro-true"])
 								def opro_create_run(req: CreateOPRORunReq):
 								    """
 								    Create a new OPRO optimization run.
 								    This starts a new system instruction optimization process for a given task.
 								    """
 								    # Convert test cases from Pydantic models to tuples
 								    test_cases = None
 								    if req.test_cases:
 								        test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]
 								    run_id = create_opro_run(
 								        task_description=req.task_description,
 								        test_cases=test_cases,
 								        model_name=req.model_name
 								    )
 								    run = get_opro_run(run_id)
 								    return ok({
 								        "run_id": run_id,
 								        "task_description": run["task_description"],
 								        "num_test_cases": len(run["test_cases"]),
 								        "iteration": run["iteration"],
 								        "status": run["status"]
 								    })
 								@app.post("/opro/iterate", tags=["opro-true"])
 								def opro_iterate(req: OPROIterateReq):
 								    """
 								    Run one OPRO iteration: generate new system instruction candidates.
 								    This generates optimized system instructions based on the performance trajectory.
 								    """
 								    run = get_opro_run(req.run_id)
 								    if not run:
 								        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
 								    # Get trajectory for optimization
 								    trajectory = get_opro_trajectory(req.run_id)
 								    # Generate candidates
 								    top_k = req.top_k or config.TOP_K
 								    try:
 								        candidates = generate_system_instruction_candidates(
 								            task_description=run["task_description"],
 								            trajectory=trajectory if trajectory else None,
 								            top_k=top_k,
 								            model_name=run["model_name"]
 								        )
 								    except Exception as e:
 								        raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")
 								    # Update run with new candidates
 								    update_opro_iteration(req.run_id, candidates)
 								    return ok({
 								        "run_id": req.run_id,
 								        "iteration": run["iteration"] + 1,
 								        "candidates": candidates,
 								        "num_candidates": len(candidates),
 								        "best_score": run["best_score"]
 								    })
 								@app.post("/opro/evaluate", tags=["opro-true"])
 								def opro_evaluate(req: OPROEvaluateReq):
 								    """
 								    Evaluate a system instruction on the test cases.
 								    This scores the instruction and updates the performance trajectory.
 								    """
 								    run = get_opro_run(req.run_id)
 								    if not run:
 								        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
 								    if not run["test_cases"]:
 								        raise AppException(400, "No test cases defined for this run", "NO_TEST_CASES")
 								    # Evaluate the instruction
 								    try:
 								        score = evaluate_system_instruction(
 								            system_instruction=req.instruction,
 								            test_cases=run["test_cases"],
 								            model_name=run["model_name"]
 								        )
 								    except Exception as e:
 								        raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
 								    # Add to trajectory
 								    add_opro_evaluation(req.run_id, req.instruction, score)
 								    # Get updated run info
 								    run = get_opro_run(req.run_id)
 								    return ok({
 								        "run_id": req.run_id,
 								        "instruction": req.instruction,
 								        "score": score,
 								        "best_score": run["best_score"],
 								        "is_new_best": score == run["best_score"] and score > 0
 								    })
 								@app.get("/opro/runs", tags=["opro-true"])
 								def opro_list_runs():
 								    """
 								    List all OPRO optimization runs.
 								    """
 								    runs = list_opro_runs()
 								    return ok({"runs": runs, "total": len(runs)})
 								@app.get("/opro/run/{run_id}", tags=["opro-true"])
 								def opro_get_run(run_id: str):
 								    """
 								    Get detailed information about an OPRO run.
 								    """
 								    run = get_opro_run(run_id)
 								    if not run:
 								        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
 								    # Get sorted trajectory
 								    trajectory = get_opro_trajectory(run_id)
 								    return ok({
 								        "run_id": run_id,
 								        "task_description": run["task_description"],
 								        "iteration": run["iteration"],
 								        "status": run["status"],
 								        "best_score": run["best_score"],
 								        "best_instruction": run["best_instruction"],
 								        "num_test_cases": len(run["test_cases"]),
 								        "test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in run["test_cases"]],
 								        "trajectory": [{"instruction": inst, "score": score} for inst, score in trajectory[:10]],  # Top 10
 								        "current_candidates": run["current_candidates"]
 								    })
 								@app.post("/opro/test_cases", tags=["opro-true"])
 								def opro_add_test_cases(req: OPROAddTestCasesReq):
 								    """
 								    Add or update test cases for an OPRO run.
 								    """
 								    run = get_opro_run(req.run_id)
 								    if not run:
 								        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
 								    # Convert test cases
 								    test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]
 								    # Update test cases
 								    set_opro_test_cases(req.run_id, test_cases)
 								    return ok({
 								        "run_id": req.run_id,
 								        "num_test_cases": len(test_cases),
 								        "test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in test_cases]
 								    })
 								@app.post("/opro/generate_and_evaluate", tags=["opro-true"])
 								def opro_generate_and_evaluate(req: OPROGenerateAndEvaluateReq):
 								    """
 								    Generate candidates and auto-evaluate them (for chat-like UX).
 								    This is the main endpoint for the chat interface. It:
 . Generates candidates based on trajectory
 . Auto-evaluates them (if test cases exist and auto_evaluate=True)
 . Returns top-k sorted by score (or diversity if no evaluation)
 								    """
 								    run = get_opro_run(req.run_id)
 								    if not run:
 								        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
 								    top_k = req.top_k or config.TOP_K
 								    pool_size = req.pool_size or config.GENERATION_POOL_SIZE
 								    # Get trajectory for optimization
 								    trajectory = get_opro_trajectory(req.run_id)
 								    # Generate candidates
 								    try:
 								        candidates = generate_system_instruction_candidates(
 								            task_description=run["task_description"],
 								            trajectory=trajectory if trajectory else None,
 								            top_k=pool_size,  # Generate pool_size candidates first
 								            pool_size=pool_size,
 								            model_name=run["model_name"]
 								        )
 								    except Exception as e:
 								        raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")
 								    # Decide whether to evaluate
 								    should_evaluate = req.auto_evaluate and len(run["test_cases"]) > 0
 								    if should_evaluate:
 								        # Auto-evaluate all candidates
 								        scored_candidates = []
 								        for candidate in candidates:
 								            try:
 								                score = evaluate_system_instruction(
 								                    system_instruction=candidate,
 								                    test_cases=run["test_cases"],
 								                    model_name=run["model_name"]
 								                )
 								                scored_candidates.append({"instruction": candidate, "score": score})
 								                # Add to trajectory
 								                add_opro_evaluation(req.run_id, candidate, score)
 								            except Exception as e:
 								                # If evaluation fails, assign score 0
 								                scored_candidates.append({"instruction": candidate, "score": 0.0})
 								        # Sort by score (highest first)
 								        scored_candidates.sort(key=lambda x: x["score"], reverse=True)
 								        # Return top-k
 								        top_candidates = scored_candidates[:top_k]
 								        # Update iteration
 								        update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])
 								        return ok({
 								            "run_id": req.run_id,
 								            "candidates": top_candidates,
 								            "iteration": run["iteration"] + 1,
 								            "evaluated": True,
 								            "best_score": run["best_score"]
 								        })
 								    else:
 								        # No evaluation - use diversity-based selection (already done by clustering)
 								        # Just return the candidates without scores
 								        top_candidates = [
 								            {"instruction": candidate, "score": None}
 								            for candidate in candidates[:top_k]
 								        ]
 								        # Update iteration
 								        update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])
 								        return ok({
 								            "run_id": req.run_id,
 								            "candidates": top_candidates,
 								            "iteration": run["iteration"] + 1,
 								            "evaluated": False,
 								            "best_score": run["best_score"]
 								        })
 								@app.post("/opro/execute", tags=["opro-true"])
 								def opro_execute(req: OPROExecuteReq):
 								    """
 								    Execute a system instruction with user input.
 								    This uses the selected instruction as a system prompt and calls the LLM.
 								    """
 								    try:
 								        # Construct full prompt with system instruction
 								        full_prompt = f"{req.instruction}\n\n{req.user_input}"
 								        # Call LLM
 								        response = call_qwen(
 								            full_prompt,
 								            temperature=0.2,
 								            max_tokens=1024,
 								            model_name=req.model_name
 								        )
 								        return ok({
 								            "instruction": req.instruction,
 								            "user_input": req.user_input,
 								            "response": response
 								        })
 								    except Exception as e:
 								        raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")