opro_demo/_qwen_xinference_demo/api.py
leehwui 65cdcf29dc refactor: replace OPRO with simple iterative refinement
Major changes:
- Remove fake OPRO evaluation (no more fake 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (selected + rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with a role definition (你是一个... / "You are a...")
- Update README to reflect new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py
- Updated frontend handleContinueOptimize() to use new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added role definition requirement as first item in all prompt templates
2025-12-08 09:43:20 +08:00
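
The new refinement flow replaces scoring with an explicit user choice. A minimal client sketch of that round-trip is shown below; it assumes the API is served locally on port 8000 and uses the requests library, and the base URL and task text are illustrative, not taken from the repo:

import requests

BASE = "http://localhost:8000"  # assumed local dev address

run = requests.post(f"{BASE}/opro/create", json={
    "task_description": "Rewrite casual questions into formal written queries"
}).json()["data"]

cands = requests.post(f"{BASE}/opro/generate_and_evaluate", json={
    "run_id": run["run_id"]
}).json()["data"]["candidates"]

# The user keeps one candidate and rejects the rest; /opro/refine proposes new ones around it.
refined = requests.post(f"{BASE}/opro/refine", json={
    "run_id": run["run_id"],
    "selected_instruction": cands[0]["instruction"],
    "rejected_instructions": [c["instruction"] for c in cands[1:]],
}).json()["data"]["candidates"]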


from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from typing import List, Tuple, Optional
import config
# Legacy session management (query rewriting)
from .opro.session_state import create_session, get_session, update_session_add_candidates, log_user_choice
from .opro.session_state import log_user_reject
from .opro.session_state import set_selected_prompt, log_chat_message
from .opro.session_state import set_session_model
from .opro.session_state import USER_FEEDBACK_LOG
# True OPRO session management
from .opro.session_state import (
    create_opro_session, get_opro_session, list_opro_sessions,
    create_opro_run, get_opro_run, update_opro_iteration,
    add_opro_evaluation, get_opro_trajectory, set_opro_test_cases,
    complete_opro_run, list_opro_runs
)
# Optimization functions
from .opro.user_prompt_optimizer import generate_candidates
from .opro.user_prompt_optimizer import (
    generate_system_instruction_candidates,
    evaluate_system_instruction,
    refine_instruction_candidates
)
from .opro.ollama_client import call_qwen
from .opro.ollama_client import list_models
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI(
    title=config.APP_TITLE,
    description=config.APP_DESCRIPTION,
    version=config.APP_VERSION,
    contact=config.APP_CONTACT,
    openapi_tags=[
        {"name": "health", "description": "健康检查"},
        {"name": "models", "description": "模型列表与设置"},
        {"name": "sessions", "description": "会话管理(旧版查询重写)"},
        {"name": "opro-legacy", "description": "旧版提示优化(查询重写)"},
        {"name": "opro-true", "description": "真正的OPRO系统指令优化"},
        {"name": "chat", "description": "会话聊天"},
        {"name": "ui", "description": "静态页面"}
    ]
)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
MAX_ROUNDS = 3
def ok(data=None):
    return JSONResponse({"success": True, "data": data})
class AppException(HTTPException):
    def __init__(self, status_code: int, detail: str, error_code: str):
        super().__init__(status_code=status_code, detail=detail)
        self.error_code = error_code
@app.exception_handler(AppException)
def _app_exc_handler(request: Request, exc: AppException):
    return JSONResponse(status_code=exc.status_code, content={
        "success": False,
        "error": str(exc.detail),
        "error_code": exc.error_code
    })
@app.exception_handler(HTTPException)
def _http_exc_handler(request: Request, exc: HTTPException):
    return JSONResponse(status_code=exc.status_code, content={
        "success": False,
        "error": str(exc.detail),
        "error_code": "HTTP_ERROR"
    })
@app.exception_handler(Exception)
def _generic_exc_handler(request: Request, exc: Exception):
    return JSONResponse(status_code=500, content={
        "success": False,
        "error": "internal error",
        "error_code": "INTERNAL_ERROR"
    })
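# Every endpoint below returns the same envelope: {"success": true, "data": {...}} on success,
# or {"success": false, "error": "...", "error_code": "..."} when one of the handlers above fires.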
class StartReq(BaseModel):
    query: str
class NextReq(BaseModel):
    session_id: str
class SelectReq(BaseModel):
    session_id: str
    choice: str
class RejectReq(BaseModel):
    session_id: str
    candidate: str
    reason: str | None = None
class SetModelReq(BaseModel):
    session_id: str
    model_name: str
# ============================================================================
# TRUE OPRO REQUEST MODELS
# ============================================================================
class TestCase(BaseModel):
    """A single test case for OPRO evaluation."""
    input: str
    expected_output: str
class CreateOPRORunReq(BaseModel):
    """Request to create a new OPRO optimization run."""
    task_description: str
    test_cases: Optional[List[TestCase]] = None
    model_name: Optional[str] = None
    session_id: Optional[str] = None  # Optional session to associate with
class OPROIterateReq(BaseModel):
    """Request to run one OPRO iteration."""
    run_id: str
    top_k: Optional[int] = None
class OPROEvaluateReq(BaseModel):
    """Request to evaluate a system instruction."""
    run_id: str
    instruction: str
class OPROAddTestCasesReq(BaseModel):
    """Request to add test cases to an OPRO run."""
    run_id: str
    test_cases: List[TestCase]
class OPROGenerateAndEvaluateReq(BaseModel):
    """Request to generate and auto-evaluate candidates (for chat-like UX)."""
    run_id: str
    top_k: Optional[int] = None
    pool_size: Optional[int] = None
    auto_evaluate: Optional[bool] = True  # If False, use diversity-based selection only
class OPROExecuteReq(BaseModel):
    """Request to execute a system instruction with user input."""
    instruction: str
    user_input: str
    model_name: Optional[str] = None
class OPRORefineReq(BaseModel):
    """Request to refine based on selected instruction (simple iterative refinement, NOT OPRO)."""
    run_id: str
    selected_instruction: str
    rejected_instructions: List[str]
    top_k: Optional[int] = None
    pool_size: Optional[int] = None
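# Example body for POST /opro/refine (values are illustrative):
# {
#     "run_id": "<run_id from /opro/create>",
#     "selected_instruction": "<the candidate the user kept>",
#     "rejected_instructions": ["<candidates the user passed over>"],
#     "top_k": 3
# }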
# ============================================================================
# LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO)
# ============================================================================
@app.post("/start", tags=["opro-legacy"])
def start(req: StartReq):
sid = create_session(req.query)
cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
update_session_add_candidates(sid, cands)
return ok({"session_id": sid, "round": 0, "candidates": cands})
@app.post("/next", tags=["opro-legacy"])
def next_round(req: NextReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
if s["round"] >= MAX_ROUNDS:
ans = call_qwen(s["original_query"], temperature=0.3, max_tokens=512)
return ok({"final": True, "answer": ans})
cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
update_session_add_candidates(req.session_id, cands)
return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
@app.post("/select", tags=["opro-legacy"])
def select(req: SelectReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
log_user_choice(req.session_id, req.choice)
set_selected_prompt(req.session_id, req.choice)
log_chat_message(req.session_id, "system", req.choice)
try:
ans = call_qwen(req.choice, temperature=0.2, max_tokens=1024, model_name=s.get("model_name"))
except Exception as e:
raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
log_chat_message(req.session_id, "assistant", ans)
try:
import os, json
os.makedirs("outputs", exist_ok=True)
with open("outputs/user_feedback.jsonl", "a", encoding="utf-8") as f:
f.write(json.dumps({
"session_id": req.session_id,
"round": s["round"],
"choice": req.choice,
"answer": ans
}, ensure_ascii=False) + "\n")
except Exception:
pass
return ok({"prompt": req.choice, "answer": ans})
@app.post("/reject", tags=["opro-legacy"])
def reject(req: RejectReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
log_user_reject(req.session_id, req.candidate, req.reason)
cands = generate_candidates(s["original_query"], s["history_candidates"] + [req.candidate], model_name=s.get("model_name"))
update_session_add_candidates(req.session_id, cands)
return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
class QueryReq(BaseModel):
query: str
session_id: str | None = None
@app.post("/query", tags=["opro-legacy"])
def query(req: QueryReq):
if req.session_id:
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
cands = generate_candidates(s["original_query"], s["history_candidates"], model_name=s.get("model_name"))
update_session_add_candidates(req.session_id, cands)
return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
else:
sid = create_session(req.query)
log_chat_message(sid, "user", req.query)
cands = generate_candidates(req.query, [], model_name=get_session(sid).get("model_name"))
update_session_add_candidates(sid, cands)
return ok({"session_id": sid, "round": 0, "candidates": cands})
app.mount("/ui", StaticFiles(directory="frontend", html=True), name="static")
@app.get("/", tags=["ui"])
def root():
return RedirectResponse(url="/ui/")
@app.get("/health", tags=["health"])
def health():
return ok({"status": "ok", "version": config.APP_VERSION})
@app.get("/version", tags=["health"])
def version():
return ok({"version": config.APP_VERSION})
# @app.get("/ui/react", tags=["ui"])
# def ui_react():
# return FileResponse("frontend/react/index.html")
# @app.get("/ui/offline", tags=["ui"])
# def ui_offline():
# return FileResponse("frontend/ui_offline.html")
@app.get("/react", tags=["ui"])
def react_root():
return FileResponse("frontend/react/index.html")
@app.get("/sessions", tags=["sessions"])
def sessions():
from .opro.session_state import SESSIONS
return ok({"sessions": [{
"session_id": sid,
"round": s.get("round", 0),
"selected_prompt": s.get("selected_prompt"),
"original_query": s.get("original_query")
} for sid, s in SESSIONS.items()]})
@app.get("/session/{sid}", tags=["sessions"])
def session_detail(sid: str):
s = get_session(sid)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
return ok({
"session_id": sid,
"round": s["round"],
"original_query": s["original_query"],
"selected_prompt": s["selected_prompt"],
"candidates": s["history_candidates"],
"user_feedback": s["user_feedback"],
"rejected": s["rejected"],
"history": s["chat_history"],
})
class MessageReq(BaseModel):
session_id: str
message: str
@app.post("/message", tags=["chat"])
def message(req: MessageReq):
s = get_session(req.session_id)
if not s:
raise AppException(404, "session not found", "SESSION_NOT_FOUND")
log_chat_message(req.session_id, "user", req.message)
base_prompt = s.get("selected_prompt") or s["original_query"]
full_prompt = base_prompt + "\n\n" + req.message
try:
ans = call_qwen(full_prompt, temperature=0.3, max_tokens=1024, model_name=s.get("model_name"))
except Exception as e:
raise AppException(400, f"ollama error: {e}", "OLLAMA_ERROR")
log_chat_message(req.session_id, "assistant", ans)
return ok({"session_id": req.session_id, "answer": ans, "history": s["chat_history"]})
class QueryFromMsgReq(BaseModel):
    session_id: str
@app.post("/query_from_message", tags=["opro-legacy"])
def query_from_message(req: QueryFromMsgReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    last_user = None
    for m in reversed(s.get("chat_history", [])):
        if m.get("role") == "user" and m.get("content"):
            last_user = m["content"]
            break
    base = last_user or s["original_query"]
    cands = generate_candidates(base, s["history_candidates"], model_name=s.get("model_name"))
    update_session_add_candidates(req.session_id, cands)
    return ok({"session_id": req.session_id, "round": s["round"], "candidates": cands})
class AnswerReq(BaseModel):
    query: str
@app.post("/answer", tags=["opro-legacy"])
def answer(req: AnswerReq):
    sid = create_session(req.query)
    log_chat_message(sid, "user", req.query)
    ans = call_qwen(req.query, temperature=0.2, max_tokens=1024)
    log_chat_message(sid, "assistant", ans)
    cands = generate_candidates(req.query, [])
    update_session_add_candidates(sid, cands)
    return ok({"session_id": sid, "answer": ans, "candidates": cands})
@app.get("/models", tags=["models"])
def models():
    return ok({"models": list_models()})
@app.post("/set_model", tags=["models"])
def set_model(req: SetModelReq):
    s = get_session(req.session_id)
    if not s:
        raise AppException(404, "session not found", "SESSION_NOT_FOUND")
    avail = set(list_models() or [])
    if req.model_name not in avail:
        raise AppException(400, f"model not available: {req.model_name}", "MODEL_NOT_AVAILABLE")
    set_session_model(req.session_id, req.model_name)
    return ok({"session_id": req.session_id, "model_name": req.model_name})
# ============================================================================
# TRUE OPRO ENDPOINTS (System Instruction Optimization)
# ============================================================================
# Session Management
@app.post("/opro/session/create", tags=["opro-true"])
def opro_create_session(session_name: Optional[str] = None):
    """
    Create a new OPRO session that can contain multiple runs.
    """
    session_id = create_opro_session(session_name=session_name)
    session = get_opro_session(session_id)
    return ok({
        "session_id": session_id,
        "session_name": session["session_name"],
        "num_runs": len(session["run_ids"])
    })
@app.get("/opro/sessions", tags=["opro-true"])
def opro_list_sessions():
"""
List all OPRO sessions.
"""
sessions = list_opro_sessions()
return ok({"sessions": sessions})
@app.get("/opro/session/{session_id}", tags=["opro-true"])
def opro_get_session(session_id: str):
"""
Get detailed information about an OPRO session.
"""
session = get_opro_session(session_id)
if not session:
raise AppException(404, "Session not found", "SESSION_NOT_FOUND")
# Get all runs in this session
runs = list_opro_runs(session_id=session_id)
return ok({
"session_id": session_id,
"session_name": session["session_name"],
"created_at": session["created_at"],
"num_runs": len(session["run_ids"]),
"runs": runs
})
# Run Management
@app.post("/opro/create", tags=["opro-true"])
def opro_create_run(req: CreateOPRORunReq):
"""
Create a new OPRO optimization run.
This starts a new system instruction optimization process for a given task.
Optionally can be associated with a session.
"""
# Convert test cases from Pydantic models to tuples
test_cases = None
if req.test_cases:
test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]
run_id = create_opro_run(
task_description=req.task_description,
test_cases=test_cases,
model_name=req.model_name,
session_id=req.session_id
)
run = get_opro_run(run_id)
return ok({
"run_id": run_id,
"task_description": run["task_description"],
"num_test_cases": len(run["test_cases"]),
"iteration": run["iteration"],
"status": run["status"],
"session_id": run.get("session_id")
})
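# Example body for POST /opro/create (all values illustrative):
# {
#     "task_description": "Rewrite casual questions into formal written queries",
#     "test_cases": [{"input": "how do i use this thing", "expected_output": "How should this product be used?"}],
#     "model_name": "<a name returned by GET /models>"
# }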
@app.post("/opro/iterate", tags=["opro-true"])
def opro_iterate(req: OPROIterateReq):
"""
Run one OPRO iteration: generate new system instruction candidates.
This generates optimized system instructions based on the performance trajectory.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Get trajectory for optimization
trajectory = get_opro_trajectory(req.run_id)
# Generate candidates
top_k = req.top_k or config.TOP_K
try:
candidates = generate_system_instruction_candidates(
task_description=run["task_description"],
trajectory=trajectory if trajectory else None,
top_k=top_k,
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")
# Update run with new candidates
update_opro_iteration(req.run_id, candidates)
return ok({
"run_id": req.run_id,
"iteration": run["iteration"] + 1,
"candidates": candidates,
"num_candidates": len(candidates),
"best_score": run["best_score"]
})
@app.post("/opro/evaluate", tags=["opro-true"])
def opro_evaluate(req: OPROEvaluateReq):
"""
Evaluate a system instruction on the test cases.
This scores the instruction and updates the performance trajectory.
If no test cases are defined, uses a default score of 0.5 to indicate user selection.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Evaluate the instruction if test cases exist
if run["test_cases"] and len(run["test_cases"]) > 0:
try:
score = evaluate_system_instruction(
system_instruction=req.instruction,
test_cases=run["test_cases"],
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
else:
# No test cases - use default score to indicate user selection
# This allows the trajectory to track which instructions the user preferred
score = 0.5
# Add to trajectory
add_opro_evaluation(req.run_id, req.instruction, score)
# Get updated run info
run = get_opro_run(req.run_id)
return ok({
"run_id": req.run_id,
"instruction": req.instruction,
"score": score,
"best_score": run["best_score"],
"is_new_best": score == run["best_score"] and score > 0,
"has_test_cases": len(run["test_cases"]) > 0
})
@app.get("/opro/runs", tags=["opro-true"])
def opro_list_runs():
"""
List all OPRO optimization runs.
"""
runs = list_opro_runs()
return ok({"runs": runs, "total": len(runs)})
@app.get("/opro/run/{run_id}", tags=["opro-true"])
def opro_get_run(run_id: str):
"""
Get detailed information about an OPRO run.
"""
run = get_opro_run(run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Get sorted trajectory
trajectory = get_opro_trajectory(run_id)
return ok({
"run_id": run_id,
"task_description": run["task_description"],
"iteration": run["iteration"],
"status": run["status"],
"best_score": run["best_score"],
"best_instruction": run["best_instruction"],
"num_test_cases": len(run["test_cases"]),
"test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in run["test_cases"]],
"trajectory": [{"instruction": inst, "score": score} for inst, score in trajectory[:10]], # Top 10
"current_candidates": run["current_candidates"]
})
@app.post("/opro/test_cases", tags=["opro-true"])
def opro_add_test_cases(req: OPROAddTestCasesReq):
"""
Add or update test cases for an OPRO run.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
# Convert test cases
test_cases = [(tc.input, tc.expected_output) for tc in req.test_cases]
# Update test cases
set_opro_test_cases(req.run_id, test_cases)
return ok({
"run_id": req.run_id,
"num_test_cases": len(test_cases),
"test_cases": [{"input": tc[0], "expected_output": tc[1]} for tc in test_cases]
})
@app.post("/opro/generate_and_evaluate", tags=["opro-true"])
def opro_generate_and_evaluate(req: OPROGenerateAndEvaluateReq):
"""
Generate candidates and auto-evaluate them (for chat-like UX).
This is the main endpoint for the chat interface. It:
1. Generates candidates based on trajectory
2. Auto-evaluates them (if test cases exist and auto_evaluate=True)
3. Returns top-k sorted by score (or diversity if no evaluation)
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
top_k = req.top_k or config.TOP_K
pool_size = req.pool_size or config.GENERATION_POOL_SIZE
# Get trajectory for optimization
trajectory = get_opro_trajectory(req.run_id)
# Generate candidates
try:
candidates = generate_system_instruction_candidates(
task_description=run["task_description"],
trajectory=trajectory if trajectory else None,
top_k=pool_size, # Generate pool_size candidates first
pool_size=pool_size,
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Failed to generate candidates: {e}", "GENERATION_ERROR")
# Decide whether to evaluate
should_evaluate = req.auto_evaluate and len(run["test_cases"]) > 0
if should_evaluate:
# Auto-evaluate all candidates
scored_candidates = []
for candidate in candidates:
try:
score = evaluate_system_instruction(
system_instruction=candidate,
test_cases=run["test_cases"],
model_name=run["model_name"]
)
scored_candidates.append({"instruction": candidate, "score": score})
# Add to trajectory
add_opro_evaluation(req.run_id, candidate, score)
except Exception as e:
# If evaluation fails, assign score 0
scored_candidates.append({"instruction": candidate, "score": 0.0})
# Sort by score (highest first)
scored_candidates.sort(key=lambda x: x["score"], reverse=True)
# Return top-k
top_candidates = scored_candidates[:top_k]
# Update iteration
update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])
return ok({
"run_id": req.run_id,
"candidates": top_candidates,
"iteration": run["iteration"] + 1,
"evaluated": True,
"best_score": run["best_score"]
})
else:
# No evaluation - use diversity-based selection (already done by clustering)
# Just return the candidates without scores
top_candidates = [
{"instruction": candidate, "score": None}
for candidate in candidates[:top_k]
]
# Update iteration
update_opro_iteration(req.run_id, [c["instruction"] for c in top_candidates])
return ok({
"run_id": req.run_id,
"candidates": top_candidates,
"iteration": run["iteration"] + 1,
"evaluated": False,
"best_score": run["best_score"]
})
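# Typical chat-UX sequence (a sketch; the exact wiring lives in the frontend's handleContinueOptimize()):
#   POST /opro/create                -> run_id
#   POST /opro/generate_and_evaluate -> top-k candidates (scored only if test cases exist)
#   POST /opro/execute               -> try the chosen instruction on a real input
#   POST /opro/refine                -> new candidates built around the user's selection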
@app.post("/opro/execute", tags=["opro-true"])
def opro_execute(req: OPROExecuteReq):
"""
Execute a system instruction with user input.
This uses the selected instruction as a system prompt and calls the LLM.
"""
try:
# Construct full prompt with system instruction
full_prompt = f"{req.instruction}\n\n{req.user_input}"
# Call LLM
response = call_qwen(
full_prompt,
temperature=0.2,
max_tokens=1024,
model_name=req.model_name
)
return ok({
"instruction": req.instruction,
"user_input": req.user_input,
"response": response
})
except Exception as e:
raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")
@app.post("/opro/refine", tags=["opro-true"])
def opro_refine(req: OPRORefineReq):
"""
Simple iterative refinement based on user selection (NOT OPRO).
This generates new candidates based on the selected instruction while avoiding rejected ones.
No scoring, no trajectory - just straightforward refinement based on user preference.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
top_k = req.top_k or config.TOP_K
pool_size = req.pool_size or config.GENERATION_POOL_SIZE
try:
candidates = refine_instruction_candidates(
task_description=run["task_description"],
selected_instruction=req.selected_instruction,
rejected_instructions=req.rejected_instructions,
top_k=top_k,
pool_size=pool_size,
model_name=run["model_name"]
)
# Update iteration counter
update_opro_iteration(req.run_id, candidates)
# Get updated run info
run = get_opro_run(req.run_id)
return ok({
"run_id": req.run_id,
"iteration": run["iteration"],
"candidates": [{"instruction": c, "score": None} for c in candidates],
"task_description": run["task_description"]
})
except Exception as e:
raise AppException(500, f"Refinement failed: {e}", "REFINEMENT_ERROR")