refactor: replace OPRO with simple iterative refinement

Major changes:
- Remove fake OPRO evaluation (no more fake 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (selected + rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with a role definition (你是一个..., i.e. "You are a ...")
- Update README to reflect new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py
- Updated frontend handleContinueOptimize() to use new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added role definition requirement as first item in all prompt templates
This commit is contained in:
2025-12-08 09:43:20 +08:00
parent 602875b08c
commit 65cdcf29dc
5 changed files with 315 additions and 86 deletions

View File

@@ -24,7 +24,8 @@ from .opro.session_state import (
from .opro.user_prompt_optimizer import generate_candidates
from .opro.user_prompt_optimizer import (
generate_system_instruction_candidates,
evaluate_system_instruction
evaluate_system_instruction,
refine_instruction_candidates
)
from .opro.ollama_client import call_qwen
@@ -159,6 +160,15 @@ class OPROExecuteReq(BaseModel):
model_name: Optional[str] = None
class OPRORefineReq(BaseModel):
    """Request to refine based on selected instruction (simple iterative refinement, NOT OPRO)."""
    # Identifier of the run to refine; the /opro/refine handler looks the run up with it.
    run_id: str
    # The instruction the user picked; forwarded as the basis for the new candidates.
    selected_instruction: str
    # Instructions the user did not pick; forwarded alongside the selected one.
    rejected_instructions: List[str]
    # Optional override; the handler falls back to config.TOP_K when this is not set.
    top_k: Optional[int] = None
    # Optional override; the handler falls back to config.GENERATION_POOL_SIZE when this is not set.
    pool_size: Optional[int] = None
# ============================================================================
# LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO)
# ============================================================================
@@ -696,3 +706,44 @@ def opro_execute(req: OPROExecuteReq):
})
except Exception as e:
raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")
@app.post("/opro/refine", tags=["opro-true"])
def opro_refine(req: OPRORefineReq):
    """
    Simple iterative refinement based on user selection (NOT OPRO).

    Generates new candidates from the selected instruction; the rejected
    instructions are passed along to the generator as well. No scoring, no
    trajectory - every returned candidate carries a ``score`` of ``None``.

    Args:
        req: Refinement request holding the run id, the selected instruction,
            the rejected instructions, and optional ``top_k`` / ``pool_size``
            overrides.

    Returns:
        The ``ok(...)`` response wrapping ``run_id``, the run's ``iteration``,
        the new ``candidates`` and the run's ``task_description``.

    Raises:
        AppException: 404 ``RUN_NOT_FOUND`` when the run id is unknown;
            500 ``REFINEMENT_ERROR`` when generating candidates or updating
            the run fails (the original exception is attached as the cause).
    """
    run = get_opro_run(req.run_id)
    if not run:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    # `or` means a missing value (None) and an explicit 0 both fall back to
    # the configured default.
    top_k = req.top_k or config.TOP_K
    pool_size = req.pool_size or config.GENERATION_POOL_SIZE

    try:
        candidates = refine_instruction_candidates(
            task_description=run["task_description"],
            selected_instruction=req.selected_instruction,
            rejected_instructions=req.rejected_instructions,
            top_k=top_k,
            pool_size=pool_size,
            model_name=run["model_name"],
        )

        # Update iteration counter
        update_opro_iteration(req.run_id, candidates)

        # Re-read the run so the response reflects the state after the update.
        run = get_opro_run(req.run_id)

        return ok({
            "run_id": req.run_id,
            "iteration": run["iteration"],
            "candidates": [{"instruction": c, "score": None} for c in candidates],
            "task_description": run["task_description"],
        })
    except Exception as e:
        # Chain the original exception so the root cause stays visible in
        # tracebacks and logs instead of being reduced to its message text.
        raise AppException(500, f"Refinement failed: {e}", "REFINEMENT_ERROR") from e