refactor: replace OPRO with simple iterative refinement

Major changes:
- Remove the fake OPRO evaluation (no more placeholder 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (selected + rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with a role definition (你是一个..., i.e. "You are a...")
- Update README to reflect new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py (see the sketch after this list)
- Updated the frontend handleContinueOptimize() to use the new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added role definition requirement as first item in all prompt templates
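
For reference, a minimal sketch of how the new endpoint could be wired up, assuming api.py is a FastAPI app. Only the model name (OPRORefineReq), the route (POST /opro/refine), and the call into refine_instruction_candidates() come from this commit; the field names, module path, response shape, and app wiring are assumptions for illustration.

from typing import List, Optional

from fastapi import FastAPI
from pydantic import BaseModel

from .user_prompt_optimizer import refine_instruction_candidates  # added in this commit

app = FastAPI()

class OPRORefineReq(BaseModel):
    task_description: str             # what the optimized prompt should accomplish (assumed field)
    selected_instruction: str         # the candidate the user kept (assumed field)
    rejected_instructions: List[str]  # the candidates the user passed over (assumed field)
    model_name: Optional[str] = None  # optional override of the default model (assumed field)

@app.post("/opro/refine")
def opro_refine(req: OPRORefineReq):
    # Generate a new batch of candidates that stays close to the selected
    # instruction while steering away from the rejected ones.
    candidates = refine_instruction_candidates(
        task_description=req.task_description,
        selected_instruction=req.selected_instruction,
        rejected_instructions=req.rejected_instructions,
        model_name=req.model_name,
    )
    return {"candidates": candidates}

A frontend call would then POST the selected and rejected instructions as JSON and render the returned candidates for the next round of selection.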
2025-12-08 09:43:20 +08:00
parent 602875b08c
commit 65cdcf29dc
5 changed files with 315 additions and 86 deletions


@@ -11,7 +11,8 @@ from .prompt_utils import (
     refine_instruction,
     refine_instruction_with_history,
     generate_initial_system_instruction_candidates,
-    generate_optimized_system_instruction
+    generate_optimized_system_instruction,
+    refine_based_on_selection
 )
 
 def parse_candidates(raw: str) -> list:
@@ -147,3 +148,46 @@ def evaluate_system_instruction(
             correct += 1
     return correct / total
+
+
+def refine_instruction_candidates(
+    task_description: str,
+    selected_instruction: str,
+    rejected_instructions: List[str],
+    top_k: int = config.TOP_K,
+    pool_size: int = None,
+    model_name: str = None
+) -> List[str]:
+    """
+    Simple refinement: Generate new candidates based on user's selection.
+
+    This is NOT OPRO - just straightforward iterative refinement.
+    User picks a favorite, we generate variations of it while avoiding rejected ones.
+
+    Args:
+        task_description: Description of the task
+        selected_instruction: The instruction the user selected
+        rejected_instructions: The instructions the user didn't select
+        top_k: Number of diverse candidates to return
+        pool_size: Number of candidates to generate before clustering
+        model_name: Optional model name to use
+
+    Returns:
+        List of refined instruction candidates
+    """
+    pool_size = pool_size or config.GENERATION_POOL_SIZE
+
+    # Generate the refinement prompt
+    meta_prompt = refine_based_on_selection(
+        task_description,
+        selected_instruction,
+        rejected_instructions,
+        pool_size
+    )
+
+    # Use LLM to generate refined candidates
+    raw = call_qwen(meta_prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
+
+    # Parse and cluster
+    all_candidates = parse_candidates(raw)
+    return cluster_and_select(all_candidates, top_k=top_k)
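
The meta-prompt builder itself lives in prompt_utils.py and does not appear in this file's diff. Below is a rough sketch of what refine_based_on_selection() might look like, inferred from the call site above and from the role-definition / comprehensive-coverage requirements in the commit message; the exact prompt wording is an assumption.

from typing import List

def refine_based_on_selection(
    task_description: str,
    selected_instruction: str,
    rejected_instructions: List[str],
    pool_size: int,
) -> str:
    """Build the meta-prompt that asks the LLM for refined instruction candidates (sketch)."""
    # Hypothetical prompt wording; only the signature is taken from the diff above.
    rejected_block = "\n".join(f"- {r}" for r in rejected_instructions) or "- (none)"
    return (
        f"Task description: {task_description}\n\n"
        f"The user preferred this instruction:\n{selected_instruction}\n\n"
        f"The user rejected these instructions:\n{rejected_block}\n\n"
        f"Generate {pool_size} new candidate instructions that keep the strengths of the "
        "preferred one and avoid the weaknesses of the rejected ones. Each candidate must "
        "start with a role definition (你是一个...) and cover the task comprehensively.\n"
        "Return one instruction per line."
    )

The high temperature (0.9) in the call_qwen() call above then provides enough variation across the pool for cluster_and_select() to return top_k diverse survivors.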