feat: implement true OPRO with Gemini-style UI

- Add true OPRO system instruction optimization (vs query rewriting) - Implement iterative optimization with performance trajectory - Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute) - Create modern Gemini-style chat UI (frontend/opro.html) - Optimize performance: reduce candidates from 20 to 10 (2x faster) - Add model selector in UI toolbar - Add collapsible sidebar with session management - Add copy button for instructions - Ensure all generated prompts use simplified Chinese - Update README with comprehensive documentation - Add .gitignore for local_docs folder
2025-12-06 17:24:28 +08:00
parent 8f52fad41c
commit 1376d60ed5
10 changed files with 1817 additions and 13 deletions
--- a/_qwen_xinference_demo/opro/user_prompt_optimizer.py
+++ b/_qwen_xinference_demo/opro/user_prompt_optimizer.py
@@ -1,12 +1,18 @@
 import re
 import numpy as np
+from typing import List, Tuple
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.metrics.pairwise import cosine_similarity
 import config

 from .ollama_client import call_qwen
 from .xinference_client import embed_texts
-from .prompt_utils import refine_instruction, refine_instruction_with_history
+from .prompt_utils import (
+    refine_instruction,
+    refine_instruction_with_history,
+    generate_initial_system_instruction_candidates,
+    generate_optimized_system_instruction
+)

 def parse_candidates(raw: str) -> list:
    lines = [l.strip() for l in re.split(r'\r?\n', raw) if l.strip()]
@@ -33,7 +39,7 @@ def cluster_and_select(candidates: list, top_k=config.TOP_K, distance_threshold=
                                         linkage="average")
    labels = clustering.fit_predict(X)

-    selected_idx = []
+    selected_idx = []       
    for label in sorted(set(labels)):
        idxs = [i for i,l in enumerate(labels) if l == label]
        sims = cosine_similarity(X[idxs]).mean(axis=1)
@@ -44,6 +50,10 @@ def cluster_and_select(candidates: list, top_k=config.TOP_K, distance_threshold=
    return selected[:top_k]

 def generate_candidates(query: str, rejected=None, top_k=config.TOP_K, model_name=None):
+    """
+    LEGACY: Query rewriting function (NOT true OPRO).
+    Kept for backward compatibility with existing API endpoints.
+    """
    rejected = rejected or []
    if rejected:
        prompt = refine_instruction_with_history(query, rejected)
@@ -53,3 +63,87 @@ def generate_candidates(query: str, rejected=None, top_k=config.TOP_K, model_nam
    raw = call_qwen(prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
    all_candidates = parse_candidates(raw)
    return cluster_and_select(all_candidates, top_k=top_k)
+
+
+# ============================================================================
+# TRUE OPRO FUNCTIONS (System Instruction Optimization)
+# ============================================================================
+
+def generate_system_instruction_candidates(
+    task_description: str,
+    trajectory: List[Tuple[str, float]] = None,
+    top_k: int = config.TOP_K,
+    pool_size: int = None,
+    model_name: str = None
+) -> List[str]:
+    """
+    TRUE OPRO: Generates optimized system instruction candidates.
+
+    This is the core OPRO function that generates system instructions based on
+    performance trajectory (if available) or initial candidates (if starting fresh).
+
+    Args:
+        task_description: Description of the task the LLM should perform
+        trajectory: Optional list of (instruction, score) tuples from previous iterations
+        top_k: Number of diverse candidates to return (default: config.TOP_K = 5)
+        pool_size: Number of candidates to generate before clustering (default: config.GENERATION_POOL_SIZE = 10)
+        model_name: Optional model name to use for generation
+
+    Returns:
+        List of top-k diverse system instruction candidates
+    """
+    pool_size = pool_size or config.GENERATION_POOL_SIZE
+
+    # Generate the meta-prompt based on whether we have trajectory data
+    if trajectory and len(trajectory) > 0:
+        # Sort trajectory by score (highest first)
+        sorted_trajectory = sorted(trajectory, key=lambda x: x[1], reverse=True)
+        meta_prompt = generate_optimized_system_instruction(task_description, sorted_trajectory, pool_size)
+    else:
+        # No trajectory yet, generate initial candidates
+        meta_prompt = generate_initial_system_instruction_candidates(task_description, pool_size)
+
+    # Use the optimizer LLM to generate candidates
+    raw = call_qwen(meta_prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
+
+    # Parse the generated candidates
+    all_candidates = parse_candidates(raw)
+
+    # Cluster and select diverse representatives
+    return cluster_and_select(all_candidates, top_k=top_k)
+
+
+def evaluate_system_instruction(
+    system_instruction: str,
+    test_cases: List[Tuple[str, str]],
+    model_name: str = None
+) -> float:
+    """
+    TRUE OPRO: Evaluates a system instruction's performance on test cases.
+
+    Args:
+        system_instruction: The system instruction to evaluate
+        test_cases: List of (input, expected_output) tuples
+        model_name: Optional model name to use for evaluation
+
+    Returns:
+        Performance score (0.0 to 1.0)
+    """
+    if not test_cases:
+        return 0.0
+
+    correct = 0
+    total = len(test_cases)
+
+    for input_text, expected_output in test_cases:
+        # Construct the full prompt with system instruction
+        full_prompt = f"{system_instruction}\n\n{input_text}"
+
+        # Get LLM response
+        response = call_qwen(full_prompt, temperature=0.2, max_tokens=512, model_name=model_name)
+
+        # Simple exact match scoring (can be replaced with more sophisticated metrics)
+        if expected_output.strip().lower() in response.strip().lower():
+            correct += 1
+
+    return correct / total