feat: implement true OPRO with Gemini-style UI
- Add true OPRO system instruction optimization (vs query rewriting) - Implement iterative optimization with performance trajectory - Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute) - Create modern Gemini-style chat UI (frontend/opro.html) - Optimize performance: reduce candidates from 20 to 10 (2x faster) - Add model selector in UI toolbar - Add collapsible sidebar with session management - Add copy button for instructions - Ensure all generated prompts use simplified Chinese - Update README with comprehensive documentation - Add .gitignore for local_docs folder
This commit is contained in:
@@ -1,12 +1,18 @@
|
||||
import re
|
||||
import numpy as np
|
||||
from typing import List, Tuple
|
||||
from sklearn.cluster import AgglomerativeClustering
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import config
|
||||
|
||||
from .ollama_client import call_qwen
|
||||
from .xinference_client import embed_texts
|
||||
from .prompt_utils import refine_instruction, refine_instruction_with_history
|
||||
from .prompt_utils import (
|
||||
refine_instruction,
|
||||
refine_instruction_with_history,
|
||||
generate_initial_system_instruction_candidates,
|
||||
generate_optimized_system_instruction
|
||||
)
|
||||
|
||||
def parse_candidates(raw: str) -> list:
|
||||
lines = [l.strip() for l in re.split(r'\r?\n', raw) if l.strip()]
|
||||
@@ -33,7 +39,7 @@ def cluster_and_select(candidates: list, top_k=config.TOP_K, distance_threshold=
|
||||
linkage="average")
|
||||
labels = clustering.fit_predict(X)
|
||||
|
||||
selected_idx = []
|
||||
selected_idx = []
|
||||
for label in sorted(set(labels)):
|
||||
idxs = [i for i,l in enumerate(labels) if l == label]
|
||||
sims = cosine_similarity(X[idxs]).mean(axis=1)
|
||||
@@ -44,6 +50,10 @@ def cluster_and_select(candidates: list, top_k=config.TOP_K, distance_threshold=
|
||||
return selected[:top_k]
|
||||
|
||||
def generate_candidates(query: str, rejected=None, top_k=config.TOP_K, model_name=None):
|
||||
"""
|
||||
LEGACY: Query rewriting function (NOT true OPRO).
|
||||
Kept for backward compatibility with existing API endpoints.
|
||||
"""
|
||||
rejected = rejected or []
|
||||
if rejected:
|
||||
prompt = refine_instruction_with_history(query, rejected)
|
||||
@@ -53,3 +63,87 @@ def generate_candidates(query: str, rejected=None, top_k=config.TOP_K, model_nam
|
||||
raw = call_qwen(prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
|
||||
all_candidates = parse_candidates(raw)
|
||||
return cluster_and_select(all_candidates, top_k=top_k)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TRUE OPRO FUNCTIONS (System Instruction Optimization)
|
||||
# ============================================================================
|
||||
|
||||
def generate_system_instruction_candidates(
|
||||
task_description: str,
|
||||
trajectory: List[Tuple[str, float]] = None,
|
||||
top_k: int = config.TOP_K,
|
||||
pool_size: int = None,
|
||||
model_name: str = None
|
||||
) -> List[str]:
|
||||
"""
|
||||
TRUE OPRO: Generates optimized system instruction candidates.
|
||||
|
||||
This is the core OPRO function that generates system instructions based on
|
||||
performance trajectory (if available) or initial candidates (if starting fresh).
|
||||
|
||||
Args:
|
||||
task_description: Description of the task the LLM should perform
|
||||
trajectory: Optional list of (instruction, score) tuples from previous iterations
|
||||
top_k: Number of diverse candidates to return (default: config.TOP_K = 5)
|
||||
pool_size: Number of candidates to generate before clustering (default: config.GENERATION_POOL_SIZE = 10)
|
||||
model_name: Optional model name to use for generation
|
||||
|
||||
Returns:
|
||||
List of top-k diverse system instruction candidates
|
||||
"""
|
||||
pool_size = pool_size or config.GENERATION_POOL_SIZE
|
||||
|
||||
# Generate the meta-prompt based on whether we have trajectory data
|
||||
if trajectory and len(trajectory) > 0:
|
||||
# Sort trajectory by score (highest first)
|
||||
sorted_trajectory = sorted(trajectory, key=lambda x: x[1], reverse=True)
|
||||
meta_prompt = generate_optimized_system_instruction(task_description, sorted_trajectory, pool_size)
|
||||
else:
|
||||
# No trajectory yet, generate initial candidates
|
||||
meta_prompt = generate_initial_system_instruction_candidates(task_description, pool_size)
|
||||
|
||||
# Use the optimizer LLM to generate candidates
|
||||
raw = call_qwen(meta_prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
|
||||
|
||||
# Parse the generated candidates
|
||||
all_candidates = parse_candidates(raw)
|
||||
|
||||
# Cluster and select diverse representatives
|
||||
return cluster_and_select(all_candidates, top_k=top_k)
|
||||
|
||||
|
||||
def evaluate_system_instruction(
|
||||
system_instruction: str,
|
||||
test_cases: List[Tuple[str, str]],
|
||||
model_name: str = None
|
||||
) -> float:
|
||||
"""
|
||||
TRUE OPRO: Evaluates a system instruction's performance on test cases.
|
||||
|
||||
Args:
|
||||
system_instruction: The system instruction to evaluate
|
||||
test_cases: List of (input, expected_output) tuples
|
||||
model_name: Optional model name to use for evaluation
|
||||
|
||||
Returns:
|
||||
Performance score (0.0 to 1.0)
|
||||
"""
|
||||
if not test_cases:
|
||||
return 0.0
|
||||
|
||||
correct = 0
|
||||
total = len(test_cases)
|
||||
|
||||
for input_text, expected_output in test_cases:
|
||||
# Construct the full prompt with system instruction
|
||||
full_prompt = f"{system_instruction}\n\n{input_text}"
|
||||
|
||||
# Get LLM response
|
||||
response = call_qwen(full_prompt, temperature=0.2, max_tokens=512, model_name=model_name)
|
||||
|
||||
# Simple exact match scoring (can be replaced with more sophisticated metrics)
|
||||
if expected_output.strip().lower() in response.strip().lower():
|
||||
correct += 1
|
||||
|
||||
return correct / total
|
||||
|
||||
Reference in New Issue
Block a user