# opro_demo/_qwen_xinference_demo/opro/user_prompt_optimizer.py
import re
import numpy as np
from typing import List, Optional, Tuple
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import cosine_similarity
import config
from .ollama_client import call_qwen
from .xinference_client import embed_texts
from .prompt_utils import (
    refine_instruction,
    refine_instruction_with_history,
    generate_initial_system_instruction_candidates,
    generate_optimized_system_instruction,
)


def parse_candidates(raw: str) -> List[str]:
    """Split raw LLM output into clean, deduplicated candidate lines."""
    lines = [l.strip() for l in re.split(r'\r?\n', raw) if l.strip()]
    cleaned = []
    for l in lines:
        # Strip leading list markers such as "-", "*", "1.", "2)".
        l = re.sub(r'^[\-\*\d\.\)\s]+', '', l).strip()
        if len(l) >= 6:  # drop fragments too short to be real candidates
            cleaned.append(l)
    # dict.fromkeys deduplicates while preserving first-seen order.
    return list(dict.fromkeys(cleaned))
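
# e.g. parse_candidates("1. Summarize the text\n2. Summarize the text\n- ok")
# returns ["Summarize the text"]: list markers are stripped, the duplicate is
# dropped, and "ok" is discarded for being shorter than 6 characters.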


def cluster_and_select(candidates: List[str], top_k=config.TOP_K,
                       distance_threshold=config.CLUSTER_DISTANCE_THRESHOLD):
    """Pick up to top_k semantically diverse candidates via embedding clustering."""
    if not candidates:
        return []
    if len(candidates) <= top_k:
        return candidates
    vecs = embed_texts(candidates)
    if not vecs or len(vecs) != len(candidates):
        # Embedding service unavailable or returned a partial result:
        # fall back to the first top_k candidates in generation order.
        return candidates[:top_k]
    X = np.array(vecs)
    clustering = AgglomerativeClustering(n_clusters=None,
                                         distance_threshold=distance_threshold,
                                         metric="cosine",
                                         linkage="average")
    labels = clustering.fit_predict(X)
    selected_idx = []
    for label in sorted(set(labels)):
        idxs = [i for i, l in enumerate(labels) if l == label]
        # Representative = member with the highest mean similarity to its cluster.
        sims = cosine_similarity(X[idxs]).mean(axis=1)
        rep = idxs[int(np.argmax(sims))]
        selected_idx.append(rep)
    selected = [candidates[i] for i in sorted(selected_idx)]
    return selected[:top_k]
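
# e.g. cluster_and_select(["Be concise.", "Keep answers short.", "Cite sources."],
#                         top_k=2) would typically keep "Cite sources." plus one
# of the two brevity variants, since the near-duplicates land in one cluster.
# (The exact pick depends on the embeddings embed_texts returns.)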


def generate_candidates(query: str, rejected=None, top_k=config.TOP_K, model_name=None):
    """
    LEGACY: Query rewriting function (NOT true OPRO).

    Kept for backward compatibility with existing API endpoints.
    """
    rejected = rejected or []
    if rejected:
        # Show previously rejected rewrites so the model avoids repeating them.
        prompt = refine_instruction_with_history(query, rejected)
    else:
        prompt = refine_instruction(query)
    raw = call_qwen(prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
    all_candidates = parse_candidates(raw)
    return cluster_and_select(all_candidates, top_k=top_k)
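
# e.g. generate_candidates("find recent papers on prompt optimization",
#                          rejected=["search prompt papers"]) asks the optimizer
# LLM for fresh rewrites that avoid the rejected phrasing, then dedupes and
# diversifies them with cluster_and_select. (Query text here is illustrative.)

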
# ============================================================================
# TRUE OPRO FUNCTIONS (System Instruction Optimization)
# ============================================================================

def generate_system_instruction_candidates(
    task_description: str,
    trajectory: Optional[List[Tuple[str, float]]] = None,
    top_k: int = config.TOP_K,
    pool_size: Optional[int] = None,
    model_name: Optional[str] = None
) -> List[str]:
    """
    TRUE OPRO: Generates optimized system instruction candidates.

    This is the core OPRO function that generates system instructions based on
    performance trajectory (if available) or initial candidates (if starting fresh).

    Args:
        task_description: Description of the task the LLM should perform
        trajectory: Optional list of (instruction, score) tuples from previous iterations
        top_k: Number of diverse candidates to return (default: config.TOP_K = 5)
        pool_size: Number of candidates to generate before clustering
            (default: config.GENERATION_POOL_SIZE = 10)
        model_name: Optional model name to use for generation

    Returns:
        List of top-k diverse system instruction candidates
    """
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    # Generate the meta-prompt based on whether we have trajectory data
    if trajectory and len(trajectory) > 0:
        # Sort trajectory by score (highest first)
        sorted_trajectory = sorted(trajectory, key=lambda x: x[1], reverse=True)
        meta_prompt = generate_optimized_system_instruction(task_description, sorted_trajectory, pool_size)
    else:
        # No trajectory yet, generate initial candidates
        meta_prompt = generate_initial_system_instruction_candidates(task_description, pool_size)

    # Use the optimizer LLM to generate candidates
    raw = call_qwen(meta_prompt, temperature=0.9, max_tokens=1024, model_name=model_name)

    # Parse the generated candidates
    all_candidates = parse_candidates(raw)

    # Cluster and select diverse representatives
    return cluster_and_select(all_candidates, top_k=top_k)
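
# Trajectory example (hypothetical instructions and scores): passing
#   [("You are a helpful assistant.", 0.40),
#    ("Answer step by step and state the final answer last.", 0.65)]
# lets the meta-prompt show the optimizer LLM its past attempts ranked by
# score, so it can propose instructions likely to score higher.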


def evaluate_system_instruction(
    system_instruction: str,
    test_cases: List[Tuple[str, str]],
    model_name: Optional[str] = None
) -> float:
    """
    TRUE OPRO: Evaluates a system instruction's performance on test cases.

    Args:
        system_instruction: The system instruction to evaluate
        test_cases: List of (input, expected_output) tuples
        model_name: Optional model name to use for evaluation

    Returns:
        Performance score (0.0 to 1.0)
    """
    if not test_cases:
        return 0.0

    correct = 0
    total = len(test_cases)

    for input_text, expected_output in test_cases:
        # Construct the full prompt with system instruction
        full_prompt = f"{system_instruction}\n\n{input_text}"

        # Get LLM response
        response = call_qwen(full_prompt, temperature=0.2, max_tokens=512, model_name=model_name)

        # Simple case-insensitive substring scoring, not exact match
        # (can be replaced with more sophisticated metrics).
        if expected_output.strip().lower() in response.strip().lower():
            correct += 1

    return correct / total
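

# ----------------------------------------------------------------------------
# Usage sketch (not part of the public API): a minimal OPRO loop wiring the
# two functions above together. The task description and test cases are
# hypothetical placeholders; running this requires the Ollama/Xinference
# backends referenced in config to be reachable.
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    task = "Classify the sentiment of a movie review as positive or negative."
    # Hypothetical (input, expected_output) pairs scored by substring match.
    tests = [
        ("Review: An absolute joy from start to finish.", "positive"),
        ("Review: Two hours of my life I will never get back.", "negative"),
    ]

    trajectory: List[Tuple[str, float]] = []
    for step in range(3):
        # Propose candidates conditioned on the scored trajectory so far;
        # on the first pass the trajectory is empty, so initial candidates
        # are generated instead.
        candidates = generate_system_instruction_candidates(task, trajectory)
        for instruction in candidates:
            score = evaluate_system_instruction(instruction, tests)
            trajectory.append((instruction, score))
        best = max(trajectory, key=lambda x: x[1])
        print(f"step {step}: best score {best[1]:.2f} -> {best[0]!r}")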