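"""Meta-prompt builders for OPRO-style System Instruction optimization.

The legacy query-rewriting helpers are kept only for backward compatibility.
The true OPRO functions build meta-prompts that ask an optimizer LLM to
generate and refine System Instruction candidates from a scored trajectory.
"""
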
from typing import List, Optional, Tuple
# ============================================================================
# OLD FUNCTIONS (Query Rewriting - NOT true OPRO, kept for compatibility)
# ============================================================================


def refine_instruction(query: str) -> str:
    """
    LEGACY: Generates query rewrites (NOT true OPRO).

    This is query expansion, not system instruction optimization.
    """
    # Prompt (Chinese): act as a "question clarification and rewriting assistant"
    # and generate at least 20 diverse, directly usable rewrites of the user's
    # original question, one per line.
    return f"""
你是一个“问题澄清与重写助手”。
请根据用户的原始问题:
【{query}】
生成不少于20条多角度、可直接执行的问题改写,每行一条。
"""


def refine_instruction_with_history(query: str, rejected_list: list) -> str:
    """
    LEGACY: Generates query rewrites with rejection history (NOT true OPRO).
    """
    rejected_text = "\n".join(f"- {r}" for r in rejected_list) if rejected_list else ""

    # Prompt (Chinese): same rewriting assistant, but the previously rejected
    # rewrites are listed so the model must take new angles and produce at
    # least 20 different rewrites, one per line.
    return f"""
你是一个“问题澄清与重写助手”。
原始问题:
{query}

以下改写已被否定:
{rejected_text}

请从新的角度重新生成至少20条不同的改写问题,每条单独一行。
"""


# ============================================================================
# TRUE OPRO FUNCTIONS (System Instruction Optimization)
# ============================================================================


def generate_initial_system_instruction_candidates(task_description: str, pool_size: Optional[int] = None) -> str:
    """
    TRUE OPRO: Generates initial candidate System Instructions for a new OPRO run.

    Args:
        task_description: Description of the task the LLM should perform
        pool_size: Number of candidates to generate (defaults to config.GENERATION_POOL_SIZE)

    Returns:
        Meta-prompt that instructs the optimizer LLM to generate system instruction candidates
    """
    import config
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    # Prompt (Chinese): generate `pool_size` high-quality, stylistically diverse
    # System Instruction candidates for the task, one per line, without
    # numbering, all in Simplified Chinese.
    return f"""
你是一个"系统指令生成助手"。
目标任务描述:
【{task_description}】

请根据以上任务,生成 {pool_size} 条高质量、风格各异的"System Instruction"候选指令。

要求:
1. 每条指令必须有明显不同的风格和侧重点
2. 覆盖不同的实现策略(例如:简洁型、详细型、示例型、角色扮演型、步骤型等)
3. 这些指令应指导LLM的行为和输出格式,以最大化任务性能
4. 每条指令单独成行,不包含编号或额外说明
5. 所有生成的指令必须使用简体中文

生成 {pool_size} 条指令:
"""
def generate_optimized_system_instruction(
    task_description: str,
    trajectory: List[Tuple[str, float]],
    pool_size: Optional[int] = None
) -> str:
    """
    TRUE OPRO: Analyzes the performance trajectory and generates optimized System Instructions.

    This is the core OPRO function that uses an LLM as an optimizer to improve
    system instructions based on historical performance scores.

    Args:
        task_description: Description of the task the LLM should perform
        trajectory: List of (instruction, score) tuples, sorted by score (highest first)
        pool_size: Number of candidates to generate (defaults to config.GENERATION_POOL_SIZE)

    Returns:
        Meta-prompt that instructs the optimizer LLM to generate better system instructions
    """
    import config
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    if not trajectory:
        # If there is no trajectory yet, fall back to initial candidate generation
        return generate_initial_system_instruction_candidates(task_description, pool_size)

    # Format the trajectory for the Optimizer LLM
    formatted_history = "\n".join(
        f"--- Instruction Score: {score:.4f}\n{instruction}"
        for instruction, score in trajectory
    )

    # Determine the current highest score to set the optimization goal
    highest_score = max(score for _, score in trajectory)

    # Construct the Meta-Prompt (the OPRO instruction). Prompt (Chinese):
    # analyze the strengths of high-scoring and the flaws of low-scoring
    # instructions, then propose `pool_size` new instructions expected to beat
    # the current best score, one per line, in Simplified Chinese.
    return f"""
你是一个"System Prompt 优化器"。
你的任务是改进一个LLM的系统指令,以最大化其在以下任务中的性能:
【{task_description}】

---
**历史性能轨迹 (Instructions and Scores):**
{formatted_history}
---
**当前最高得分: {highest_score:.4f}**

请分析得分最高的指令的特点和得分最低指令的缺陷。
然后,生成 {pool_size} 条新的、有潜力超越 {highest_score:.4f} 分的System Instruction。

要求:
1. 每条指令必须有明显不同的改进策略
2. 结合高分指令的优点,避免低分指令的缺陷
3. 探索新的优化方向和表达方式
4. 每条指令单独成行,不包含编号或额外说明
5. 所有生成的指令必须使用简体中文

生成 {pool_size} 条优化后的指令:
"""
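

# Illustrative end-to-end OPRO loop (sketch only). `call_optimizer_llm` and
# `score_instruction` are hypothetical callables supplied by the caller: an
# LLM client that answers a meta-prompt, and an evaluator that scores one
# System Instruction on the target task. Neither is defined in this module.
def run_opro_loop_example(
    task_description: str,
    call_optimizer_llm,
    score_instruction,
    iterations: int = 3,
) -> Optional[Tuple[str, float]]:
    trajectory: List[Tuple[str, float]] = []  # (instruction, score), kept best-first
    meta_prompt = generate_initial_system_instruction_candidates(task_description)
    for _ in range(iterations):
        reply = call_optimizer_llm(meta_prompt)
        # One candidate instruction per non-empty line of the optimizer's reply
        candidates = [line.strip() for line in reply.splitlines() if line.strip()]
        trajectory.extend((cand, score_instruction(cand)) for cand in candidates)
        trajectory.sort(key=lambda pair: pair[1], reverse=True)
        del trajectory[20:]  # arbitrary cap so the meta-prompt stays manageable
        meta_prompt = generate_optimized_system_instruction(task_description, trajectory)
    return trajectory[0] if trajectory else None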