opro_demo/_qwen_xinference_demo/opro/prompt_utils.py
leehwui 65cdcf29dc refactor: replace OPRO with simple iterative refinement
Major changes:
- Remove fake OPRO evaluation (no more fake 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (selected + rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with role definition (你是一个...)
- Update README to reflect new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py
- Updated frontend handleContinueOptimize() to use new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added role definition requirement as first item in all prompt templates
2025-12-08 09:43:20 +08:00
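
For reference, a request to the new POST /opro/refine endpoint might look like the minimal sketch below; the payload field names, host, and port are assumptions inferred from the commit message, not copied from the OPRORefineReq model in api.py.

import requests

# Hypothetical request; field names and the local address are assumptions,
# not confirmed against api.py.
resp = requests.post(
    "http://localhost:8000/opro/refine",
    json={
        "task_description": "...",
        "selected_instruction": "你是一个...",
        "rejected_instructions": ["...", "..."],
    },
)
print(resp.json())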

189 lines · 6.6 KiB · Python


from typing import List, Tuple


# ============================================================================
# OLD FUNCTIONS (Query Rewriting - NOT true OPRO, kept for compatibility)
# ============================================================================

def refine_instruction(query: str) -> str:
    """
    LEGACY: Generates query rewrites (NOT true OPRO).
    This is query expansion, not system instruction optimization.
    """
    return f"""
你是一个“问题澄清与重写助手”。
请根据用户的原始问题:
{query}
生成不少于20条多角度、可直接执行的问题改写,每行一条。
"""


def refine_instruction_with_history(query: str, rejected_list: list) -> str:
    """
    LEGACY: Generates query rewrites with rejection history (NOT true OPRO).
    """
    rejected_text = "\n".join(f"- {r}" for r in rejected_list) if rejected_list else ""
    return f"""
你是一个“问题澄清与重写助手”。
原始问题:
{query}
以下改写已被否定:
{rejected_text}
请从新的角度重新生成至少20条不同的改写问题,每条单独一行。
"""


# ============================================================================
# TRUE OPRO FUNCTIONS (System Instruction Optimization)
# ============================================================================

def generate_initial_system_instruction_candidates(task_description: str, pool_size: int = None) -> str:
    """
    TRUE OPRO: Generates initial candidate System Instructions for a new OPRO run.

    Args:
        task_description: Description of the task the LLM should perform
        pool_size: Number of candidates to generate (defaults to config.GENERATION_POOL_SIZE)

    Returns:
        Meta-prompt that instructs the optimizer LLM to generate system instruction candidates
    """
    import config
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    return f"""
你是一个"系统指令生成助手"。
目标任务描述:
{task_description}
请根据以上任务,生成 {pool_size} 条高质量、全面的"System Instruction"候选指令。
要求:
1. 每条指令必须以角色定义开头(例如:"你是一个..."、"你是..."等)
2. 每条指令必须全面覆盖任务的所有要求和细节
3. 指令应清晰、具体、可执行,能够有效指导LLM完成任务
4. 确保指令包含必要的行为规范、输出格式、注意事项等
5. 每条指令单独成行,不包含编号或额外说明
6. 所有生成的指令必须使用简体中文
生成 {pool_size} 条指令:
"""


def generate_optimized_system_instruction(
    task_description: str,
    trajectory: List[Tuple[str, float]],
    pool_size: int = None
) -> str:
    """
    TRUE OPRO: Analyzes performance trajectory and generates optimized System Instructions.

    This is the core OPRO function that uses an LLM as an optimizer to improve
    system instructions based on historical performance scores.

    Args:
        task_description: Description of the task the LLM should perform
        trajectory: List of (instruction, score) tuples, sorted by score (highest first)
        pool_size: Number of candidates to generate (defaults to config.GENERATION_POOL_SIZE)

    Returns:
        Meta-prompt that instructs the optimizer LLM to generate better system instructions
    """
    import config
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    if not trajectory:
        # If no trajectory, fall back to initial generation
        return generate_initial_system_instruction_candidates(task_description, pool_size)

    # Format the trajectory for the Optimizer LLM
    formatted_history = "\n".join(
        f"--- Instruction Score: {score:.4f}\n{instruction}"
        for instruction, score in trajectory
    )

    # Determine the current highest score to set the optimization goal
    highest_score = max(score for _, score in trajectory)

    # Construct the Meta-Prompt (The OPRO Instruction)
    return f"""
你是一个"System Prompt 优化器"。
你的任务是改进一个LLM的系统指令,以最大化其在以下任务中的性能:
{task_description}
---
**历史性能轨迹 (Instructions and Scores):**
{formatted_history}
---
**当前最高得分: {highest_score:.4f}**
请分析得分最高的指令的特点和得分最低的指令的缺陷。
然后,生成 {pool_size} 条新的、有潜力超越 {highest_score:.4f} 分的System Instruction。
要求:
1. 每条指令必须以角色定义开头(例如:"你是一个..."、"你是..."等)
2. 每条指令必须全面覆盖任务的所有要求和细节
3. 结合高分指令的优点,避免低分指令的缺陷
4. 指令应清晰、具体、可执行,能够有效指导LLM完成任务
5. 每条指令单独成行,不包含编号或额外说明
6. 所有生成的指令必须使用简体中文
生成 {pool_size} 条优化后的指令:
"""


def refine_based_on_selection(
    task_description: str,
    selected_instruction: str,
    rejected_instructions: List[str],
    pool_size: int = None
) -> str:
    """
    Simple refinement: Generate variations based on the selected instruction while avoiding rejected ones.

    This is NOT OPRO - it's straightforward iterative refinement based on user preference.
    No scoring, no trajectory, just: "I like this one, give me more like it (but not like those)."

    Args:
        task_description: Description of the task
        selected_instruction: The instruction the user selected
        rejected_instructions: The instructions the user didn't select
        pool_size: Number of new candidates to generate

    Returns:
        Prompt for generating refined candidates
    """
    import config
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    rejected_text = ""
    if rejected_instructions:
        rejected_formatted = "\n".join(f"- {inst}" for inst in rejected_instructions)
        rejected_text = f"""
**用户未选择的指令(避免这些方向):**
{rejected_formatted}
"""
    return f"""
你是一个"System Prompt 改进助手"。
目标任务描述:
{task_description}
**用户选择的指令(基于此改进):**
{selected_instruction}
{rejected_text}
请基于用户选择的指令,生成 {pool_size} 条改进版本。
要求:
1. 每条指令必须以角色定义开头(例如:"你是一个..."、"你是..."等)
2. 保留用户选择指令的核心优点
3. 每条指令必须全面覆盖任务的所有要求和细节
4. 指令应清晰、具体、可执行,能够有效指导LLM完成任务
5. 避免与未选择指令相似的方向
6. 每条指令单独成行,不包含编号或额外说明
7. 所有生成的指令必须使用简体中文
生成 {pool_size} 条改进后的指令:
"""