# opro_demo/examples/opro_demo.py

"""
TRUE OPRO Demo Script

This script demonstrates the true OPRO (Optimization by PROmpting) functionality.
It shows how to:
1. Generate initial system instruction candidates
2. Evaluate them on test cases
3. Use the performance trajectory to generate better candidates
"""

import sys
sys.path.insert(0, '.')

from _qwen_xinference_demo.opro.user_prompt_optimizer import (
    generate_system_instruction_candidates,
    evaluate_system_instruction
)
import config


def _preview(text, limit):
    """Return ``text`` cut to ``limit`` characters, adding "..." only if it was cut."""
    if len(text) > limit:
        return f"{text[:limit]}..."
    return text


def _print_candidates(candidates):
    """Print a numbered preview (at most 100 characters) of each candidate."""
    for i, candidate in enumerate(candidates, 1):
        print(f"\n  Candidate {i}:")
        print(f"  {_preview(candidate, 100)}")


def demo_opro_workflow():
    """
    Run a two-round OPRO optimization demo and print the progress to stdout.

    Round 1 asks ``generate_system_instruction_candidates`` for candidates with
    no trajectory, scores each one with ``evaluate_system_instruction`` on a
    small fixed set of Chinese-to-English test cases, and ranks them. Round 2
    passes the ranked ``(candidate, score)`` list back as the trajectory,
    scores the new candidates, and prints the overall ranking.

    Returns:
        None. If the first round yields no candidates, a message is printed
        and the function returns early instead of failing on an empty ranking.
    """
    # Number of candidates requested per round.
    # NOTE(review): the header below reports config.TOP_K while the calls
    # request 3 — confirm whether these are meant to be the same value.
    candidates_per_round = 3

    print("=" * 80)
    print("TRUE OPRO Demo - System Instruction Optimization")
    print("=" * 80)
    print(f"Pool Size: {config.GENERATION_POOL_SIZE} candidates → Clustered to Top {config.TOP_K}")

    # Define the task
    task_description = """
任务：将用户输入的中文句子翻译成英文。
要求：翻译准确、自然、符合英语表达习惯。
"""

    print(f"\n📋 Task Description:\n{task_description}")

    # Define test cases for evaluation: (input sentence, expected translation)
    test_cases = [
        ("你好，很高兴见到你", "Hello, nice to meet you"),
        ("今天天气真好", "The weather is really nice today"),
        ("我喜欢学习编程", "I like learning programming"),
        ("这本书很有趣", "This book is very interesting"),
    ]

    print(f"\n🧪 Test Cases: {len(test_cases)} examples")
    for i, (input_text, expected) in enumerate(test_cases, 1):
        print(f"  {i}. '{input_text}' → '{expected}'")

    # Iteration 1: Generate initial candidates
    print("\n" + "=" * 80)
    print("🔄 Iteration 1: Generating Initial System Instruction Candidates")
    print("=" * 80)

    print("\n⏳ Generating candidates... (this may take a moment)")
    candidates_round1 = generate_system_instruction_candidates(
        task_description=task_description,
        trajectory=None,  # No history yet
        top_k=candidates_per_round,
        model_name=None  # Use default model
    )

    print(f"\n✅ Generated {len(candidates_round1)} candidates:")
    _print_candidates(candidates_round1)

    # Without any candidate there is nothing to rank; trajectory[0] below
    # would raise IndexError, so stop here with an explicit message.
    if not candidates_round1:
        print("\n❌ No candidates were generated; cannot continue the demo.")
        return

    # Evaluate each candidate
    print("\n" + "-" * 80)
    print("📊 Evaluating Candidates on Test Cases")
    print("-" * 80)

    trajectory = []
    for i, candidate in enumerate(candidates_round1, 1):
        print(f"\n⏳ Evaluating Candidate {i}...")
        score = evaluate_system_instruction(
            system_instruction=candidate,
            test_cases=test_cases,
            model_name=None
        )
        trajectory.append((candidate, score))
        print(f"  Score: {score:.2%}")

    # Sort by score, best first
    trajectory.sort(key=lambda x: x[1], reverse=True)

    print("\n📈 Performance Summary (Round 1):")
    for i, (candidate, score) in enumerate(trajectory, 1):
        print(f"  {i}. Score: {score:.2%} - {_preview(candidate, 60)}")

    best_score = trajectory[0][1]
    print(f"\n🏆 Best Score: {best_score:.2%}")

    # Iteration 2: Generate optimized candidates based on trajectory
    print("\n" + "=" * 80)
    print("🔄 Iteration 2: Generating Optimized System Instructions")
    print("=" * 80)
    print("\n💡 Using performance trajectory to generate better candidates...")
    print(f"   Goal: Beat current best score of {best_score:.2%}")

    print("\n⏳ Generating optimized candidates...")
    candidates_round2 = generate_system_instruction_candidates(
        task_description=task_description,
        trajectory=trajectory,  # Use performance history
        top_k=candidates_per_round,
        model_name=None
    )

    print(f"\n✅ Generated {len(candidates_round2)} optimized candidates:")
    _print_candidates(candidates_round2)

    # Evaluate new candidates
    print("\n" + "-" * 80)
    print("📊 Evaluating Optimized Candidates")
    print("-" * 80)

    for i, candidate in enumerate(candidates_round2, 1):
        print(f"\n⏳ Evaluating Optimized Candidate {i}...")
        score = evaluate_system_instruction(
            system_instruction=candidate,
            test_cases=test_cases,
            model_name=None
        )
        trajectory.append((candidate, score))
        print(f"  Score: {score:.2%}")
        if score > best_score:
            print(f"  🎉 NEW BEST! Improved from {best_score:.2%} to {score:.2%}")
            best_score = score

    # Final summary: re-rank both rounds together
    trajectory.sort(key=lambda x: x[1], reverse=True)

    print("\n" + "=" * 80)
    print("🏁 Final Results")
    print("=" * 80)
    print(f"\n🏆 Best System Instruction (Score: {trajectory[0][1]:.2%}):")
    print(f"\n{trajectory[0][0]}")

    # Only the five highest-scoring entries are listed.
    print("\n📊 All Candidates Ranked:")
    for i, (candidate, score) in enumerate(trajectory[:5], 1):
        print(f"\n  {i}. Score: {score:.2%}")
        print(f"     {_preview(candidate, 80)}")

    print("\n" + "=" * 80)
    print("✅ OPRO Demo Complete!")
    print("=" * 80)


if __name__ == "__main__":
    # Prerequisite notice shown before the demo starts, one entry per line.
    startup_notice = (
        "\n⚠️  NOTE: This demo requires:",
        "   1. Ollama running locally (http://127.0.0.1:11434)",
        "   2. A Qwen model available (e.g., qwen3:8b)",
        "   3. An embedding model (e.g., qwen3-embedding:4b)",
        "\n   Press Ctrl+C to cancel, or Enter to continue...",
    )
    for notice_line in startup_notice:
        print(notice_line)

    try:
        # Wait for Enter; Ctrl+C here or during the demo run cancels cleanly.
        input()
        demo_opro_workflow()
    except KeyboardInterrupt:
        print("\n\n❌ Demo cancelled by user.")
        sys.exit(0)