feat: implement true OPRO with Gemini-style UI
- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder
This commit is contained in:
164
examples/opro_demo.py
Normal file
164
examples/opro_demo.py
Normal file
@@ -0,0 +1,164 @@
|
||||
"""
|
||||
TRUE OPRO Demo Script
|
||||
|
||||
This script demonstrates the true OPRO (Optimization by PROmpting) functionality.
|
||||
It shows how to:
|
||||
1. Generate initial system instruction candidates
|
||||
2. Evaluate them on test cases
|
||||
3. Use the performance trajectory to generate better candidates
|
||||
"""
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
from _qwen_xinference_demo.opro.user_prompt_optimizer import (
|
||||
generate_system_instruction_candidates,
|
||||
evaluate_system_instruction
|
||||
)
|
||||
import config
|
||||
|
||||
|
||||
def _print_candidates(candidates):
    """Print each candidate instruction, truncating long ones to 100 chars."""
    for i, candidate in enumerate(candidates, 1):
        print(f"\n Candidate {i}:")
        print(f" {candidate[:100]}..." if len(candidate) > 100 else f" {candidate}")


def _evaluate_candidates(candidates, test_cases, trajectory, label, best_score=None):
    """Score each candidate on *test_cases* and append (candidate, score) to *trajectory*.

    Args:
        candidates: list of system-instruction strings to evaluate.
        test_cases: list of (input, expected_output) pairs.
        trajectory: running list of (candidate, score) tuples; mutated in place.
        label: noun used in the progress message ("Candidate" / "Optimized Candidate").
        best_score: when not None, announce and track any score that beats it.

    Returns:
        The (possibly improved) best_score, or None if none was given.
    """
    for i, candidate in enumerate(candidates, 1):
        print(f"\n⏳ Evaluating {label} {i}...")
        score = evaluate_system_instruction(
            system_instruction=candidate,
            test_cases=test_cases,
            model_name=None
        )
        trajectory.append((candidate, score))
        print(f" Score: {score:.2%}")
        if best_score is not None and score > best_score:
            print(f" 🎉 NEW BEST! Improved from {best_score:.2%} to {score:.2%}")
            best_score = score
    return best_score


def demo_opro_workflow():
    """
    Demonstrates a complete OPRO optimization workflow.

    Runs two iterations:
      1. Generate initial system-instruction candidates and score them.
      2. Feed the (candidate, score) trajectory back to the generator and
         score the new candidates, reporting any improvement over the best.
    """
    print("=" * 80)
    print("TRUE OPRO Demo - System Instruction Optimization")
    print("=" * 80)
    print(f"Pool Size: {config.GENERATION_POOL_SIZE} candidates → Clustered to Top {config.TOP_K}")

    # Define the task (kept in Chinese: the optimizer targets Chinese prompts).
    task_description = """
任务:将用户输入的中文句子翻译成英文。
要求:翻译准确、自然、符合英语表达习惯。
"""

    print(f"\n📋 Task Description:\n{task_description}")

    # (input, expected translation) pairs used to score each candidate.
    test_cases = [
        ("你好,很高兴见到你", "Hello, nice to meet you"),
        ("今天天气真好", "The weather is really nice today"),
        ("我喜欢学习编程", "I like learning programming"),
        ("这本书很有趣", "This book is very interesting"),
    ]

    print(f"\n🧪 Test Cases: {len(test_cases)} examples")
    for i, (input_text, expected) in enumerate(test_cases, 1):
        print(f" {i}. '{input_text}' → '{expected}'")

    # Iteration 1: generate initial candidates (no performance history yet).
    print("\n" + "=" * 80)
    print("🔄 Iteration 1: Generating Initial System Instruction Candidates")
    print("=" * 80)

    print("\n⏳ Generating candidates... (this may take a moment)")
    candidates_round1 = generate_system_instruction_candidates(
        task_description=task_description,
        trajectory=None,  # No history yet
        top_k=3,
        model_name=None  # Use default model
    )

    print(f"\n✅ Generated {len(candidates_round1)} candidates:")
    _print_candidates(candidates_round1)

    # Evaluate each round-1 candidate.
    print("\n" + "-" * 80)
    print("📊 Evaluating Candidates on Test Cases")
    print("-" * 80)

    trajectory = []
    _evaluate_candidates(candidates_round1, test_cases, trajectory, "Candidate")

    # Rank round-1 results, best first.
    trajectory.sort(key=lambda x: x[1], reverse=True)

    print("\n📈 Performance Summary (Round 1):")
    for i, (candidate, score) in enumerate(trajectory, 1):
        print(f" {i}. Score: {score:.2%} - {candidate[:60]}...")

    best_score = trajectory[0][1]
    print(f"\n🏆 Best Score: {best_score:.2%}")

    # Iteration 2: generate optimized candidates based on the trajectory.
    print("\n" + "=" * 80)
    print("🔄 Iteration 2: Generating Optimized System Instructions")
    print("=" * 80)
    print("\n💡 Using performance trajectory to generate better candidates...")
    print(f" Goal: Beat current best score of {best_score:.2%}")

    print("\n⏳ Generating optimized candidates...")
    candidates_round2 = generate_system_instruction_candidates(
        task_description=task_description,
        trajectory=trajectory,  # Use performance history
        top_k=3,
        model_name=None
    )

    print(f"\n✅ Generated {len(candidates_round2)} optimized candidates:")
    _print_candidates(candidates_round2)

    # Evaluate round-2 candidates, announcing any new best score.
    print("\n" + "-" * 80)
    print("📊 Evaluating Optimized Candidates")
    print("-" * 80)

    best_score = _evaluate_candidates(
        candidates_round2, test_cases, trajectory, "Optimized Candidate", best_score
    )

    # Final summary: re-rank everything seen across both rounds.
    trajectory.sort(key=lambda x: x[1], reverse=True)

    print("\n" + "=" * 80)
    print("🏁 Final Results")
    print("=" * 80)
    print(f"\n🏆 Best System Instruction (Score: {trajectory[0][1]:.2%}):")
    print(f"\n{trajectory[0][0]}")

    print("\n📊 All Candidates Ranked:")
    for i, (candidate, score) in enumerate(trajectory[:5], 1):
        print(f"\n {i}. Score: {score:.2%}")
        print(f" {candidate[:80]}...")

    print("\n" + "=" * 80)
    print("✅ OPRO Demo Complete!")
    print("=" * 80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("\n⚠️ NOTE: This demo requires:")
|
||||
print(" 1. Ollama running locally (http://127.0.0.1:11434)")
|
||||
print(" 2. A Qwen model available (e.g., qwen3:8b)")
|
||||
print(" 3. An embedding model (e.g., qwen3-embedding:4b)")
|
||||
print("\n Press Ctrl+C to cancel, or Enter to continue...")
|
||||
|
||||
try:
|
||||
input()
|
||||
demo_opro_workflow()
|
||||
except KeyboardInterrupt:
|
||||
print("\n\n❌ Demo cancelled by user.")
|
||||
sys.exit(0)
|
||||
|
||||
Reference in New Issue
Block a user