refactor: remove execute instruction button to simplify UX

- Removed '执行此指令' (Execute this instruction) button from candidate cards
- Prevents confusion between execution interactions and new task input
- Cleaner workflow: input box for new tasks, 继续优化 for iteration, 复制 for copying
- Each candidate now has only two actions: continue optimizing or copy
This commit is contained in:
2025-12-06 22:41:05 +08:00
parent da30a0999c
commit 602875b08c
2 changed files with 115 additions and 54 deletions

View File

@@ -487,23 +487,26 @@ def opro_evaluate(req: OPROEvaluateReq):
Evaluate a system instruction on the test cases.
This scores the instruction and updates the performance trajectory.
If no test cases are defined, uses a default score of 0.5 to indicate user selection.
"""
run = get_opro_run(req.run_id)
if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
if not run["test_cases"]:
raise AppException(400, "No test cases defined for this run", "NO_TEST_CASES")
# Evaluate the instruction
try:
score = evaluate_system_instruction(
system_instruction=req.instruction,
test_cases=run["test_cases"],
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
# Evaluate the instruction if test cases exist
if run["test_cases"] and len(run["test_cases"]) > 0:
try:
score = evaluate_system_instruction(
system_instruction=req.instruction,
test_cases=run["test_cases"],
model_name=run["model_name"]
)
except Exception as e:
raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
else:
# No test cases - use default score to indicate user selection
# This allows the trajectory to track which instructions the user preferred
score = 0.5
# Add to trajectory
add_opro_evaluation(req.run_id, req.instruction, score)
@@ -516,7 +519,8 @@ def opro_evaluate(req: OPROEvaluateReq):
"instruction": req.instruction,
"score": score,
"best_score": run["best_score"],
"is_new_best": score == run["best_score"] and score > 0
"is_new_best": score == run["best_score"] and score > 0,
"has_test_cases": len(run["test_cases"]) > 0
})