refactor: remove execute instruction button to simplify UX
- Removed the '执行此指令' ("Execute this instruction") button from candidate cards
- Prevents confusion between execution interactions and new-task input
- Cleaner workflow: the input box for new tasks, '继续优化' ("Continue optimizing") for iteration, '复制' ("Copy") for copying
- Each candidate now has only two actions: continue optimizing or copy
@@ -487,23 +487,26 @@ def opro_evaluate(req: OPROEvaluateReq):
     Evaluate a system instruction on the test cases.
 
     This scores the instruction and updates the performance trajectory.
+    If no test cases are defined, uses a default score of 0.5 to indicate user selection.
     """
     run = get_opro_run(req.run_id)
     if not run:
         raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
 
-    if not run["test_cases"]:
-        raise AppException(400, "No test cases defined for this run", "NO_TEST_CASES")
-
-    # Evaluate the instruction
-    try:
-        score = evaluate_system_instruction(
-            system_instruction=req.instruction,
-            test_cases=run["test_cases"],
-            model_name=run["model_name"]
-        )
-    except Exception as e:
-        raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
+    # Evaluate the instruction if test cases exist
+    if run["test_cases"] and len(run["test_cases"]) > 0:
+        try:
+            score = evaluate_system_instruction(
+                system_instruction=req.instruction,
+                test_cases=run["test_cases"],
+                model_name=run["model_name"]
+            )
+        except Exception as e:
+            raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
+    else:
+        # No test cases - use default score to indicate user selection
+        # This allows the trajectory to track which instructions the user preferred
+        score = 0.5
 
     # Add to trajectory
     add_opro_evaluation(req.run_id, req.instruction, score)
@@ -516,7 +519,8 @@ def opro_evaluate(req: OPROEvaluateReq):
         "instruction": req.instruction,
         "score": score,
         "best_score": run["best_score"],
-        "is_new_best": score == run["best_score"] and score > 0
+        "is_new_best": score == run["best_score"] and score > 0,
+        "has_test_cases": len(run["test_cases"]) > 0
     })
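For reference, a minimal client-side sketch of the new contract. This is an illustration, not part of the commit: the route path, host, and run_id value below are assumptions, since the diff only shows the opro_evaluate handler, the OPROEvaluateReq fields run_id and instruction, and the response fields score, best_score, is_new_best, and has_test_cases.

    import requests

    # Hypothetical route; the diff does not show how opro_evaluate is mounted.
    resp = requests.post(
        "http://localhost:8000/opro/evaluate",
        json={"run_id": "run-123", "instruction": "Answer concisely."},
    )
    data = resp.json()

    if data["has_test_cases"]:
        # Score was produced by evaluate_system_instruction over the run's test cases.
        print("measured score:", data["score"])
    else:
        # No test cases on this run: the backend records the fixed 0.5 placeholder
        # so the trajectory still tracks which instruction the user selected.
        print("placeholder score:", data["score"])  # 0.5

In other words, a run with no test cases no longer fails with a 400 NO_TEST_CASES error; it is scored 0.5 and flagged with has_test_cases: false, so the frontend can tell measured scores apart from user selections.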