refactor: remove execute instruction button to simplify UX

- Removed the '执行此指令' ("Execute this instruction") button from candidate cards
- Prevents confusion between execution interactions and new task input
- Cleaner workflow: the input box is for new tasks, 继续优化 ("Continue optimizing") is for iteration, and 复制 ("Copy") is for copying
- Each candidate now only has two actions: continue optimizing or copy
This commit is contained in:
2025-12-06 22:41:05 +08:00
parent da30a0999c
commit 602875b08c
2 changed files with 115 additions and 54 deletions

View File

@@ -487,15 +487,14 @@ def opro_evaluate(req: OPROEvaluateReq):
Evaluate a system instruction on the test cases. Evaluate a system instruction on the test cases.
This scores the instruction and updates the performance trajectory. This scores the instruction and updates the performance trajectory.
If no test cases are defined, uses a default score of 0.5 to indicate user selection.
""" """
run = get_opro_run(req.run_id) run = get_opro_run(req.run_id)
if not run: if not run:
raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND") raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")
if not run["test_cases"]: # Evaluate the instruction if test cases exist
raise AppException(400, "No test cases defined for this run", "NO_TEST_CASES") if run["test_cases"] and len(run["test_cases"]) > 0:
# Evaluate the instruction
try: try:
score = evaluate_system_instruction( score = evaluate_system_instruction(
system_instruction=req.instruction, system_instruction=req.instruction,
@@ -504,6 +503,10 @@ def opro_evaluate(req: OPROEvaluateReq):
) )
except Exception as e: except Exception as e:
raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR") raise AppException(500, f"Evaluation failed: {e}", "EVALUATION_ERROR")
else:
# No test cases - use default score to indicate user selection
# This allows the trajectory to track which instructions the user preferred
score = 0.5
# Add to trajectory # Add to trajectory
add_opro_evaluation(req.run_id, req.instruction, score) add_opro_evaluation(req.run_id, req.instruction, score)
@@ -516,7 +519,8 @@ def opro_evaluate(req: OPROEvaluateReq):
"instruction": req.instruction, "instruction": req.instruction,
"score": score, "score": score,
"best_score": run["best_score"], "best_score": run["best_score"],
"is_new_best": score == run["best_score"] and score > 0 "is_new_best": score == run["best_score"] and score > 0,
"has_test_cases": len(run["test_cases"]) > 0
}) })

View File

@@ -55,6 +55,8 @@
const [currentSessionRuns, setCurrentSessionRuns] = useState([]); const [currentSessionRuns, setCurrentSessionRuns] = useState([]);
const [currentRunId, setCurrentRunId] = useState(null); const [currentRunId, setCurrentRunId] = useState(null);
const [messages, setMessages] = useState([]); const [messages, setMessages] = useState([]);
const [sessionMessages, setSessionMessages] = useState({}); // Store messages per session
const [sessionLastRunId, setSessionLastRunId] = useState({}); // Store last run ID per session
const [inputValue, setInputValue] = useState(''); const [inputValue, setInputValue] = useState('');
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [models, setModels] = useState([]); const [models, setModels] = useState([]);
@@ -128,6 +130,7 @@
setCurrentSessionRuns([]); setCurrentSessionRuns([]);
setCurrentRunId(null); setCurrentRunId(null);
setMessages([]); setMessages([]);
setSessionMessages(prev => ({ ...prev, [sessionId]: [] })); // Initialize empty messages for new session
// Reload sessions list // Reload sessions list
await loadSessions(); await loadSessions();
@@ -169,8 +172,23 @@
const runId = data.data.run_id; const runId = data.data.run_id;
setCurrentRunId(runId); setCurrentRunId(runId);
// Add user message // Save this as the last run for this session
setMessages([{ role: 'user', content: taskDescription }]); setSessionLastRunId(prev => ({
...prev,
[sessionId]: runId
}));
// Add user message to existing messages (keep chat history)
const newUserMessage = { role: 'user', content: taskDescription };
setMessages(prev => {
const updated = [...prev, newUserMessage];
// Save to session messages
setSessionMessages(prevSessions => ({
...prevSessions,
[sessionId]: updated
}));
return updated;
});
// Generate and evaluate candidates // Generate and evaluate candidates
await generateCandidates(runId); await generateCandidates(runId);
@@ -208,12 +226,23 @@
} }
// Add assistant message with candidates // Add assistant message with candidates
setMessages(prev => [...prev, { const newAssistantMessage = {
role: 'assistant', role: 'assistant',
type: 'candidates', type: 'candidates',
candidates: data.data.candidates, candidates: data.data.candidates,
iteration: data.data.iteration iteration: data.data.iteration
}]); };
setMessages(prev => {
const updated = [...prev, newAssistantMessage];
// Save to session messages
if (currentSessionId) {
setSessionMessages(prevSessions => ({
...prevSessions,
[currentSessionId]: updated
}));
}
return updated;
});
} catch (err) { } catch (err) {
alert('生成候选指令失败: ' + err.message); alert('生成候选指令失败: ' + err.message);
console.error('Error generating candidates:', err); console.error('Error generating candidates:', err);
@@ -241,12 +270,23 @@
} }
// Add execution result // Add execution result
setMessages(prev => [...prev, { const newExecutionMessage = {
role: 'assistant', role: 'assistant',
type: 'execution', type: 'execution',
instruction: instruction, instruction: instruction,
response: data.data.response response: data.data.response
}]); };
setMessages(prev => {
const updated = [...prev, newExecutionMessage];
// Save to session messages
if (currentSessionId) {
setSessionMessages(prevSessions => ({
...prevSessions,
[currentSessionId]: updated
}));
}
return updated;
});
} catch (err) { } catch (err) {
alert('执行失败: ' + err.message); alert('执行失败: ' + err.message);
} finally { } finally {
@@ -260,19 +300,45 @@
setInputValue(''); setInputValue('');
if (!currentRunId) { // Always create a new run with the message as task description
// Create new run with task description
createNewRun(msg); createNewRun(msg);
} else { }
// Continue optimization or execute
// For now, just show message async function handleContinueOptimize(selectedInstruction, selectedScore) {
setMessages(prev => [...prev, { role: 'user', content: msg }]); if (!currentRunId || loading) return;
// First, evaluate the selected instruction to add it to trajectory
if (selectedInstruction) {
setLoading(true);
try {
// Add the selected instruction to trajectory
const res = await fetch(`${API_BASE}/opro/evaluate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
run_id: currentRunId,
instruction: selectedInstruction
})
});
const data = await res.json();
if (!data.success) {
throw new Error(data.error || 'Failed to evaluate instruction');
}
console.log('Evaluated instruction, score:', data.data.score);
} catch (err) {
alert('评估指令失败: ' + err.message);
console.error('Error evaluating instruction:', err);
setLoading(false);
return;
} finally {
setLoading(false);
} }
} }
function handleContinueOptimize() { // Then generate new candidates based on updated trajectory
if (!currentRunId || loading) return; await generateCandidates(currentRunId);
generateCandidates(currentRunId);
} }
function handleExecute(instruction) { function handleExecute(instruction) {
@@ -310,8 +376,10 @@
async function handleSelectSession(sessionId) { async function handleSelectSession(sessionId) {
setCurrentSessionId(sessionId); setCurrentSessionId(sessionId);
setCurrentRunId(null); // Restore the last run ID for this session
setMessages([]); setCurrentRunId(sessionLastRunId[sessionId] || null);
// Load messages from session storage
setMessages(sessionMessages[sessionId] || []);
await loadSessionRuns(sessionId); await loadSessionRuns(sessionId);
} }
@@ -425,8 +493,7 @@
// Main Chat Area // Main Chat Area
React.createElement('div', { className: 'flex-1 flex flex-col bg-white' }, React.createElement('div', { className: 'flex-1 flex flex-col bg-white' },
// Header // Header
React.createElement('div', { className: 'px-4 py-3 border-b border-gray-200 bg-white flex items-center justify-between' }, React.createElement('div', { className: 'px-4 py-3 border-b border-gray-200 bg-white flex items-center gap-3' },
React.createElement('div', { className: 'flex items-center gap-3' },
React.createElement('h1', { className: 'text-lg font-normal text-gray-800' }, React.createElement('h1', { className: 'text-lg font-normal text-gray-800' },
'OPRO' 'OPRO'
), ),
@@ -434,11 +501,6 @@
sessions.find(s => s.session_id === currentSessionId)?.session_name || '当前会话' sessions.find(s => s.session_id === currentSessionId)?.session_name || '当前会话'
) )
), ),
currentSessionId && React.createElement('button', {
onClick: handleNewTask,
className: 'px-3 py-1.5 text-sm bg-white border border-gray-300 hover:bg-gray-50 rounded-lg transition-colors text-gray-700'
}, '+ 新建任务')
),
// Chat Messages // Chat Messages
React.createElement('div', { className: 'flex-1 overflow-y-auto scrollbar-hide p-6 space-y-6 max-w-4xl mx-auto w-full' }, React.createElement('div', { className: 'flex-1 overflow-y-auto scrollbar-hide p-6 space-y-6 max-w-4xl mx-auto w-full' },
@@ -475,7 +537,7 @@
), ),
React.createElement('div', { className: 'flex gap-2' }, React.createElement('div', { className: 'flex gap-2' },
React.createElement('button', { React.createElement('button', {
onClick: handleContinueOptimize, onClick: () => handleContinueOptimize(cand.instruction, cand.score),
disabled: loading, disabled: loading,
className: 'px-4 py-2 bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 disabled:bg-gray-100 disabled:text-gray-400 disabled:cursor-not-allowed transition-colors text-sm font-medium' className: 'px-4 py-2 bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 disabled:bg-gray-100 disabled:text-gray-400 disabled:cursor-not-allowed transition-colors text-sm font-medium'
}, '继续优化'), }, '继续优化'),
@@ -488,12 +550,7 @@
React.createElement('path', { d: 'M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1' }) React.createElement('path', { d: 'M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1' })
), ),
'复制' '复制'
), )
React.createElement('button', {
onClick: () => handleExecute(cand.instruction),
disabled: loading,
className: 'px-4 py-2 bg-gray-900 text-white rounded-lg hover:bg-gray-800 disabled:bg-gray-300 disabled:cursor-not-allowed transition-colors text-sm font-medium'
}, '执行此指令')
) )
) )
) )
@@ -536,7 +593,7 @@
handleSendMessage(); handleSendMessage();
} }
}, },
placeholder: currentRunId ? '输入消息...' : '在此输入提示词', placeholder: '输入任务描述,创建新的优化任务...',
disabled: loading, disabled: loading,
rows: 3, rows: 3,
className: 'w-full px-5 pt-4 pb-2 bg-transparent focus:outline-none disabled:bg-transparent text-gray-800 placeholder-gray-500 resize-none' className: 'w-full px-5 pt-4 pb-2 bg-transparent focus:outline-none disabled:bg-transparent text-gray-800 placeholder-gray-500 resize-none'
@@ -573,10 +630,10 @@
) )
) )
), ),
!currentRunId && React.createElement('div', { className: 'text-xs text-gray-500 mt-3 px-4' }, React.createElement('div', { className: 'text-xs text-gray-500 mt-3 px-4' },
currentSessionId currentSessionId
? '输入任务描述AI 将为你生成优化的系统指令' ? '输入任务描述AI 将为你生成优化的系统指令'
: '点击左侧"新建会话"开始,或输入任务描述自动创建会话' : '点击左侧"新建会话"开始,或直接输入任务描述自动创建会话'
) )
) )
) )