import re
from typing import List, Optional, Tuple

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import cosine_similarity

import config
from .ollama_client import call_qwen
from .xinference_client import embed_texts
from .prompt_utils import (
    refine_instruction,
    refine_instruction_with_history,
    generate_initial_system_instruction_candidates,
    generate_optimized_system_instruction
)


def parse_candidates(raw: str) -> list:
    """Splits raw LLM output into lines, strips list markers, and deduplicates while preserving order."""
    lines = [l.strip() for l in re.split(r'\r?\n', raw) if l.strip()]
    cleaned = []
    for l in lines:
        l = re.sub(r'^[\-\*\d\.\)\s]+', '', l).strip()
        if len(l) >= 6:
            cleaned.append(l)
    return list(dict.fromkeys(cleaned))


def cluster_and_select(candidates: list, top_k=config.TOP_K,
                       distance_threshold=config.CLUSTER_DISTANCE_THRESHOLD):
    """Clusters candidates by embedding similarity and keeps one representative per cluster, up to top_k."""
    if not candidates:
        return []
    if len(candidates) <= top_k:
        return candidates
    vecs = embed_texts(candidates)
    if not vecs or len(vecs) != len(candidates):
        return candidates[:top_k]
    X = np.array(vecs)
    clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
        metric="cosine",
        linkage="average"
    )
    labels = clustering.fit_predict(X)
    selected_idx = []
    for label in sorted(set(labels)):
        idxs = [i for i, l in enumerate(labels) if l == label]
        # Pick the member with the highest average similarity to its cluster
        sims = cosine_similarity(X[idxs]).mean(axis=1)
        rep = idxs[int(np.argmax(sims))]
        selected_idx.append(rep)
    selected = [candidates[i] for i in sorted(selected_idx)]
    return selected[:top_k]


def generate_candidates(query: str, rejected=None, top_k=config.TOP_K, model_name=None):
    """
    LEGACY: Query rewriting function (NOT true OPRO).
    Kept for backward compatibility with existing API endpoints.
    """
    rejected = rejected or []
    if rejected:
        prompt = refine_instruction_with_history(query, rejected)
    else:
        prompt = refine_instruction(query)
    raw = call_qwen(prompt, temperature=0.9, max_tokens=1024, model_name=model_name)
    all_candidates = parse_candidates(raw)
    return cluster_and_select(all_candidates, top_k=top_k)
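# Illustrative example (not in the original module): given raw optimizer output such as
#   "1. Rewrite the query for clarity\n- Rewrite the query for clarity\n2) Add domain context",
# parse_candidates() strips the list markers and deduplicates, yielding
#   ["Rewrite the query for clarity", "Add domain context"],
# after which cluster_and_select() keeps at most top_k semantically distinct
# entries, one representative per embedding cluster.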
# ============================================================================
# TRUE OPRO FUNCTIONS (System Instruction Optimization)
# ============================================================================

def generate_system_instruction_candidates(
    task_description: str,
    trajectory: Optional[List[Tuple[str, float]]] = None,
    top_k: int = config.TOP_K,
    pool_size: Optional[int] = None,
    model_name: Optional[str] = None
) -> List[str]:
    """
    TRUE OPRO: Generates optimized system instruction candidates.

    This is the core OPRO function that generates system instructions based on
    the performance trajectory (if available) or initial candidates (if starting fresh).

    Args:
        task_description: Description of the task the LLM should perform
        trajectory: Optional list of (instruction, score) tuples from previous iterations
        top_k: Number of diverse candidates to return (default: config.TOP_K = 5)
        pool_size: Number of candidates to generate before clustering
            (default: config.GENERATION_POOL_SIZE = 10)
        model_name: Optional model name to use for generation

    Returns:
        List of top-k diverse system instruction candidates
    """
    pool_size = pool_size or config.GENERATION_POOL_SIZE

    # Generate the meta-prompt based on whether we have trajectory data
    if trajectory and len(trajectory) > 0:
        # Sort trajectory by score (highest first)
        sorted_trajectory = sorted(trajectory, key=lambda x: x[1], reverse=True)
        meta_prompt = generate_optimized_system_instruction(task_description, sorted_trajectory, pool_size)
    else:
        # No trajectory yet, generate initial candidates
        meta_prompt = generate_initial_system_instruction_candidates(task_description, pool_size)

    # Use the optimizer LLM to generate candidates
    raw = call_qwen(meta_prompt, temperature=0.9, max_tokens=1024, model_name=model_name)

    # Parse the generated candidates
    all_candidates = parse_candidates(raw)

    # Cluster and select diverse representatives
    return cluster_and_select(all_candidates, top_k=top_k)


def evaluate_system_instruction(
    system_instruction: str,
    test_cases: List[Tuple[str, str]],
    model_name: Optional[str] = None
) -> float:
    """
    TRUE OPRO: Evaluates a system instruction's performance on test cases.

    Args:
        system_instruction: The system instruction to evaluate
        test_cases: List of (input, expected_output) tuples
        model_name: Optional model name to use for evaluation

    Returns:
        Performance score (0.0 to 1.0)
    """
    if not test_cases:
        return 0.0

    correct = 0
    total = len(test_cases)

    for input_text, expected_output in test_cases:
        # Construct the full prompt with the system instruction prepended
        full_prompt = f"{system_instruction}\n\n{input_text}"

        # Get LLM response
        response = call_qwen(full_prompt, temperature=0.2, max_tokens=512, model_name=model_name)

        # Simple substring-match scoring (can be replaced with more sophisticated metrics)
        if expected_output.strip().lower() in response.strip().lower():
            correct += 1

    return correct / total
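
# Illustrative sketch (not part of the original module): one way the two TRUE OPRO
# functions above could be combined into an optimization loop. The function name
# `run_opro_loop` and its defaults are hypothetical; adapt them to the project's
# actual API surface.
def run_opro_loop(
    task_description: str,
    test_cases: List[Tuple[str, str]],
    num_iterations: int = 3,
    model_name: Optional[str] = None
) -> Tuple[str, float]:
    """Returns the best (system_instruction, score) pair found across iterations."""
    trajectory: List[Tuple[str, float]] = []
    for _ in range(num_iterations):
        # Generate diverse candidates, conditioned on the scored trajectory so far
        candidates = generate_system_instruction_candidates(
            task_description, trajectory=trajectory, model_name=model_name
        )
        # Score each candidate and extend the trajectory for the next iteration
        for candidate in candidates:
            score = evaluate_system_instruction(candidate, test_cases, model_name=model_name)
            trajectory.append((candidate, score))
    if not trajectory:
        raise RuntimeError("No system instruction candidates were generated")
    return max(trajectory, key=lambda item: item[1])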