"""Thin client for a local Ollama server: text generation and model listing."""

import re

import requests

import config

OLLAMA_URL = config.OLLAMA_GENERATE_URL
TAGS_URL = config.OLLAMA_TAGS_URL
MODEL_NAME = config.DEFAULT_CHAT_MODEL

# Models matching this pattern are not chat models (embedding/reranking
# families) and are hidden from list_models(). Compiled once at import time.
_NON_CHAT_RE = re.compile(r"embedding|rerank|reranker|bge", re.I)


def _generate(model: str, prompt: str, temperature: float, max_tokens: int) -> str:
    """Send one non-streaming /generate request and return the generated text.

    Raises:
        requests.HTTPError: on a non-2xx response (via raise_for_status).
        requests.RequestException: on connection/timeout/decoding failures.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature, "num_predict": max_tokens},
    }
    resp = requests.post(OLLAMA_URL, json=payload, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    # Ollama versions differ on the key holding the completion text.
    return data.get("response", "") or data.get("text", "")


def call_qwen(
    prompt: str,
    temperature: float = 0.8,
    max_tokens: int = 512,
    model_name: str | None = None,
) -> str:
    """Generate a completion for *prompt* via the Ollama /generate endpoint.

    Uses *model_name* when given, otherwise the configured default model.
    If a user-selected model fails with an HTTP error, a single best-effort
    retry is made with the default model before the original error is
    re-raised.

    Args:
        prompt: The text prompt to complete.
        temperature: Sampling temperature passed through to Ollama.
        max_tokens: Maximum tokens to generate (Ollama "num_predict").
        model_name: Optional model override; falls back to MODEL_NAME.

    Returns:
        The generated text, or "" if the server returned no text field.

    Raises:
        requests.HTTPError: when the request (and any fallback) fails.
    """
    primary = model_name or MODEL_NAME
    try:
        return _generate(primary, prompt, temperature, max_tokens)
    except requests.HTTPError:
        # Only fall back when the caller explicitly chose a non-default
        # model; a failure of the default itself has nothing to retry with.
        if model_name and model_name != MODEL_NAME:
            try:
                return _generate(MODEL_NAME, prompt, temperature, max_tokens)
            except requests.RequestException:
                pass  # fallback failed too -- surface the original error
        raise


def list_models() -> list[str]:
    """Return chat-capable model names advertised by the Ollama server.

    Embedding/reranker models are filtered out. Best-effort: any failure
    (network, HTTP, malformed JSON) yields a one-element fallback list
    containing the configured default model, never an exception.
    """
    try:
        r = requests.get(TAGS_URL, timeout=10)
        r.raise_for_status()
        data = r.json() or {}
        items = data.get("models") or []
        # Entries may expose the name under "name" or "model" depending on
        # the Ollama version; skip entries with neither.
        names = [
            name
            for m in items
            if (name := m.get("name") or m.get("model"))
        ]
        return [n for n in names if not _NON_CHAT_RE.search(n)]
    except Exception:
        # Deliberate broad catch: this is a UI-facing convenience that must
        # never raise; degrade to the default model on any failure.
        return [MODEL_NAME]