Original code
_qwen_xinference_demo/opro/ollama_client.py (new normal file, +52 lines)
@@ -0,0 +1,52 @@
import requests
import re
import config

OLLAMA_URL = config.OLLAMA_GENERATE_URL
TAGS_URL = config.OLLAMA_TAGS_URL
MODEL_NAME = config.DEFAULT_CHAT_MODEL

def call_qwen(prompt: str, temperature: float = 0.8, max_tokens: int = 512, model_name: str | None = None) -> str:
    def _payload(m: str):
        return {
            "model": m,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens
            }
        }
    primary = model_name or MODEL_NAME
    try:
        resp = requests.post(OLLAMA_URL, json=_payload(primary), timeout=60)
        resp.raise_for_status()
        data = resp.json()
        return data.get("response", "") or data.get("text", "")
    except requests.HTTPError as e:
        # Try fallback to default when user-selected model fails
        if model_name and model_name != MODEL_NAME:
            try:
                resp = requests.post(OLLAMA_URL, json=_payload(MODEL_NAME), timeout=60)
                resp.raise_for_status()
                data = resp.json()
                return data.get("response", "") or data.get("text", "")
            except Exception:
                pass
        raise

def list_models() -> list[str]:
    try:
        r = requests.get(TAGS_URL, timeout=10)
        r.raise_for_status()
        data = r.json() or {}
        items = data.get("models") or []
        names = []
        for m in items:
            name = m.get("name") or m.get("model")
            if name:
                names.append(name)
        names = [n for n in names if not re.search(r"embedding|rerank|reranker|bge", n, re.I)]
        return names
    except Exception:
        return [MODEL_NAME]
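A minimal usage sketch (not part of the commit above): it assumes an Ollama server is reachable at the URLs defined in config, and that _qwen_xinference_demo is on sys.path so that opro.ollama_client and its config module can be imported; the prompt text and model choice are placeholders.

# Illustrative usage only; assumes a local Ollama instance and that this
# script is run from the _qwen_xinference_demo directory.
from opro.ollama_client import call_qwen, list_models

if __name__ == "__main__":
    models = list_models()  # chat-capable models; embedding/rerank models are filtered out
    print("available models:", models)

    reply = call_qwen(
        "Summarize what OPRO does in one sentence.",  # placeholder prompt
        temperature=0.2,
        max_tokens=128,
        model_name=models[0] if models else None,  # None falls back to DEFAULT_CHAT_MODEL
    )
    print(reply)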