返回模型
说明文档
原始模型在此
转换为 onnx 和量化的代码在此
依赖项:
pip install huggingface-hub onnx onnxruntime numpy tokenizers
推理代码:
from huggingface_hub import hf_hub_download
from tokenizers import Tokenizer
import onnxruntime as ort
import numpy as np
class GLiClassOnnxInference:
    """Zero-shot text classification with a GLiClass ONNX model.

    Downloads the ONNX graph and tokenizer from the Hugging Face Hub
    and scores a text against an arbitrary list of candidate labels
    with onnxruntime.
    """

    def __init__(self, model_id: str, use_int8_quant: bool = False):
        """Load the ONNX session and tokenizer for ``model_id``.

        Args:
            model_id: Hugging Face Hub repo id to download from.
            use_int8_quant: when True, load the int8-quantized graph
                (``model_i8.onnx``) instead of ``model.onnx``.
        """
        self.onnx_runtime_session = ort.InferenceSession(
            hf_hub_download(
                repo_id=model_id,
                filename="model_i8.onnx" if use_int8_quant else "model.onnx",
            )
        )
        self.tokenizer = Tokenizer.from_file(
            hf_hub_download(repo_id=model_id, filename="tokenizer.json")
        )

    def encode(self, text: str, max_length: int = 512, pad: bool = True):
        """Tokenize ``text`` into a batch-of-1 (input_ids, attention_mask).

        Pads with the tokenizer's ``[PAD]`` token up to ``max_length``
        (attention mask 0 on padding) and truncates longer sequences.

        Returns:
            Two ``np.int64`` arrays of shape ``(1, max_length)`` when
            padding applies, ``(1, <=max_length)`` otherwise.
        """
        encoded = self.tokenizer.encode(text)
        ids = encoded.ids
        mask = encoded.attention_mask
        if pad and len(ids) < max_length:
            # token_to_id returns None when the vocab has no [PAD] entry;
            # fall back to id 0 so the int64 array construction cannot
            # fail on a None element.
            pad_id = self.tokenizer.token_to_id("[PAD]")
            if pad_id is None:
                pad_id = 0
            pad_len = max_length - len(ids)
            # Rebind (ids + ...) instead of `ids += ...` so we never
            # mutate the list owned by the tokenizer's Encoding object.
            ids = ids + [pad_id] * pad_len
            mask = mask + [0] * pad_len
        ids = ids[:max_length]
        mask = mask[:max_length]
        return np.array([ids], dtype=np.int64), np.array([mask], dtype=np.int64)

    def onnx_predict(self, text: str, labels: list[str]):
        """Score ``text`` against each candidate label.

        Builds the GLiClass prompt
        ``<<LABEL>>l1<<LABEL>>l2...<<SEP>>text``, runs the ONNX session,
        and applies a sigmoid to the logits; labels are paired with the
        leading logits positionally (zip truncates to the shorter side).

        Returns:
            A list of ``{"label": str, "score": float}`` dicts, one per
            label, in the order the labels were given.
        """
        full_text = "".join(f"<<LABEL>>{lbl}" for lbl in labels) + "<<SEP>>" + text
        ids, mask = self.encode(full_text, max_length=512)
        ort_inputs = {"input_ids": ids, "attention_mask": mask}
        logits = self.onnx_runtime_session.run(None, ort_inputs)[0]
        # Sigmoid: each label is scored independently (multi-label style).
        probs = 1 / (1 + np.exp(-logits[0]))
        return [{"label": label, "score": float(prob)} for label, prob in zip(labels, probs)]
def main() -> None:
    """Demo: classify one sentence against five candidate labels.

    Downloads the model from the Hub on first run, so this is guarded
    behind ``__main__`` to keep importing this module side-effect free.
    """
    inference_session = GLiClassOnnxInference(
        "cnmoro/gliclass-edge-v3.0-onnx",
        use_int8_quant=False,
    )
    results = inference_session.onnx_predict(
        text="One day I will see the world!",
        labels=["travel", "dreams", "sport", "science", "politics"],
    )
    for r in results:
        print(f"{r['label']} => {r['score']:.3f}")


if __name__ == "__main__":
    main()
cnmoro/gliclass-edge-v3.0-onnx
作者 cnmoro
text-classification
onnx
↓ 0
♥ 1
创建时间: 2025-07-29 22:01:38+00:00
更新时间: 2025-07-30 12:53:04+00:00
在 Hugging Face 上查看文件 (5)
.gitattributes
README.md
model.onnx
ONNX
model_i8.onnx
ONNX
tokenizer.json