返回模型

说明文档

Jina Reranker M0 - ONNX FP16 版本

本仓库包含 jinaai/jina-reranker-m0 模型转换为 ONNX 格式并采用 FP16 精度的版本。

模型描述

Jina Reranker 旨在根据搜索结果或文档段落与给定查询的相关性对其进行重新排序。它接受一个查询和一组文档作为输入，并输出相关性分数。

此版本专门导出用于 ONNX Runtime。

原始模型卡片: jinaai/jina-reranker-m0

技术细节

格式: ONNX
Opset: 14
精度: FP16 (使用 .half() 导出)
外部数据: 由于模型大小，使用 ONNX 外部数据格式。本仓库中的所有文件都是必需的，并且必须与主 .onnx 文件位于同一目录中。使用 huggingface_hub 时，请下载整个仓库（例如使用 snapshot_download），因为 hf_hub_download 每次只下载指定的单个文件。
导出来源: 使用 torch.onnx.export 从 Hugging Face transformers 库导出。

使用方法

您可以使用 onnxruntime 进行推理。您还需要 transformers 库来加载适当的处理器以准备输入，以及 huggingface_hub 来下载模型文件。

1. 安装:

pip install onnxruntime huggingface_hub transformers torch sentencepiece

2. 推理脚本:

import os

import numpy as np
import onnxruntime as ort
import torch  # used when handling the processor output
from huggingface_hub import hf_hub_download, snapshot_download
from transformers import AutoProcessor

# --- Configuration ---
# Replace with your own repository ID if it differs.
repo_id = "jian-mo/jina-reranker-m0-onnx"
onnx_filename = "jina-reranker-m0.onnx"  # name of the main ONNX graph file
# The processor is loaded from the original (non-ONNX) model ID.
original_model_id = "jinaai/jina-reranker-m0"
# --- End of configuration ---

# 1. Download the ONNX model from the Hub.
# The model is stored in the ONNX external-data format, so its weights live in
# many separate files alongside the main .onnx file. hf_hub_download fetches
# only the single file it is asked for, which would leave the graph without
# its weights; snapshot_download fetches the whole repository into one local
# directory so ONNX Runtime can resolve the external data next to the graph.
print(f"正在从 {repo_id} 下载 ONNX 模型...")
local_repo_dir = snapshot_download(repo_id=repo_id)
local_onnx_path = os.path.join(local_repo_dir, onnx_filename)
print(f"ONNX 模型已下载至: {local_onnx_path}")

# 2. Create the ONNX Runtime session.
print("正在加载 ONNX 推理会话...")
# Other execution providers can be selected, e.g.
# ['CUDAExecutionProvider', 'CPUExecutionProvider'], if you have GPU support
# and a matching onnxruntime build.
session_options = ort.SessionOptions()
# session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
providers = ['CPUExecutionProvider']  # CPU by default
session = ort.InferenceSession(local_onnx_path, sess_options=session_options, providers=providers)
print(f"ONNX 会话已加载，使用提供程序: {session.get_providers()}")

# 3. Load the processor that prepares the model inputs.
print(f"正在从 {original_model_id} 加载处理器...")
processor = AutoProcessor.from_pretrained(original_model_id, trust_remote_code=True)
print("处理器已加载。")

# 4. Prepare the input data.
query = "什么是深度学习？"
document = "深度学习是基于具有表示学习的人工神经网络的机器学习的一个子集。"
# Multi-document (batched) example:
# documents = [
#     "深度学习是基于具有表示学习的人工神经网络的机器学习的一个子集。",
#     "人工智能是指机器对人类智能的模拟。",
#     "Transformer 是一种深度学习模型，主要用于自然语言处理领域。"
# ]
# If needed, use processor logic suited to one query + several documents.

print("正在准备输入数据...")
# The query and the document are concatenated and processed together.
# NOTE(review): the exact prompt format the reranker expects is not shown
# here — confirm against the original model card.
inputs = processor(
    text=f"{query} {document}",
    images=None,  # text-only reranking is assumed
    return_tensors="pt",  # get PyTorch tensors first
    padding=True,
    truncation=True,
    max_length=512  # a reasonable max_length
)

# ONNX Runtime takes NumPy arrays, so convert the tensors.
inputs_np = {
    "input_ids": inputs["input_ids"].numpy(),
    "attention_mask": inputs["attention_mask"].numpy()
}
print("输入数据已准备。")
# print("输入形状:", {k: v.shape for k, v in inputs_np.items()})

# 5. Run inference, requesting every output the session declares.
print("正在运行推理...")
output_names = [output.name for output in session.get_outputs()]
outputs = session.run(output_names, inputs_np)
print("推理完成。")

# 6. Inspect the outputs.
# How to interpret them depends on the model's output structure. For Jina
# Reranker the output is typically a logit score, and higher values usually
# mean higher relevance — check the original model card.
print(f"输出数量: {len(outputs)}")
if outputs:
    logits = outputs[0]
    print(f"输出 logits 形状: {logits.shape}")
    # NOTE(review): the original comment is cut off at this point; how the
    # logits map to a final relevance score is not shown here.

jian-mo/jina-reranker-m0-onnx

作者 jian-mo

sentence-similarity onnx

↓ 0 ♥ 2

创建时间: 2025-04-09 21:04:58+00:00

更新时间: 2025-04-09 21:11:21+00:00

在 Hugging Face 上查看

文件 (287)

.gitattributes

README.md

jina-reranker-m0.onnx ONNX

model.embed_tokens.weight

model.layers.0.input_layernorm.weight

model.layers.0.post_attention_layernorm.weight

model.layers.0.self_attn.q_proj.bias

model.layers.1.input_layernorm.weight

model.layers.1.post_attention_layernorm.weight

model.layers.1.self_attn.q_proj.bias

model.layers.10.input_layernorm.weight

model.layers.10.post_attention_layernorm.weight

model.layers.10.self_attn.q_proj.bias

model.layers.11.input_layernorm.weight

model.layers.11.post_attention_layernorm.weight

model.layers.11.self_attn.q_proj.bias

model.layers.12.input_layernorm.weight

model.layers.12.post_attention_layernorm.weight

model.layers.12.self_attn.q_proj.bias

model.layers.13.input_layernorm.weight

model.layers.13.post_attention_layernorm.weight

model.layers.13.self_attn.q_proj.bias

model.layers.14.input_layernorm.weight

model.layers.14.post_attention_layernorm.weight

model.layers.14.self_attn.q_proj.bias

model.layers.15.input_layernorm.weight

model.layers.15.post_attention_layernorm.weight

model.layers.15.self_attn.q_proj.bias

model.layers.16.input_layernorm.weight

model.layers.16.post_attention_layernorm.weight

model.layers.16.self_attn.q_proj.bias

model.layers.17.input_layernorm.weight

model.layers.17.post_attention_layernorm.weight

model.layers.17.self_attn.q_proj.bias

model.layers.18.input_layernorm.weight

model.layers.18.post_attention_layernorm.weight

model.layers.18.self_attn.q_proj.bias

model.layers.19.input_layernorm.weight

model.layers.19.post_attention_layernorm.weight

model.layers.19.self_attn.q_proj.bias

model.layers.2.input_layernorm.weight

model.layers.2.post_attention_layernorm.weight

model.layers.2.self_attn.q_proj.bias

model.layers.20.input_layernorm.weight

model.layers.20.post_attention_layernorm.weight

model.layers.20.self_attn.q_proj.bias

model.layers.21.input_layernorm.weight

model.layers.21.post_attention_layernorm.weight

model.layers.21.self_attn.q_proj.bias

model.layers.22.input_layernorm.weight

model.layers.22.post_attention_layernorm.weight

model.layers.22.self_attn.q_proj.bias

model.layers.23.input_layernorm.weight

model.layers.23.post_attention_layernorm.weight

model.layers.23.self_attn.q_proj.bias

model.layers.24.input_layernorm.weight

model.layers.24.post_attention_layernorm.weight

model.layers.24.self_attn.q_proj.bias

model.layers.25.input_layernorm.weight

model.layers.25.post_attention_layernorm.weight

model.layers.25.self_attn.q_proj.bias

model.layers.26.input_layernorm.weight

model.layers.26.post_attention_layernorm.weight

model.layers.26.self_attn.q_proj.bias

model.layers.27.input_layernorm.weight

model.layers.27.post_attention_layernorm.weight

model.layers.27.self_attn.q_proj.bias

model.layers.3.input_layernorm.weight

model.layers.3.post_attention_layernorm.weight

model.layers.3.self_attn.q_proj.bias

model.layers.4.input_layernorm.weight

model.layers.4.post_attention_layernorm.weight

model.layers.4.self_attn.q_proj.bias

model.layers.5.input_layernorm.weight

model.layers.5.post_attention_layernorm.weight

model.layers.5.self_attn.q_proj.bias

model.layers.6.input_layernorm.weight

model.layers.6.post_attention_layernorm.weight

model.layers.6.self_attn.q_proj.bias

model.layers.7.input_layernorm.weight

model.layers.7.post_attention_layernorm.weight

model.layers.7.self_attn.q_proj.bias

model.layers.8.input_layernorm.weight

model.layers.8.post_attention_layernorm.weight

model.layers.8.self_attn.q_proj.bias

model.layers.9.input_layernorm.weight

model.layers.9.post_attention_layernorm.weight

model.layers.9.self_attn.q_proj.bias

model.norm.weight

onnx__MatMul_8817

onnx__MatMul_8818

onnx__MatMul_8819

onnx__MatMul_8844

onnx__MatMul_8845

onnx__MatMul_8846

onnx__MatMul_8847

onnx__MatMul_8848

onnx__MatMul_8849

onnx__MatMul_8850

onnx__MatMul_8875

onnx__MatMul_8876

onnx__MatMul_8877

onnx__MatMul_8878

onnx__MatMul_8879

onnx__MatMul_8880

onnx__MatMul_8881

onnx__MatMul_8906

onnx__MatMul_8907

onnx__MatMul_8908

onnx__MatMul_8909

onnx__MatMul_8910

onnx__MatMul_8911

onnx__MatMul_8912

onnx__MatMul_8937

onnx__MatMul_8938

onnx__MatMul_8939

onnx__MatMul_8940

onnx__MatMul_8941

onnx__MatMul_8942

onnx__MatMul_8943

onnx__MatMul_8968

onnx__MatMul_8969

onnx__MatMul_8970

onnx__MatMul_8971

onnx__MatMul_8972

onnx__MatMul_8973

onnx__MatMul_8974

onnx__MatMul_8999

onnx__MatMul_9000

onnx__MatMul_9001

onnx__MatMul_9002

onnx__MatMul_9003

onnx__MatMul_9004

onnx__MatMul_9005

onnx__MatMul_9030

onnx__MatMul_9031

onnx__MatMul_9032

onnx__MatMul_9033

onnx__MatMul_9034

onnx__MatMul_9035

onnx__MatMul_9036

onnx__MatMul_9061

onnx__MatMul_9062

onnx__MatMul_9063

onnx__MatMul_9064

onnx__MatMul_9065

onnx__MatMul_9066

onnx__MatMul_9067

onnx__MatMul_9092

onnx__MatMul_9093

onnx__MatMul_9094

onnx__MatMul_9095

onnx__MatMul_9096

onnx__MatMul_9097

onnx__MatMul_9098

onnx__MatMul_9123

onnx__MatMul_9124

onnx__MatMul_9125

onnx__MatMul_9126

onnx__MatMul_9127

onnx__MatMul_9128

onnx__MatMul_9129

onnx__MatMul_9154

onnx__MatMul_9155

onnx__MatMul_9156

onnx__MatMul_9157

onnx__MatMul_9158

onnx__MatMul_9159

onnx__MatMul_9160

onnx__MatMul_9185

onnx__MatMul_9186

onnx__MatMul_9187

onnx__MatMul_9188

onnx__MatMul_9189

onnx__MatMul_9190

onnx__MatMul_9191

onnx__MatMul_9216

onnx__MatMul_9217

onnx__MatMul_9218

onnx__MatMul_9219

onnx__MatMul_9220

onnx__MatMul_9221

onnx__MatMul_9222

onnx__MatMul_9247

onnx__MatMul_9248

onnx__MatMul_9249

onnx__MatMul_9250

onnx__MatMul_9251

onnx__MatMul_9252

onnx__MatMul_9253

onnx__MatMul_9278

onnx__MatMul_9279

onnx__MatMul_9280

onnx__MatMul_9281

onnx__MatMul_9282

onnx__MatMul_9283

onnx__MatMul_9284

onnx__MatMul_9309

onnx__MatMul_9310

onnx__MatMul_9311

onnx__MatMul_9312

onnx__MatMul_9313

onnx__MatMul_9314

onnx__MatMul_9315

onnx__MatMul_9340

onnx__MatMul_9341

onnx__MatMul_9342

onnx__MatMul_9343

onnx__MatMul_9344

onnx__MatMul_9345

onnx__MatMul_9346

onnx__MatMul_9371

onnx__MatMul_9372

onnx__MatMul_9373

onnx__MatMul_9374

onnx__MatMul_9375

onnx__MatMul_9376

onnx__MatMul_9377

onnx__MatMul_9402

onnx__MatMul_9403

onnx__MatMul_9404

onnx__MatMul_9405

onnx__MatMul_9406

onnx__MatMul_9407

onnx__MatMul_9408

onnx__MatMul_9433

onnx__MatMul_9434

onnx__MatMul_9435

onnx__MatMul_9436

onnx__MatMul_9437

onnx__MatMul_9438

onnx__MatMul_9439

onnx__MatMul_9464

onnx__MatMul_9465

onnx__MatMul_9466

onnx__MatMul_9467

onnx__MatMul_9468

onnx__MatMul_9469

onnx__MatMul_9470

onnx__MatMul_9495

onnx__MatMul_9496

onnx__MatMul_9497

onnx__MatMul_9498

onnx__MatMul_9499

onnx__MatMul_9500

onnx__MatMul_9501

onnx__MatMul_9526

onnx__MatMul_9527

onnx__MatMul_9528

onnx__MatMul_9529

onnx__MatMul_9530

onnx__MatMul_9531

onnx__MatMul_9532

onnx__MatMul_9557

onnx__MatMul_9558

onnx__MatMul_9559

onnx__MatMul_9560

onnx__MatMul_9561

onnx__MatMul_9562

onnx__MatMul_9563

onnx__MatMul_9588

onnx__MatMul_9589

onnx__MatMul_9590

onnx__MatMul_9591

onnx__MatMul_9592

onnx__MatMul_9593

onnx__MatMul_9594

onnx__MatMul_9619

onnx__MatMul_9620

onnx__MatMul_9621

onnx__MatMul_9622

onnx__MatMul_9623

onnx__MatMul_9624

onnx__MatMul_9625

onnx__MatMul_9650

onnx__MatMul_9651

onnx__MatMul_9652

onnx__MatMul_9653

onnx__MatMul_9654

onnx__MatMul_9655

onnx__MatMul_9656

onnx__MatMul_9681

onnx__MatMul_9682

onnx__MatMul_9683

onnx__MatMul_9684

onnx__MatMul_9685

readme.md