# DocReRank: Multi-Modal Reranker
This is the official model from the paper:
**DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers**
See the Project Page for more information.
## Model Overview
- Base model: Qwen/Qwen2-VL-2B-Instruct
- Architecture: Vision-Language reranker
- Fine-tuning method: PEFT (LoRA)
- Training data: Generated by the Single-Page Hard Negative Query Generation pipeline.
- Purpose: Improves second-stage reranking for Retrieval-Augmented Generation (RAG) in multimodal scenarios (see the sketch below).
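
For context on where the reranker sits: a first-stage retriever (typically an embedding model) returns candidate pages, and the reranker then scores each page against the query to reorder them. Below is a minimal sketch of that pattern, with `score_fn` as a hypothetical placeholder for any `(page, query) -> float` scorer; the `compute_score` function from the usage example below fits this signature.

```python
# Sketch of second-stage reranking in a multimodal RAG pipeline.
# `score_fn` is a placeholder, not part of this repo; compute_score
# from the "How to Use" section below can be passed in for it.
from typing import Callable, List, Tuple

def rerank(query: str,
           candidate_pages: List[str],
           score_fn: Callable[[str, str], float],
           top_k: int = 5) -> List[Tuple[str, float]]:
    """Score first-stage candidates against the query and keep the top_k."""
    scored = [(page, score_fn(page, query)) for page in candidate_pages]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_k]
```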
## How to Use
This adapter requires the base Qwen2-VL model.
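
The example uses the standard Hugging Face stack. As a rough, unpinned guide (`accelerate` is an assumption here, included because the base model is loaded with `device_map`):

```bash
pip install torch transformers peft pillow huggingface_hub accelerate
```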
```python
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from peft import PeftModel
import torch
from PIL import Image
from huggingface_hub import hf_hub_download
import os

# Load the base model
base_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)

# Load the DocReRank LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, "DocReRank/DocReRank-Reranker").eval()

# Load the processor and bound the image resolution fed to the model
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
processor.image_processor.min_pixels = 200704
processor.image_processor.max_pixels = 589824

# Define the query
query_text = "What are the performances of the DocReRank model on restaurant and biomedical benchmarks?"
# query_text = "Are there ablation results for the DocReRank model?"

# Download demo pages (pages of the DocReRank paper) from the model repo
save_dir = os.path.join(os.getcwd(), "paper_pages")
os.makedirs(save_dir, exist_ok=True)
image_files = [
    "DocReRank_paper_page_2.png",
    "DocReRank_paper_page_4.png",
    "DocReRank_paper_page_6.png",
    "DocReRank_paper_page_8.png",
]
image_paths = []
for f in image_files:
    local_path = hf_hub_download(repo_id="DocReRank/DocReRank-Reranker", filename=f, local_dir=save_dir)
    image_paths.append(local_path)
print("Files downloaded to:", image_paths)

# Reranking prompt template: the model answers "True" or "False", and the
# relevance score is the probability it assigns to "True"
def compute_score(image_path, query_text):
    image = Image.open(image_path)
    prompt = f"Assert the relevance of the previous image document to the following query, answer True or False. The query is: {query_text}"
    messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt}]}]

    # Apply the chat template and tokenize
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=text, images=image, return_tensors="pt").to(model.device, torch.bfloat16)

    # Compare the next-token logits of "True" vs. "False"
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits[:, -1, :]
    true_id = processor.tokenizer.convert_tokens_to_ids("True")
    false_id = processor.tokenizer.convert_tokens_to_ids("False")
    probs = torch.softmax(logits[:, [true_id, false_id]], dim=-1)
    relevance_score = probs[0, 0].item()  # probability of "True"
    return relevance_score

# Compute a relevance score for each downloaded page
scores = [(img, compute_score(img, query_text)) for img in image_paths]

# Print the results
for img, score in scores:
    print(f"Image: {img} | Relevance Score: {score:.4f}")
```
## Citation
If you use this model, please cite:
```bibtex
@article{wasserman2025docrerank,
  title={DocReRank: Single-Page Hard Negative Query Generation for Training Multi-Modal RAG Rerankers},
  author={Wasserman, Navve and Heinimann, Oliver and Golbari, Yuval and Zimbalist, Tal and Schwartz, Eli and Irani, Michal},
  journal={arXiv preprint arXiv:2505.22584},
  year={2025}
}
```