from smolagents import Tool
from transformers import pipeline


class ConvertImageToTextTool(Tool):
    """Agent tool that produces a text description of an image.

    The tool wraps a Hugging Face ``image-to-text`` pipeline. The pipeline is
    built once, when the tool is instantiated, and reused for every call.
    """

    # Tool metadata read by the agent framework.
    name = "convert_image_to_text"
    description = "Transcribe an image file to text using a free Hugging Face template."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path of the image file to elaborate",
        },
    }
    output_type = "string"

    def __init__(self):
        """Initialise the base tool and load the image-to-text pipeline."""
        super().__init__()
        # Model identifier handed to the pipeline below.
        self.model = "nlpconnect/vit-gpt2-image-captioning"
        self.transcriber = pipeline(
            "image-to-text",
            model=self.model,
            use_fast=True,
        )

    def forward(self, image_path: str) -> str:
        """Run the pipeline on ``image_path`` and return its generated text.

        Args:
            image_path: Value passed straight to the pipeline as its input.

        Returns:
            ``"Image description: <text>"`` built from the ``generated_text``
            field of the first pipeline result. If anything raises while
            running the pipeline or reading its output, an error message
            string is returned instead of propagating the exception.
        """
        try:
            outputs = self.transcriber(image_path)
            # Only the first result is reported back to the caller.
            caption = outputs[0]["generated_text"]
            return f"Image description: {caption}"
        except Exception as exc:
            # Best-effort tool: failures are reported as text, never raised.
            return f"Error convert_image_to_text is not working properly, error: {exc}, please skip this tool"