from fastapi import FastAPI, UploadFile, File
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import io


app = FastAPI()

# GLM-4V multimodal chat model. trust_remote_code=True is required because the
# checkpoint ships its own modeling code — NOTE(review): this executes
# third-party Python on load; pin the revision in production.
model_id = "THUDM/glm-4v-9b"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# BUG FIX: the original called `model_id.from_pretrained(...)` — model_id is a
# plain str, so that raises AttributeError at import time. Load through the
# Auto class that is imported above.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).cpu().eval()
|
|
@app.post("/convert")
async def convert_image(file: UploadFile = File(...)):
    """Extract tabular data from an uploaded chart image via GLM-4V.

    The uploaded image is sent to the model with a fixed extraction prompt;
    the model's generated text (expected to be an HTML table) is returned
    under the ``html_result`` key.

    Args:
        file: Multipart image upload (any format Pillow can open).

    Returns:
        dict: ``{"html_result": <model output string>}``.
    """
    image_data = await file.read()
    # Normalize to RGB — uploads may be RGBA/paletted/grayscale, which the
    # model's vision preprocessor does not accept.
    image = Image.open(io.BytesIO(image_data)).convert("RGB")

    prompt = "Read this chart and output the data as a clean HTML table with headers."

    # BUG FIX: return_dict=True is required so the result is a mapping that
    # can be splatted into model.generate(**inputs); without it,
    # apply_chat_template(..., return_tensors="pt") returns a bare tensor
    # and the ** expansion below fails.
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "image": image, "content": prompt}],
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    )

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=1000)

    # BUG FIX: generate() returns prompt + completion. Decode only the newly
    # generated tokens and strip special tokens, so the client gets clean
    # HTML rather than the prompt echoed back with <eos>-style markers.
    new_tokens = outputs[:, inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens[0], skip_special_tokens=True)
    return {"html_result": response}