Fernando Cervan committed on
Commit
f5d0236
·
1 Parent(s): f8138f2

Salvando alterações

Browse files
Files changed (1) hide show
  1. app.py +28 -25
app.py CHANGED
@@ -1,28 +1,31 @@
1
- from transformers import (
2
- PaliGemmaProcessor,
3
- PaliGemmaForConditionalGeneration,
4
- )
5
- from transformers.image_utils import load_image
6
  import torch
7
 
8
- model_id = "google/paligemma2-3b-mix-224"
9
-
10
- url = "cnh-michele-digital.jpeg"
11
- image = load_image(url)
12
-
13
- model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto").eval()
14
- processor = PaliGemmaProcessor.from_pretrained(model_id)
15
-
16
- prompt = "Extraia o nome, cpf, data de nascimento e número de registro. Retorno apenas um JSON com esses dados."
17
- prompt = "describe en"
18
- model_inputs = processor(text=prompt, images=image, return_tensors="pt").to(torch.bfloat16).to(model.device)
19
- input_len = model_inputs["input_ids"].shape[-1]
 
 
 
 
 
 
 
20
 
21
- with torch.inference_mode():
22
- generation = model.generate(**model_inputs, max_new_tokens=100, do_sample=False)
23
- generation = generation[0][input_len:]
24
- decoded = processor.decode(generation, skip_special_tokens=True)
25
- print("="*20)
26
- print(decoded)
27
- print("=" * 20)
28
- print("FIM PROCESSO")
 
1
+ from transformers import pipeline
 
 
 
 
2
  import torch
3
 
4
+ pipe = pipeline(
5
+ "image-text-to-text",
6
+ model="google/gemma-3-4b-it",
7
+ device="cuda",
8
+ torch_dtype=torch.bfloat16
9
+ )
10
+ messages = [
11
+ {
12
+ "role": "system",
13
+ "content": [{"type": "text", "text": "You are a helpful assistant."}]
14
+ },
15
+ {
16
+ "role": "user",
17
+ "content": [
18
+ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
19
+ {"type": "text", "text": "What animal is on the candy?"}
20
+ ]
21
+ }
22
+ ]
23
 
24
+ output = pipe(text=messages, max_new_tokens=200)
25
+ print("=" * 20)
26
+ print(output[0]["generated_text"][-1]["content"])
27
+ print("=" * 20)
28
+ print("FIM PROCESSO")
29
+ # Okay, let's take a look!
30
+ # Based on the image, the animal on the candy is a **turtle**.
31
+ # You can see the shell shape and the head and legs.