Caikejs committed
Commit 07fea57 (verified)
1 Parent(s): 9eb9488

Update app.py

Files changed (1)
  1. app.py +41 -58
app.py CHANGED
@@ -1,75 +1,58 @@
 import torch
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

-# Memory settings
+# Detect the device
 DEVICE = 0 if torch.cuda.is_available() else -1
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

-# Models optimized for 16 GB
-MODEL_CONFIG = {
-    "Modelo A": {
-        "name": "pierreguillain/gpt2-small-portuguese",
-        "max_tokens": 150
-    },
-    "Modelo B": {
-        "name": "pierreguillain/gpt-neo-125m-portuguese",
-        "max_tokens": 150
-    }
-}
-
-# Load only one model at a time
-def load_model(model_name):
-    config = MODEL_CONFIG[model_name]
-    model = AutoModelForCausalLM.from_pretrained(
-        config["name"],
-        torch_dtype=TORCH_DTYPE,
-        low_cpu_mem_usage=True  # Reduces memory consumption
-    )
-    tokenizer = AutoTokenizer.from_pretrained(config["name"])
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        device=DEVICE,
-        return_full_text=False,
-        pad_token_id=tokenizer.eos_token_id
-    )
-    return pipe, config["max_tokens"]
-
-# Explicitly free memory
-def unload_model(pipe):
-    del pipe
-    torch.cuda.empty_cache()
+# Model A: Falcon 7B Instruct (no authentication required)
+model_a = AutoModelForCausalLM.from_pretrained(
+    "tiiuae/falcon-7b-instruct",
+    torch_dtype=TORCH_DTYPE
+)
+tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+
+pipe_a = pipeline(
+    "text-generation",
+    model=model_a,
+    tokenizer=tokenizer_a,
+    device=DEVICE,
+    return_full_text=False,
+    pad_token_id=tokenizer_a.eos_token_id
+)
+
+# Model B: OpenAssistant Pythia 12B (no authentication required)
+model_b = AutoModelForCausalLM.from_pretrained(
+    "OpenAssistant/oasst-sft-1-pythia-12b",
+    torch_dtype=TORCH_DTYPE
+)
+tokenizer_b = AutoTokenizer.from_pretrained("OpenAssistant/oasst-sft-1-pythia-12b")
+
+pipe_b = pipeline(
+    "text-generation",
+    model=model_b,
+    tokenizer=tokenizer_b,
+    device=DEVICE,
+    return_full_text=False,
+    pad_token_id=tokenizer_b.eos_token_id
+)

 # Prompt interface
 def format_prompt(user_input):
-    return f"Responda de forma clara e concisa: {user_input.strip()}"
+    return f"Responda em português: {user_input.strip()}"

 if __name__ == "__main__":
-    print("Sistema otimizado para 16GB de RAM\n")
-
     while True:
         prompt = input("\nDigite uma pergunta (ou 'sair'): ").strip()
         if prompt.lower() == "sair":
             break

-        # Process one model at a time
-        for model_name in MODEL_CONFIG:
-            try:
-                print(f"\n=== Carregando {model_name} ===")
-                pipe, max_tokens = load_model(model_name)
-
-                print(f"\n=== Resposta do {model_name} ===")
-                response = pipe(
-                    format_prompt(prompt),
-                    max_new_tokens=max_tokens,
-                    temperature=0.7,
-                    top_p=0.9
-                )[0]['generated_text'].strip()
-
-                print(response)
-                unload_model(pipe)
-
-            except Exception as e:
-                print(f"Erro no {model_name}: {str(e)}")
-                unload_model(pipe)
+        print("\n=== Resposta do Falcon ===")
+        with torch.no_grad():
+            falcon_response = pipe_a(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
+        print(falcon_response)
+
+        print("\n=== Resposta do OpenAssistant ===")
+        with torch.no_grad():
+            oa_response = pipe_b(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
+        print(oa_response)