Caikejs committed
Commit 9eb9488 (verified)
1 Parent(s): c595ab5

Update app.py

Files changed (1)
  1. app.py +58 -41
app.py CHANGED
@@ -1,58 +1,75 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 
-# Detect the device
+# Memory settings
 DEVICE = 0 if torch.cuda.is_available() else -1
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 
-# Model A: Falcon 7B Instruct (no authentication required)
-model_a = AutoModelForCausalLM.from_pretrained(
-    "tiiuae/falcon-7b-instruct",
-    torch_dtype=TORCH_DTYPE
-)
-tokenizer_a = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+# Models sized for 16 GB of RAM
+MODEL_CONFIG = {
+    "Modelo A": {
+        "name": "pierreguillain/gpt2-small-portuguese",
+        "max_tokens": 150
+    },
+    "Modelo B": {
+        "name": "pierreguillain/gpt-neo-125m-portuguese",
+        "max_tokens": 150
+    }
+}
 
-pipe_a = pipeline(
-    "text-generation",
-    model=model_a,
-    tokenizer=tokenizer_a,
-    device=DEVICE,
-    return_full_text=False,
-    pad_token_id=tokenizer_a.eos_token_id
-)
+# Load only one model at a time
+def load_model(model_name):
+    config = MODEL_CONFIG[model_name]
+    model = AutoModelForCausalLM.from_pretrained(
+        config["name"],
+        torch_dtype=TORCH_DTYPE,
+        low_cpu_mem_usage=True  # reduces peak memory while loading
+    )
+    tokenizer = AutoTokenizer.from_pretrained(config["name"])
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        device=DEVICE,
+        return_full_text=False,
+        pad_token_id=tokenizer.eos_token_id
+    )
+    return pipe, config["max_tokens"]
 
-# Model B: OpenAssistant Pythia 12B (no authentication required)
-model_b = AutoModelForCausalLM.from_pretrained(
-    "OpenAssistant/oasst-sft-1-pythia-12b",
-    torch_dtype=TORCH_DTYPE
-)
-tokenizer_b = AutoTokenizer.from_pretrained("OpenAssistant/oasst-sft-1-pythia-12b")
-
-pipe_b = pipeline(
-    "text-generation",
-    model=model_b,
-    tokenizer=tokenizer_b,
-    device=DEVICE,
-    return_full_text=False,
-    pad_token_id=tokenizer_b.eos_token_id
-)
+# Explicitly release memory
+def unload_model(pipe):
+    del pipe
+    torch.cuda.empty_cache()
 
 # Prompt interface
 def format_prompt(user_input):
-    return f"Responda em português: {user_input.strip()}"
+    return f"Responda de forma clara e concisa: {user_input.strip()}"
 
 if __name__ == "__main__":
+    print("Sistema otimizado para 16GB de RAM\n")
+
     while True:
         prompt = input("\nDigite uma pergunta (ou 'sair'): ").strip()
         if prompt.lower() == "sair":
             break
 
-        print("\n=== Resposta do Falcon ===")
-        with torch.no_grad():
-            falcon_response = pipe_a(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
-        print(falcon_response)
-
-        print("\n=== Resposta do OpenAssistant ===")
-        with torch.no_grad():
-            oa_response = pipe_b(format_prompt(prompt), max_new_tokens=100)[0]['generated_text']
-        print(oa_response)
+        # Process one model at a time
+        for model_name in MODEL_CONFIG:
+            try:
+                print(f"\n=== Carregando {model_name} ===")
+                pipe, max_tokens = load_model(model_name)
+
+                print(f"\n=== Resposta do {model_name} ===")
+                response = pipe(
+                    format_prompt(prompt),
+                    max_new_tokens=max_tokens,
+                    temperature=0.7,
+                    top_p=0.9
+                )[0]['generated_text'].strip()
+
+                print(response)
+                unload_model(pipe)
+
+            except Exception as e:
+                print(f"Erro no {model_name}: {str(e)}")
+                unload_model(pipe)
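
Editorial note: if load_model raises before pipe is assigned (for example, when a model download fails), the unload_model(pipe) call inside the except block will itself raise a NameError. A minimal sketch of a guarded version of the loop body; the pipe = None initialisation and the finally block are my additions, not code from this commit:

for model_name in MODEL_CONFIG:
    pipe = None
    try:
        print(f"\n=== Carregando {model_name} ===")
        pipe, max_tokens = load_model(model_name)

        print(f"\n=== Resposta do {model_name} ===")
        response = pipe(
            format_prompt(prompt),
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.9
        )[0]["generated_text"].strip()
        print(response)
    except Exception as e:
        print(f"Erro no {model_name}: {e}")
    finally:
        # Runs on success and on failure, so cleanup always happens
        if pipe is not None:
            unload_model(pipe)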
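A second caveat on unload_model: del pipe only removes the function's local name, so the caller's pipe variable keeps the model alive until it is rebound on the next loop iteration, and torch.cuda.empty_cache() only returns cached GPU memory to the driver, which does nothing for RAM on a CPU-only 16 GB machine. A small sketch of a stricter variant, keeping the same signature; the gc.collect() call and the CUDA availability guard are my assumptions, not part of this commit:

import gc
import torch

def unload_model(pipe):
    del pipe                      # drops only this function's local reference
    gc.collect()                  # reclaim anything that is no longer referenced
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # only meaningful when a GPU is in use

Even with this version, the weights are fully released only once the caller's own pipe reference is dropped (e.g. rebound on the next iteration or set to None before calling).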