mihalykiss committed
Commit 724a60b · verified · 1 Parent(s): e99c594

Update app.py

Files changed (1):
  1. app.py +6 -20
app.py CHANGED
@@ -1,30 +1,19 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import DebertaTokenizer, DebertaForSequenceClassification, get_linear_schedule_with_warmup
 import torch
 import re
 from tokenizers import normalizers
 from tokenizers.normalizers import Sequence, Replace, Strip
 from tokenizers import Regex
 
-model1_path = "modernbert.bin"
-model2_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
-model3_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')
 
-model_1 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
-model_1.load_state_dict(torch.load(model1_path, map_location=device))
-model_1.to(device).eval()
 
-model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
-model_2.load_state_dict(torch.hub.load_state_dict_from_url(model2_path, map_location=device))
+model_2 = DebertaForSequenceClassification.from_pretrained("mihalykiss/best_merged_41_2", num_labels=41)
 model_2.to(device).eval()
 
-model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
-model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
-model_3.to(device).eval()
-
 
 label_mapping = {
     0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
@@ -66,15 +55,12 @@ def classify_text(text):
     inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True).to(device)
 
     with torch.no_grad():
-        logits_1 = model_1(**inputs).logits
+
         logits_2 = model_2(**inputs).logits
-        logits_3 = model_3(**inputs).logits
 
-    softmax_1 = torch.softmax(logits_1, dim=1)
     softmax_2 = torch.softmax(logits_2, dim=1)
-    softmax_3 = torch.softmax(logits_3, dim=1)
 
-    averaged_probabilities = (softmax_1 + softmax_2 + softmax_3) / 3
+    averaged_probabilities = softmax_2
     probabilities = averaged_probabilities[0]
 
     ai_probs = probabilities.clone()
@@ -92,7 +78,7 @@ def classify_text(text):
     else:
         result_message = (
            f"**The text is** <span class='highlight-ai'>**{ai_total_prob:.2f}%** likely <b>AI generated</b>.</span>\n\n"
-            f"**Identified AI Model: {ai_argmax_model}**"
+            f"**Identified LLM: {ai_argmax_model}**"
        )
 
     return result_message
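Taken together, the change swaps a three-model ModernBERT ensemble for a single fine-tuned DeBERTa classifier. Below is a minimal sketch of the resulting inference path, assembled only from lines visible in this diff; the text-cleaning step, the full 41-entry label_mapping, and the Gradio UI are elided, `cleaned_text` stands in for the app's normalizer output, and the classify_probs wrapper name is hypothetical (the app's classify_text goes on to format a result message).

import torch
from transformers import DebertaTokenizer, DebertaForSequenceClassification

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')
model_2 = DebertaForSequenceClassification.from_pretrained(
    "mihalykiss/best_merged_41_2", num_labels=41)
model_2.to(device).eval()

def classify_probs(cleaned_text):
    # Tokenize and move the batch to the model's device.
    inputs = tokenizer(cleaned_text, return_tensors="pt",
                       truncation=True, padding=True).to(device)
    # Single forward pass; with the other two ensemble members gone,
    # "averaged_probabilities" is now just this model's softmax output.
    with torch.no_grad():
        logits_2 = model_2(**inputs).logits
    softmax_2 = torch.softmax(logits_2, dim=1)
    return softmax_2[0]  # per-class probabilities for the single input

Note that the commit keeps the averaged_probabilities variable name even though nothing is averaged anymore, so the downstream indexing (probabilities = averaged_probabilities[0]) stays untouched; the newly imported get_linear_schedule_with_warmup is a training-time scheduler and is unused on this inference path.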
 
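For contrast, here is a sketch of the ensemble step this commit removes, reconstructed from the deleted lines: three ModernBERT-base classifiers, apparently fine-tuned with different seeds per the checkpoint names, score the same input and their softmax distributions are averaged to smooth out single-checkpoint variance. The load_member helper and ensemble_probabilities wrapper are mine; the checkpoint path, URLs, and num_labels=41 are as in the old code.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")

def load_member(state_dict):
    # Each ensemble member shares the ModernBERT-base architecture and
    # 41-way classification head; only the fine-tuned weights differ.
    model = AutoModelForSequenceClassification.from_pretrained(
        "answerdotai/ModernBERT-base", num_labels=41)
    model.load_state_dict(state_dict)
    return model.to(device).eval()

# Member 1 was loaded from a local file, members 2 and 3 from checkpoint URLs.
model_1 = load_member(torch.load("modernbert.bin", map_location=device))
model_2 = load_member(torch.hub.load_state_dict_from_url(
    "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12",
    map_location=device))
model_3 = load_member(torch.hub.load_state_dict_from_url(
    "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22",
    map_location=device))

def ensemble_probabilities(text):
    inputs = tokenizer(text, return_tensors="pt",
                       truncation=True, padding=True).to(device)
    with torch.no_grad():
        # Average the three per-class softmax distributions.
        probs = [torch.softmax(m(**inputs).logits, dim=1)
                 for m in (model_1, model_2, model_3)]
    return torch.stack(probs).mean(dim=0)[0]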