Commit e15502b · Parent(s): e4c9f8c

Fixed Analyse Script
model/analyzer.py  CHANGED  (+56 -11)
@@ -26,7 +26,7 @@ class ContentAnalyzer:
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.model_name,
                 use_fast=True,
-                token=os.environ.get("HF_TOKEN")
+                token=os.environ.get("HF_TOKEN")
             )
 
             print(f"Loading model on {self.device}...")
@@ -35,14 +35,13 @@ class ContentAnalyzer:
                 torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                 low_cpu_mem_usage=True,
                 device_map="auto",
-                token=os.environ.get("HF_TOKEN")
+                token=os.environ.get("HF_TOKEN")
             )
             return True
         except Exception as e:
             print(f"Model loading error: {str(e)}")
             return False
 
-    # Rest of your code remains exactly the same
     def cleanup(self):
         if self.device == "cuda":
             torch.cuda.empty_cache()
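Both from_pretrained calls keep the token= argument, which is how the Space authenticates against gated or private checkpoints: the value comes from the HF_TOKEN environment variable (typically a Space secret), and os.environ.get returns None when it is unset, which transformers treats as anonymous access. A minimal standalone sketch of the same loading pattern; the checkpoint name is a placeholder, since the commit does not show what self.model_name is set to:

    import os

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Placeholder checkpoint; the commit does not reveal self.model_name.
    MODEL_NAME = "some-org/some-gated-model"

    # None when the secret is missing; public checkpoints still load.
    hf_token = os.environ.get("HF_TOKEN")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto",  # requires the accelerate package
        token=hf_token,
    )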
@@ -52,6 +51,7 @@ class ContentAnalyzer:
         mapped_name = category_info["mapped_name"]
         description = category_info["description"]
 
+        print(f"\nAnalyzing for {mapped_name}...")
         prompt = f"""Check this text for any indication of {mapped_name} ({description}).
 Be sensitive to subtle references or implications, make sure the text is not metaphorical.
 Respond concisely with: YES, NO, or MAYBE.
@@ -59,10 +59,12 @@ class ContentAnalyzer:
 Answer:"""
 
         try:
+            print(f"Sending prompt to model...")
             inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
             inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
             with torch.no_grad():
+                print("Generating response...")
                 outputs = self.model.generate(
                     **inputs,
                     max_new_tokens=10,
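The hunk is cut off inside the generate(...) call, but the visible pieces (512-token truncation, device placement, torch.no_grad(), max_new_tokens=10) compose into one short round-trip. A sketch assuming the remaining generate arguments are left at their defaults; the helper name is illustrative:

    import torch

    def ask_model(model, tokenizer, prompt: str, device: str = "cpu") -> str:
        """One classification round-trip, mirroring the diff's recipe."""
        # Truncate long prompts to the 512-token budget used in the diff.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():  # inference only; skip gradient bookkeeping
            # 10 new tokens is plenty for a YES / NO / MAYBE verdict.
            outputs = model.generate(**inputs, max_new_tokens=10)
        # The decoded string includes the prompt itself, which is why the
        # code below takes the first word of the *last* line.
        return tokenizer.decode(outputs[0], skip_special_tokens=True)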
@@ -75,6 +77,14 @@ class ContentAnalyzer:
             response = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()
             first_word = response.split("\n")[-1].split()[0] if response else "NO"
 
+            print(f"Model response for {mapped_name}: {first_word}")
+            if first_word == "YES":
+                print(f"Detected {mapped_name} in this chunk!")
+            elif first_word == "MAYBE":
+                print(f"Possible {mapped_name} detected, marking for review.")
+            else:
+                print(f"No {mapped_name} detected in this chunk.")
+
             score = 1 if first_word == "YES" else 0.5 if first_word == "MAYBE" else 0
             return score, first_word
 
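Pulled out of the class, the verdict-to-score mapping is easy to test in isolation. A sketch mirroring the diff's logic, including its quirk that a reply like "YES." (with punctuation attached) falls through to 0:

    def score_from_response(response: str) -> tuple[float, str]:
        """Map a free-form model reply to the diff's 1 / 0.5 / 0 scheme."""
        response = response.strip().upper()
        # As in the diff: first word of the last line, defaulting to "NO".
        first_word = response.split("\n")[-1].split()[0] if response else "NO"
        score = 1 if first_word == "YES" else 0.5 if first_word == "MAYBE" else 0
        return score, first_word

    print(score_from_response("Answer:\nYES"))  # (1, 'YES')
    print(score_from_response("maybe"))         # (0.5, 'MAYBE')
    print(score_from_response(""))              # (0, 'NO')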
@@ -83,6 +93,9 @@ class ContentAnalyzer:
             return 0, "NO"
 
     def analyze_text(self, text):
+        print("\n=== Starting Analysis ===")
+        print(f"Time: {datetime.now()}")
+
         if not self.load_model():
             return {
                 "detected_triggers": {"0": "Error"},
@@ -95,14 +108,44 @@ class ContentAnalyzer:
         overlap = 15
         script_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]
 
+        # Using the more detailed trigger categories
         trigger_categories = {
-            "Violence": {
-                ...
+            "Violence": {
+                "mapped_name": "Violence",
+                "description": "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. Includes direct physical confrontations, implied violence, or large-scale events like wars, riots, or violent protests."
+            },
+            "Death": {
+                "mapped_name": "Death References",
+                "description": "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, mentions of deceased individuals, or abstract references to mortality."
+            },
+            "Substance_Use": {
+                "mapped_name": "Substance Use",
+                "description": "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances."
+            },
+            "Gore": {
+                "mapped_name": "Gore",
+                "description": "Extremely detailed and graphic depictions of highly severe physical injuries, mutilation, or extreme bodily harm."
+            },
+            "Sexual_Content": {
+                "mapped_name": "Sexual Content",
+                "description": "Any depiction or mention of sexual activity, intimacy, or sexual behavior."
+            },
+            "Self_Harm": {
+                "mapped_name": "Self-Harm",
+                "description": "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves."
+            },
+            "Gun_Use": {
+                "mapped_name": "Gun Use",
+                "description": "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner."
+            },
+            "Animal_Cruelty": {
+                "mapped_name": "Animal Cruelty",
+                "description": "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental."
+            },
+            "Mental_Health": {
+                "mapped_name": "Mental Health Issues",
+                "description": "Any reference to mental health struggles, disorders, or psychological distress."
+            }
         }
 
         identified_triggers = {}
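The comprehension feeding script_chunks steps through the text by chunk_size - overlap, so consecutive windows share characters and a trigger phrase straddling a boundary is still seen whole by some chunk. A small-numbers sketch to make the overlap visible; chunk_size itself is assigned just above the hunk, so the toy values here are purely illustrative:

    text = "abcdefghijklmnopqrst"  # 20 characters
    chunk_size, overlap = 10, 3    # toy values; the diff only shows overlap = 15
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]
    print(chunks)  # ['abcdefghij', 'hijklmnopq', 'opqrst'] -- each pair shares 3 chars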
@@ -119,6 +162,7 @@ class ContentAnalyzer:
         final_triggers = [category for category, count in identified_triggers.items() if count > 0.5]
         self.cleanup()
 
+        print("\n=== Analysis Complete ===")
         if not final_triggers:
             result = {
                 "detected_triggers": {"0": "None"},
@@ -134,7 +178,8 @@ class ContentAnalyzer:
             "model": self.model_name,
             "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         }
-
+
+        print("\nFinal Result:", result)
         return result
 
 def analyze_content(text):
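How the per-chunk scores accumulate into identified_triggers happens between the hunks shown, so the accumulation below is an assumption: summing the 1 / 0.5 / 0 chunk scores per category and then applying the same count > 0.5 cutoff used for final_triggers is consistent with both ends of the visible code. A hedged sketch of that aggregation:

    from collections import defaultdict

    def aggregate_scores(per_chunk_scores: list[dict[str, float]]) -> list[str]:
        """Sum each category's chunk scores, then apply the diff's cutoff.

        Assumption: the unshown lines accumulate scores by summing; only
        the count > 0.5 filter itself appears in the diff.
        """
        totals = defaultdict(float)
        for scores in per_chunk_scores:
            for category, score in scores.items():
                totals[category] += score
        # Same filter as final_triggers: keep categories with count > 0.5.
        return [category for category, count in totals.items() if count > 0.5]

    # One chunk answered YES (1) for Violence and MAYBE (0.5) for Death;
    # a single MAYBE alone does not clear the 0.5 cutoff.
    print(aggregate_scores([{"Violence": 1, "Death": 0.5}, {"Violence": 0, "Death": 0}]))
    # ['Violence']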
|