Kuberwastaken committed
Commit c19f33e · 1 Parent(s): 6149106

Tried to fix the script

Files changed (1)
  1. model/analyzer.py +149 -159
model/analyzer.py CHANGED
@@ -32,147 +32,158 @@ def analyze_script(script):
32
  device_map="auto" # Automatically map model to available device
33
  )
34
  print("Model loaded successfully")
 
35
  except Exception as e:
36
  print(f"An error occurred: {e}")
37
 
38
- # Define trigger categories with their descriptions
39
- trigger_categories = {
40
- "Violence": {
41
- "mapped_name": "Violence",
42
- "description": (
43
- "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
44
- "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
45
- "or large-scale events like wars, riots, or violent protests."
46
- )
47
- },
48
- "Death": {
49
- "mapped_name": "Death References",
50
- "description": (
51
- "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
52
- "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
53
- "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
54
- )
55
- },
56
- "Substance Use": {
57
- "mapped_name": "Substance Use",
58
- "description": (
59
- "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
60
- "Includes scenes of drinking, smoking, or drug use, whether recreational or addictive. May also cover references to withdrawal symptoms, "
61
- "rehabilitation, or substance-related paraphernalia (e.g., needles, bottles, pipes)."
62
- )
63
- },
64
- "Gore": {
65
- "mapped_name": "Gore",
66
- "description": (
67
- "Extremely detailed and graphic depictions of highly severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
68
- "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail."
69
- )
70
- },
71
- "Vomit": {
72
- "mapped_name": "Vomit",
73
- "description": (
74
- "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail. This includes sounds or visual descriptions of the act, "
75
- "mentions of nausea leading to vomiting, or its aftermath (e.g., the presence of vomit, cleaning it up, or characters reacting to it)."
76
- )
77
- },
78
- "Sexual Content": {
79
- "mapped_name": "Sexual Content",
80
- "description": (
81
- "Any depiction or mention of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
82
- "This includes romantic encounters, physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes (e.g., harassment, innuendos)."
83
- )
84
- },
85
- "Sexual Abuse": {
86
- "mapped_name": "Sexual Abuse",
87
- "description": (
88
- "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force. "
89
- "This includes incidents of sexual assault, molestation, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will or without their consent. "
90
- "It also covers discussions or depictions of the aftermath of such abuse, such as trauma, emotional distress, legal proceedings, or therapy. "
91
- "References to inappropriate sexual advances, groping, or any other form of sexual misconduct are also included, as well as the psychological and emotional impact on survivors. "
92
- "Scenes where individuals are placed in sexually compromising situations, even if not directly acted upon, may also fall under this category."
93
- )
94
- },
95
- "Self-Harm": {
96
- "mapped_name": "Self-Harm",
97
- "description": (
98
- "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
99
- "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included."
100
- )
101
- },
102
- "Gun Use": {
103
- "mapped_name": "Gun Use",
104
- "description": (
105
- "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
106
- "gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
107
- )
108
- },
109
- "Animal Cruelty": {
110
- "mapped_name": "Animal Cruelty",
111
- "description": (
112
- "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
113
- "mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation."
114
- )
115
- },
116
- "Mental Health Issues": {
117
- "mapped_name": "Mental Health Issues",
118
- "description": (
119
- "Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
120
- "or other conditions. Scenes depicting therapy sessions, psychiatric treatment, or coping mechanisms (e.g., medication, journaling) are also included. May cover subtle hints "
121
- "like a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
122
- )
123
- }
124
  }
125
-
126
- print("\nProcessing text...") # Output indicating the text is being processed
127
- chunk_size = 256 # Set the chunk size for text processing
128
- overlap = 15 # Overlap between chunks for context preservation
129
- script_chunks = [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]
130
-
131
- identified_triggers = {}
132
-
133
- for chunk_idx, chunk in enumerate(script_chunks, 1):
134
- print(f"\n--- Processing Chunk {chunk_idx}/{len(script_chunks)} ---")
135
- for category, info in trigger_categories.items():
136
- mapped_name = info["mapped_name"]
137
- description = info["description"]
138
-
139
- print(f"\nAnalyzing for {mapped_name}...")
140
- prompt = f"""
141
- Check this text for any indication of {mapped_name} ({description}).
142
- Be sensitive to subtle references or implications, make sure the text is not metaphorical.
143
- Respond concisely with: YES, NO, or MAYBE.
144
- Text: {chunk}
145
- Answer:
146
- """
147
-
148
- print(f"Sending prompt to model...") # Indicate that prompt is being sent to the model
149
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) # Tokenize the prompt
150
- inputs = {k: v.to(device) for k, v in inputs.items()} # Send inputs to the chosen device
151
-
152
- with torch.no_grad(): # Disable gradient calculation for inference
153
- print("Generating response...") # Indicate that the model is generating a response
154
- outputs = model.generate(
155
- **inputs,
156
- max_new_tokens=3, # Limit response length
157
- do_sample=True, # Enable sampling for more diverse output
158
- temperature=0.5, # Control randomness of the output
159
- top_p=0.9, # Use nucleus sampling
160
- pad_token_id=tokenizer.eos_token_id # Pad token ID
161
- )
162
-
163
- response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper() # Decode and format the response
164
- first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO" # Get the first word of the response
165
- print(f"Model response for {mapped_name}: {first_word}")
166
-
167
- # Update identified triggers based on model response
168
- if first_word == "YES":
169
- print(f"Detected {mapped_name} in this chunk!") # Trigger detected
170
- identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 1
171
- elif first_word == "MAYBE":
172
- print(f"Possible {mapped_name} detected, marking for further review.") # Possible trigger detected
173
- identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 0.5
174
- else:
175
- print(f"No {mapped_name} detected in this chunk.") # No trigger detected
176
 
177
  print("\n=== Analysis Complete ===") # Indicate that analysis is complete
178
  print("Final Results:")
@@ -189,30 +200,9 @@ def analyze_script(script):
189
 
190
  return final_triggers
191
 
192
- def analyze_content(script):
193
- triggers = analyze_script(script)
194
-
195
- if isinstance(triggers, list) and triggers != ["None"]:
196
- result = {
197
- "detected_triggers": triggers,
198
- "confidence": "High - Content detected",
199
- "model": "Llama-3.2-1B",
200
- "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
201
- }
202
- else:
203
- result = {
204
- "detected_triggers": ["None"],
205
- "confidence": "High - No concerning content detected",
206
- "model": "Llama-3.2-1B",
207
- "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
208
- }
209
-
210
- print("\nFinal Result Dictionary:", result)
211
- return result
212
-
213
  # Define the Gradio interface
214
  import gradio as gr
215
- from model import analyze_script # Import the function to analyze the content
216
 
217
  def analyze_content(script):
218
  # Perform the analysis on the input script using the analyze_script function
 
32
  device_map="auto" # Automatically map model to available device
33
  )
34
  print("Model loaded successfully")
35
+
36
  except Exception as e:
37
  print(f"An error occurred: {e}")
38
 
39
+ # Define trigger categories with their descriptions
40
+ trigger_categories = {
41
+ "Violence": {
42
+ "mapped_name": "Violence",
43
+ "description": (
44
+ "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
45
+ "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
46
+ "or large-scale events like wars, riots, or violent protests."
47
+ )
48
+ },
49
+ "Death": {
50
+ "mapped_name": "Death References",
51
+ "description": (
52
+ "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
53
+ "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
54
+ "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
55
+ )
56
+ },
57
+ "Substance Use": {
58
+ "mapped_name": "Substance Use",
59
+ "description": (
60
+ "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
61
+ "Includes scenes of drinking, smoking, or drug use, whether recreational or addictive. May also cover references to withdrawal symptoms, "
62
+ "rehabilitation, or substance-related paraphernalia (e.g., needles, bottles, pipes)."
63
+ )
64
+ },
65
+ "Gore": {
66
+ "mapped_name": "Gore",
67
+ "description": (
68
+ "Extremely detailed and graphic depictions of highly severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
69
+ "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail."
70
+ )
71
+ },
72
+ "Vomit": {
73
+ "mapped_name": "Vomit",
74
+ "description": (
75
+ "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail. This includes sounds or visual descriptions of the act, "
76
+ "mentions of nausea leading to vomiting, or its aftermath (e.g., the presence of vomit, cleaning it up, or characters reacting to it)."
77
+ )
78
+ },
79
+ "Sexual Content": {
80
+ "mapped_name": "Sexual Content",
81
+ "description": (
82
+ "Any depiction or mention of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
83
+ "This includes romantic encounters, physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes (e.g., harassment, innuendos)."
84
+ )
85
+ },
86
+ "Sexual Abuse": {
87
+ "mapped_name": "Sexual Abuse",
88
+ "description": (
89
+ "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force. "
90
+ "This includes incidents of sexual assault, molestation, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will or without their consent. "
91
+ "It also covers discussions or depictions of the aftermath of such abuse, such as trauma, emotional distress, legal proceedings, or therapy. "
92
+ "References to inappropriate sexual advances, groping, or any other form of sexual misconduct are also included, as well as the psychological and emotional impact on survivors. "
93
+ "Scenes where individuals are placed in sexually compromising situations, even if not directly acted upon, may also fall under this category."
94
+ )
95
+ },
96
+ "Self-Harm": {
97
+ "mapped_name": "Self-Harm",
98
+ "description": (
99
+ "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
100
+ "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included."
101
+ )
102
+ },
103
+ "Gun Use": {
104
+ "mapped_name": "Gun Use",
105
+ "description": (
106
+ "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
107
+ "gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
108
+ )
109
+ },
110
+ "Animal Cruelty": {
111
+ "mapped_name": "Animal Cruelty",
112
+ "description": (
113
+ "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
114
+ "mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation."
115
+ )
116
+ },
117
+ "Mental Health Issues": {
118
+ "mapped_name": "Mental Health Issues",
119
+ "description": (
120
+ "Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
121
+ "or other conditions. Scenes depicting therapy sessions, psychiatric treatment, or coping mechanisms (e.g., medication, journaling) are also included. May cover subtle hints "
122
+ "like a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
123
+ )
 
124
  }
125
+ }
126
+
127
+ print("\nProcessing text...") # Output indicating the text is being processed
128
+ chunk_size = 256 # Set the chunk size for text processing
129
+ overlap = 15 # Overlap between chunks for context preservation
130
+ script_chunks = [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]
131
+
132
+ identified_triggers = {}
133
+
134
+ for chunk_idx, chunk in enumerate(script_chunks, 1):
135
+ print(f"\n--- Processing Chunk {chunk_idx}/{len(script_chunks)} ---")
136
+ for category, info in trigger_categories.items():
137
+ mapped_name = info["mapped_name"]
138
+ description = info["description"]
139
+
140
+ print(f"\nAnalyzing for {mapped_name}...")
141
+ prompt = f"""
142
+ Check this text for any indication of {mapped_name} ({description}).
143
+ Be sensitive to subtle references or implications, make sure the text is not metaphorical.
144
+ Respond concisely with: YES, NO, or MAYBE.
145
+ Text: {chunk}
146
+ Answer:
147
+ """
148
+
149
+ print(f"Sending prompt to model...") # Indicate that prompt is being sent to the model
150
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) # Tokenize the prompt
151
+ inputs = {k: v.to(device) for k, v in inputs.items()} # Send inputs to the chosen device
152
+
153
+ with torch.no_grad(): # Disable gradient calculation for inference
154
+ print("Generating response...") # Indicate that the model is generating a response
155
+ outputs = model.generate(
156
+ **inputs,
157
+ max_new_tokens=3, # Limit response length
158
+ do_sample=True, # Enable sampling for more diverse output
159
+ temperature=0.5, # Control randomness of the output
160
+ top_p=0.9, # Use nucleus sampling
161
+ pad_token_id=tokenizer.eos_token_id # Pad token ID
162
+ )
163
+
164
+ response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper() # Decode and format the response
165
+ first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO" # Get the first word of the response
166
+ print(f"Model response for {mapped_name}: {first_word}")
167
+
168
+ # Update identified triggers based on model response
169
+ if first_word == "YES":
170
+ print(f"Detected {mapped_name} in this chunk!") # Trigger detected
171
+ identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 1
172
+ elif first_word == "MAYBE":
173
+ print(f"Possible {mapped_name} detected, marking for further review.") # Possible trigger detected
174
+ identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 0.5
175
+ else:
176
+ print(f"No {mapped_name} detected in this chunk.") # No trigger detected
177
+
178
+ print("\n=== Analysis Complete ===") # Indicate that analysis is complete
179
+ print("Final Results:")
180
+ final_triggers = [] # List to store final triggers
181
+
182
+ # Filter and output the final trigger results
183
+ for mapped_name, count in identified_triggers.items():
184
+ if count > 0.5:
185
+ final_triggers.append(mapped_name)
186
+ print(f"- {mapped_name}: found in {count} chunks")
187
 
188
  print("\n=== Analysis Complete ===") # Indicate that analysis is complete
189
  print("Final Results:")
 
200
 
201
  return final_triggers
202
 
203
  # Define the Gradio interface
204
  import gradio as gr
205
+ from datetime import datetime
206
 
207
  def analyze_content(script):
208
  # Perform the analysis on the input script using the analyze_script function
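
For context, here is a minimal sketch of how a wrapper like analyze_content could be wired to a Gradio interface around the analyze_script function shown in this diff. It is illustrative only, not the repository's actual code: the import path model.analyzer is assumed from the changed file's name, and the result keys mirror the removed analyze_content helper above.

import gradio as gr
from datetime import datetime

# Assumed import path, taken from the diff's file name (model/analyzer.py).
from model.analyzer import analyze_script


def analyze_content(script: str) -> dict:
    # Run the chunked trigger analysis and wrap the result with metadata,
    # following the shape of the removed analyze_content helper in this diff.
    triggers = analyze_script(script)
    detected = triggers if triggers and triggers != ["None"] else ["None"]
    return {
        "detected_triggers": detected,
        "model": "Llama-3.2-1B",
        "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }


# Simple text-in / JSON-out interface around the analyzer.
demo = gr.Interface(
    fn=analyze_content,
    inputs=gr.Textbox(lines=10, label="Script"),
    outputs=gr.JSON(label="Detected triggers"),
)

if __name__ == "__main__":
    demo.launch()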