import re
from typing import List, Optional
from transformers import pipeline
import torch
from pydantic import PrivateAttr
from crewai.tools import BaseTool


# --- BaseTool for Response Validation ---
class ValidateResponseTool(BaseTool):
    name: str = "validate_response"
    description: str = "Validates safety and helpfulness of an AI response"
    model_config = {"arbitrary_types_allowed": True}

    _prohibited_patterns: dict = PrivateAttr()
    _supportive_elements: dict = PrivateAttr()
    _crisis_indicators: List[str] = PrivateAttr()
    _negative_tone_words: List[str] = PrivateAttr()
    _dismissive_phrases: List[str] = PrivateAttr()
    _sentiment_analyzer: object = PrivateAttr()

    def __init__(self, config=None, **data):
        # `config` is accepted for compatibility with the surrounding tooling but is not used here.
        super().__init__(**data)
        # Sentiment model used for tone checks (GPU if available, otherwise CPU).
        self._sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment",
            device=0 if torch.cuda.is_available() else -1
        )
        # Regex patterns flagging content the response must not contain.
        self._prohibited_patterns = {
            'medical': [
                r'\b(?:diagnos|prescrib|medicat|cure|treat|therap)\w*\b',
                r'\b(?:disease|illness|disorder|syndrome)\s+(?:is|are|can be)\b',
                r'\b(?:take|consume|dose|dosage)\s+\d+\s*(?:mg|ml|pill|tablet)',
                r'\b(?:medical|clinical|physician|doctor)\s+(?:advice|consultation|opinion)',
            ],
            'legal': [
                r'\b(?:legal advice|lawsuit|sue|court|litigation)\b',
                r'\b(?:illegal|unlawful|crime|criminal|prosecut)\w*\b',
                r'\b(?:you should|must|have to)\s+(?:sign|agree|consent|contract)',
                r'\b(?:rights|obligations|liability|damages)\s+(?:are|include)\b',
            ],
            'financial': [
                r'\b(?:invest|buy|sell|trade)\s+(?:stock|crypto|bitcoin|forex)\b',
                r'\b(?:guaranteed|promise)\s+(?:return|profit|income|earnings)\b',
                r'\b(?:financial advisor|investment advice|trading strategy)\b',
                r'\b(?:tax|accounting|financial planning)\s+(?:advice|consultation)',
            ],
            'harmful': [
                r'\b(?:suicide|suicidal|kill\s+(?:your|my)self|end\s+(?:it|life))\b',
                r'\b(?:self[\-\s]?harm|hurt\s+(?:your|my)self|cutting)\b',
                r'\b(?:violence|violent|weapon|attack|assault)\b',
                r'\b(?:hate|discriminat|racist|sexist|homophobic)\b',
            ],
            'absolute': [
                r'\b(?:always|never|every|all|none|no one|everyone)\s+(?:will|must|should|is|are)\b',
                r'\b(?:definitely|certainly|guaranteed|assured|promise)\b',
                r'\b(?:only way|only solution|must do|have to)\b',
            ]
        }
        # Keyword groups the response should contain to read as supportive.
        self._supportive_elements = {
            'empathy': [
                'understand', 'hear', 'feel', 'acknowledge', 'recognize',
                'appreciate', 'empathize', 'relate', 'comprehend'
            ],
            'validation': [
                'valid', 'normal', 'understandable', 'natural', 'okay',
                'reasonable', 'makes sense', 'legitimate'
            ],
            'support': [
                'support', 'help', 'here for you', 'together', 'alongside',
                'assist', 'guide', 'accompany', 'with you'
            ],
            'hope': [
                'can', 'possible', 'able', 'capable', 'potential',
                'opportunity', 'growth', 'improve', 'better', 'progress'
            ],
            'empowerment': [
                'choice', 'decide', 'control', 'power', 'strength',
                'agency', 'capable', 'resource', 'ability'
            ]
        }
        # Patterns in the user's input that suggest they may be in crisis.
        self._crisis_indicators = [
            r'\b(?:want|going|plan)\s+to\s+(?:die|kill|end)\b',
            r'\b(?:no reason|point|hope)\s+(?:to|in)\s+(?:live|living|life)\b',
            r'\b(?:better off|world)\s+without\s+me\b',
            r'\bsuicide\s+(?:plan|method|attempt)\b',
            r'\b(?:final|last)\s+(?:goodbye|letter|message)\b'
        ]
        self._negative_tone_words = [
            'stupid', 'idiot', 'dumb', 'pathetic', 'worthless',
            'loser', 'failure', 'weak', 'incompetent', 'useless'
        ]
        self._dismissive_phrases = [
            'just get over it', 'stop complaining', 'not a big deal',
            'being dramatic', 'overreacting', 'too sensitive'
        ]

    def _run(self, response: str, context: Optional[dict] = None) -> dict:
        """
        Pydantic- and CrewAI-compatible single-tool version.
        Validates the response and returns the result as a dictionary.
        """
        issues = []
        warnings = []
        suggestions = []

        # --- Prohibited content ---
        for category, patterns in self._prohibited_patterns.items():
            for pattern in patterns:
                if re.search(pattern, response, re.IGNORECASE):
                    issues.append(f"Contains {category} advice/content")
                    if category == "medical":
                        suggestions.append("Replace with: 'Consider speaking with a healthcare professional'")
                    elif category == "legal":
                        suggestions.append("For legal matters, consult with a qualified attorney")
                    elif category == "financial":
                        suggestions.append("For financial decisions, consider consulting a financial advisor")
                    elif category == "harmful":
                        suggestions.append("Include crisis resources and express immediate concern for safety")
                    elif category == "absolute":
                        suggestions.append("Use qualifying language like 'often', 'might', 'could' instead of absolutes")
                    break

        # --- Sentiment / tone ---
        try:
            sentiment_result = self._sentiment_analyzer(response[:512])[0]
            sentiment_label = sentiment_result['label']
            if '1' in sentiment_label or '2' in sentiment_label:
                warnings.append("Response tone is too negative")
                suggestions.append("Add more supportive and hopeful language")
        except Exception:
            pass

        # --- Negative / judgmental words ---
        found_negative = [word for word in self._negative_tone_words if word in response.lower()]
        if found_negative:
            warnings.append(f"Contains negative/judgmental language: {', '.join(found_negative)}")
            suggestions.append("Replace judgmental terms with supportive language")

        # --- Dismissive phrases ---
        found_dismissive = [phrase for phrase in self._dismissive_phrases if phrase in response.lower()]
        if found_dismissive:
            warnings.append("Contains dismissive language")
            suggestions.append("Acknowledge and validate the person's feelings instead")

        # --- Supportive elements ---
        text_lower = response.lower()
        missing_elements = []
        for element, keywords in self._supportive_elements.items():
            if not any(keyword in text_lower for keyword in keywords):
                missing_elements.append(element)
        if missing_elements:
            warnings.append(f"Missing supportive elements: {', '.join(missing_elements)}")
            for miss in missing_elements:
                if miss == 'empathy':
                    suggestions.append("Add empathetic language like 'I understand how difficult this must be'")
                elif miss == 'validation':
                    suggestions.append("Validate their feelings with phrases like 'Your feelings are completely valid'")
                elif miss == 'support':
                    suggestions.append("Express support with 'I'm here to support you through this'")
                elif miss == 'hope':
                    suggestions.append("Include hopeful elements about growth and positive change")
                elif miss == 'empowerment':
                    suggestions.append("Emphasize their agency and ability to make choices")

        # --- Crisis detection from context ---
        if context and context.get("user_input"):
            for pattern in self._crisis_indicators:
                if re.search(pattern, context["user_input"], re.IGNORECASE):
                    if "crisis" not in response.lower():
                        warnings.append("User may be in crisis but response doesn't address this")
                        suggestions.append("Include crisis resources and immediate support options")
                    break  # one crisis warning is enough; avoid duplicates

        # --- Confidence ---
        confidence = 1.0
        if issues:
            confidence = 0.3 - (0.1 * len(issues))
        confidence = max(0.0, confidence - 0.1 * len(warnings))

        return {
            "is_valid": len(issues) == 0,
            "issues": issues,
            "warnings": warnings,
            "suggestions": suggestions,
            "confidence": confidence,
        }


class ValidationTools:
    def __init__(self, config=None):
        self.validate_response_tool = ValidateResponseTool(config)
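

# --- Usage sketch (illustrative only) ---
# Minimal example of exercising the validator. The sample response and context
# strings below are made up, and _run is called directly for simplicity; inside a
# crew the tool would normally be invoked by an agent. Running this downloads the
# sentiment model on first use.
if __name__ == "__main__":
    tools = ValidationTools()
    result = tools.validate_response_tool._run(
        "I understand how difficult this must be. You have the strength and the "
        "choice to reach out, and I'm here to support you through this.",
        context={"user_input": "I've been feeling really low lately."}
    )
    print("valid:", result["is_valid"], "confidence:", result["confidence"])
    for suggestion in result["suggestions"]:
        print("-", suggestion)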