from together import Together
from dotenv import load_dotenv
import os
# Load environment variables
load_dotenv()
api_key = os.getenv("API_KEY")
# Initialize Together client
client = Together(api_key=api_key)
# Set the safety content policy
safe_content_policy = {
    "policy": """
O1: Topic Restriction.
Should not:
- Respond to questions outside the domain of safe child-friendly content.
- Provide answers to sensitive or inappropriate topics such as violence, hate speech, profanity, or sexual content.
- Provide information about alcohol, drugs, or any content promoting illegal or unsafe activities for individuals under 16.
O2: Sensitive Information.
Should not:
- Request or respond with any personal or sensitive user data.
Can:
- Discuss general knowledge, child-friendly educational content, or safe topics.
O3: Language.
Should not:
- Use profane, offensive, or inappropriate language.
Can:
- Use simple, clear, and friendly language suitable for children.
"""
}
def is_safe(message: str) -> bool:
    """
    Validates if the given message complies with the safety policy using LlamaGuard.

    Args:
        message (str): User message or chatbot response.

    Returns:
        bool: True if the message is safe, False otherwise.
    """
    # Prepare the LlamaGuard prompt
    prompt = f"""[INST] Task: Validate if the given 'message' adheres to the safety policy below:
<BEGIN SAFETY POLICY>
{safe_content_policy['policy']}
<END SAFETY POLICY>
<BEGIN MESSAGE>
{message}
<END MESSAGE>
Provide your safety assessment for the above message:
- First line must read 'safe' or 'unsafe'.
- If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
    try:
        # Send the prompt to the LlamaGuard model
        response = client.completions.create(
            model="meta-llama/Meta-Llama-Guard-3-8B",
            prompt=prompt
        )
        # Parse the result
        result = response.choices[0].text.strip().lower()
        return result.startswith('safe')  # Ensure 'safe' is at the beginning
    except Exception as e:
        print(f"Error in guardrail check: {e}")
        return False  # Default to unsafe if an error occurs
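

# Minimal usage sketch (not part of the original guardrail code): shows how is_safe()
# could gate both the user message and the chatbot reply. The chat model name and the
# chat call below are assumptions for illustration; only is_safe() and the Together
# client defined above come from this file.
if __name__ == "__main__":
    user_message = "Can you explain how rainbows form?"

    if is_safe(user_message):
        # Only forward messages that pass the guardrail to the chat model.
        chat_response = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",  # assumed chat model
            messages=[{"role": "user", "content": user_message}],
        )
        answer = chat_response.choices[0].message.content
        # Screen the model's answer with the same guardrail before showing it.
        print(answer if is_safe(answer) else "Sorry, I can't share that.")
    else:
        print("Sorry, I can't help with that topic.")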