# TatTwamAI / utils /config.py
# Jayashree Sridhar
# Added typegpt2 model in knowledgebase
# 068c1e8
"""
Configuration management for Personal Coach CrewAI
"""
import os
from dataclasses import dataclass
from typing import Dict, List, Optional
from dotenv import load_dotenv
import torch
# Load environment variables at import time (python-dotenv reads them from a
# .env file, if one is found) so the os.getenv() lookups in Config see them.
load_dotenv()
@dataclass
class ModelConfig:
    """Model configuration settings.

    Groups the model identifiers, the text-generation parameters and the
    torch device/dtype selection.

    Attributes:
        tts_models: Mapping of language code to TTS model identifier. When
            left as ``None`` (the default, also accepted explicitly) it is
            replaced in ``__post_init__`` by the built-in table, which
            includes a ``"default"`` entry.
    """

    # Main LLM (the Mistral entry is kept commented out as the alternative)
    # mistral_model: str = "mistralai/Mistral-7B-Instruct-v0.1"
    tinygpt2_model: str = "sshleifer/tiny-gpt2"

    # Embedding model for RAG
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"

    # Whisper model for multilingual STT
    whisper_model: str = "openai/whisper-small"

    # TTS models for different languages. ``None`` is a sentinel that
    # __post_init__ swaps for a fresh dict, so instances never share one
    # mutable default.
    tts_models: Optional[Dict[str, str]] = None

    # Model parameters
    max_length: int = 2048
    temperature: float = 0.7
    top_p: float = 0.95
    do_sample: bool = True

    # Device configuration. Both defaults are evaluated once, when the class
    # body runs, not on every instantiation.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    torch_dtype: torch.dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    def __post_init__(self) -> None:
        """Install the default TTS model table when none was supplied."""
        if self.tts_models is None:
            self.tts_models = {
                "en": "microsoft/speecht5_tts",
                "hi": "facebook/mms-tts-hin",
                "es": "facebook/mms-tts-spa",
                "fr": "facebook/mms-tts-fra",
                "de": "facebook/mms-tts-deu",
                "zh": "facebook/mms-tts-cmn",
                "ar": "facebook/mms-tts-ara",
                "default": "microsoft/speecht5_tts",
            }
@dataclass
class VectorStoreConfig:
    """Settings for the vector store behind the knowledge base."""

    # FAISS index type
    index_type: str = "Flat"
    # Embedding dimension (for all-MiniLM-L6-v2)
    dimension: int = 384
    # Similarity metric
    metric: str = "cosine"
    # Number of results to retrieve
    n_results: int = 5
    # Text chunk size
    chunk_size: int = 500
    # Overlap between chunks
    chunk_overlap: int = 50
@dataclass
class AudioConfig:
    """Audio processing configuration.

    Attributes:
        sample_rate: Audio sample rate.
        chunk_length: Chunk length in seconds.
        language_detection: Whether language detection is switched on.
        supported_languages: Language codes. When left as ``None`` (the
            default, also accepted explicitly) it is replaced in
            ``__post_init__`` by the built-in list.
    """

    sample_rate: int = 16000
    chunk_length: int = 30  # seconds
    language_detection: bool = True
    # ``None`` is a sentinel that __post_init__ swaps for a fresh list, so
    # instances never share one mutable default.
    supported_languages: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Install the default language list when none was supplied."""
        if self.supported_languages is None:
            self.supported_languages = [
                "en", "es", "fr", "de", "it", "pt", "ru", "zh",
                "ja", "ko", "hi", "ar", "bn", "pa", "te", "mr",
                "ta", "ur", "gu", "kn", "ml", "or",
            ]
@dataclass
class CrewConfig:
    """CrewAI specific configuration.

    Attributes:
        max_rpm: Rate limit.
        agent_settings: Per-agent option dictionaries keyed by agent name.
            When left as ``None`` (the default, also accepted explicitly)
            it is replaced in ``__post_init__`` by the built-in settings
            for the four agents listed there.
    """

    max_iterations: int = 3
    memory: bool = True
    verbose: bool = True
    temperature: float = 0.7
    max_rpm: int = 10  # rate limiting

    # Agent-specific settings. ``None`` is a sentinel that __post_init__
    # swaps for a fresh dict, so instances never share one mutable default.
    agent_settings: Optional[Dict[str, Dict]] = None

    def __post_init__(self) -> None:
        """Install the default per-agent settings when none were supplied."""
        if self.agent_settings is None:
            self.agent_settings = {
                "conversation_handler": {
                    "max_questions": 3,
                    "empathy_level": "high",
                    "response_style": "warm",
                },
                "knowledge_advisor": {
                    "search_depth": 5,
                    "context_window": 3,
                    "wisdom_sources": ["all"],
                },
                "response_validator": {
                    "safety_threshold": 0.9,
                    "tone_check": True,
                    "fact_check": False,
                },
                "interaction_manager": {
                    "voice_speed": 1.0,
                    "voice_pitch": 1.0,
                    "include_followup": True,
                },
            }
class Config:
    """Main configuration class.

    Creating an instance resolves the project directories (creating them on
    disk when missing), reads overrides from environment variables, and
    exposes the model, vector-store, audio and crew settings together with
    static data such as prompt templates, guidelines and crisis resources.
    """

    # Environment values (compared case-insensitively, surrounding whitespace
    # ignored) that switch a boolean setting on.
    _TRUE_VALUES = ("1", "true", "yes", "on")

    @staticmethod
    def _env_flag(name: str, default: bool) -> bool:
        """Read a boolean setting from the environment.

        Args:
            name: Environment variable name.
            default: Value used when the variable is not set at all.

        Returns:
            ``default`` when the variable is unset; otherwise ``True`` only
            if its value is one of ``_TRUE_VALUES``.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        return raw.strip().lower() in Config._TRUE_VALUES

    @staticmethod
    def _env_first(names: List[str], default: str) -> str:
        """Return the first non-empty environment value among ``names``.

        Args:
            names: Environment variable names, in priority order.
            default: Value returned when none of them is set to a
                non-empty string.
        """
        for name in names:
            value = os.getenv(name)
            if value:
                return value
        return default

    def __init__(self) -> None:
        """Resolve paths, create directories and load all settings.

        Raises:
            ValueError: If a numeric environment override (for example
                ``MAX_LENGTH`` or ``TEMPERATURE``) cannot be parsed.
        """
        # Base paths (BASE_DIR is the parent of the directory holding this file)
        self.BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.DATA_DIR = os.path.join(self.BASE_DIR, "data")
        self.BOOKS_DIR = os.path.join(self.DATA_DIR, "books")
        self.INDEX_DIR = os.path.join(self.DATA_DIR, "faiss_index")
        self.CACHE_DIR = os.path.join(self.BASE_DIR, ".cache")
        self.LOGS_DIR = os.path.join(self.BASE_DIR, "logs")

        # Create necessary directories
        for directory in [self.DATA_DIR, self.BOOKS_DIR, self.INDEX_DIR, self.CACHE_DIR, self.LOGS_DIR]:
            os.makedirs(directory, exist_ok=True)

        # Model configuration. TINYGPT2_MODEL follows the naming of the other
        # model overrides; MISTRAL_MODEL is still honoured as a fallback so
        # existing environments keep working.
        self.models = ModelConfig(
            tinygpt2_model=self._env_first(
                ["TINYGPT2_MODEL", "MISTRAL_MODEL"], ModelConfig.tinygpt2_model
            ),
            embedding_model=os.getenv("EMBEDDING_MODEL", ModelConfig.embedding_model),
            whisper_model=os.getenv("WHISPER_MODEL", ModelConfig.whisper_model),
            temperature=float(os.getenv("TEMPERATURE", "0.7")),
            max_length=int(os.getenv("MAX_LENGTH", "2048")),
        )

        # Vector store configuration
        self.vector_store = VectorStoreConfig(
            chunk_size=int(os.getenv("CHUNK_SIZE", "500")),
            n_results=int(os.getenv("N_RESULTS", "5")),
        )

        # Audio configuration
        self.audio = AudioConfig(
            sample_rate=int(os.getenv("SAMPLE_RATE", "16000")),
            language_detection=self._env_flag("LANGUAGE_DETECTION", True),
        )

        # CrewAI configuration
        self.crew = CrewConfig(
            verbose=self._env_flag("CREW_VERBOSE", True),
            max_iterations=int(os.getenv("MAX_ITERATIONS", "3")),
        )

        # API tokens (empty string when not configured)
        self.tokens = {
            "huggingface": os.getenv("HUGGINGFACE_TOKEN", ""),
            "openai": os.getenv("OPENAI_API_KEY", ""),
        }

        # Feature flags
        self.features = {
            "voice_enabled": self._env_flag("VOICE_ENABLED", True),
            "multilingual": self._env_flag("MULTILINGUAL", True),
            "save_history": self._env_flag("SAVE_HISTORY", True),
            "debug_mode": self._env_flag("DEBUG_MODE", False),
        }

        # Knowledge base books
        self.knowledge_sources = {
            "spiritual": [
                "Bhagavad Gita",
                "Autobiography of a Yogi",
                "The Power of Now",
                "Tao Te Ching",
                "Dhyana Vahini",
                "Gita Vahini",
                "Prema Vahini",
                "Prasnothra Vahini",
            ],
            "self_help": [
                "Atomic Habits",
                "The 7 Habits of Highly Effective People",
                "Man's Search for Meaning",
                "Mindset",
            ],
            "philosophy": [
                "Meditations",
            ],
        }

        # Prompt templates. Continuation lines start at column 0 on purpose:
        # any leading whitespace would become part of the prompt text.
        self.prompts = {
            "system_prompt": """You are a compassionate personal coach who draws wisdom from ancient texts and modern psychology.
You listen deeply, ask thoughtful questions, and provide guidance that is both practical and profound.
You speak with warmth and understanding, never judging, always supporting.""",
            "conversation_prompt": """Based on what the user shared: {user_input}
Their emotional state appears to be: {emotional_state}
Generate {num_questions} empathetic, reflective questions to help them explore their feelings deeper.""",
            "wisdom_prompt": """The user is dealing with: {situation}
Their emotional state: {emotional_state}
Drawing from these wisdom sources: {sources}
Provide relevant guidance that:
1. Acknowledges their feelings
2. Shares applicable wisdom
3. Offers practical steps
4. Maintains a supportive tone""",
            "validation_prompt": """Review this response for appropriateness:
{response}
Ensure it:
1. Contains no medical/legal/financial advice
2. Maintains supportive tone
3. Includes practical guidance
4. Avoids absolute statements""",
            "meditation_prompt": """Create a {duration} minute meditation practice for someone feeling {emotion}.
Include:
1. Simple setup instructions
2. Step-by-step guidance
3. Focus technique
4. Closing reflection""",
        }

        # Response guidelines
        self.guidelines = {
            "tone": ["empathetic", "supportive", "non-judgmental", "encouraging"],
            "avoid": ["prescriptive", "absolute", "diagnostic", "dismissive"],
            "include": ["validation", "practical steps", "hope", "empowerment"],
        }

        # Crisis resources
        self.crisis_resources = {
            "global": {
                "name": "International Crisis Lines",
                "url": "https://findahelpline.com",
                "phone": "Various by country",
            },
            "us": {
                "name": "988 Suicide & Crisis Lifeline",
                "phone": "988",
                "text": "Text HOME to 741741",
            },
            "uk": {
                "name": "Samaritans",
                "phone": "116 123",
                "email": "jo@samaritans.org",
            },
            "india": {
                "name": "Vandrevala Foundation",
                "phone": "9999666555",
                "languages": ["Hindi", "English", "Regional"],
            },
        }

    def get_language_config(self, language_code: str) -> Dict:
        """Get language-specific configuration.

        Args:
            language_code: Language code such as ``"en"`` or ``"hi"``.

        Returns:
            A dict with ``"name"`` and ``"tts_voice"`` for the language; the
            English entry is returned for any code not in the table.
        """
        language_configs = {
            "en": {"name": "English", "tts_voice": "en-US-AriaNeural"},
            "hi": {"name": "Hindi", "tts_voice": "hi-IN-SwaraNeural"},
            "es": {"name": "Spanish", "tts_voice": "es-ES-ElviraNeural"},
            "fr": {"name": "French", "tts_voice": "fr-FR-DeniseNeural"},
            "de": {"name": "German", "tts_voice": "de-DE-KatjaNeural"},
            "zh": {"name": "Chinese", "tts_voice": "zh-CN-XiaoxiaoNeural"},
            "ar": {"name": "Arabic", "tts_voice": "ar-SA-ZariyahNeural"},
        }
        return language_configs.get(language_code, language_configs["en"])

    def get_prompt(self, prompt_type: str, **kwargs) -> str:
        """Get formatted prompt with variables.

        Args:
            prompt_type: Key into ``self.prompts``.
            **kwargs: Values for the template's ``{placeholders}``.

        Returns:
            The formatted template, or an empty string when ``prompt_type``
            is not a known key.

        Raises:
            KeyError: If the template has a placeholder that ``kwargs`` does
                not supply.
        """
        prompt_template = self.prompts.get(prompt_type, "")
        return prompt_template.format(**kwargs)

    def to_dict(self) -> Dict:
        """Convert configuration to dictionary.

        Returns:
            A deep copy of the paths, the four sub-configurations, the
            feature flags and the knowledge sources, so that changing the
            returned structure never alters this configuration.
        """
        import copy  # local import: only this method needs it

        snapshot = {
            "paths": {
                "base": self.BASE_DIR,
                "data": self.DATA_DIR,
                "books": self.BOOKS_DIR,
                "index": self.INDEX_DIR,
                "cache": self.CACHE_DIR,
                "logs": self.LOGS_DIR,
            },
            "models": vars(self.models),
            "vector_store": vars(self.vector_store),
            "audio": vars(self.audio),
            "crew": vars(self.crew),
            "features": self.features,
            "knowledge_sources": self.knowledge_sources,
        }
        # vars() hands back each object's live attribute dict; deep-copying
        # the whole snapshot detaches it from the configuration objects.
        return copy.deepcopy(snapshot)