Spaces:

jayashree
/

TatTwamAI

Sleeping

TatTwamAI / utils /config.py

Jayashree Sridhar

Added tinygpt2 model in knowledgebase

068c1e8 2 months ago

11.2 kB

	"""
	Configuration management for Personal Coach CrewAI
	"""

	import os
	from dataclasses import dataclass
	from typing import Dict, List, Optional
	from dotenv import load_dotenv
	import torch

	# Load environment variables via python-dotenv at import time, before the
	# os.getenv() lookups performed in Config.__init__.
	load_dotenv()

	@dataclass
	class ModelConfig:
	    """Model identifiers and generation parameters.

	    The ``device`` and ``torch_dtype`` defaults are computed once, when the
	    class body is executed, from ``torch.cuda.is_available()``.
	    """

	    # Main LLM. A Mistral model was configured here previously:
	    # mistral_model: str = "mistralai/Mistral-7B-Instruct-v0.1"
	    tinygpt2_model: str = "sshleifer/tiny-gpt2"

	    # Embedding model for RAG
	    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"

	    # Whisper model for multilingual STT
	    whisper_model: str = "openai/whisper-small"

	    # TTS models keyed by language code; None means "use the built-in table"
	    # that __post_init__ fills in.
	    tts_models: Optional[Dict[str, str]] = None

	    # Model parameters
	    max_length: int = 2048
	    temperature: float = 0.7
	    top_p: float = 0.95
	    do_sample: bool = True

	    # Device configuration: CUDA with half precision when available,
	    # otherwise CPU with single precision.
	    device: str = "cuda" if torch.cuda.is_available() else "cpu"
	    torch_dtype: torch.dtype = torch.float16 if torch.cuda.is_available() else torch.float32

	    def __post_init__(self):
	        """Install the default language -> TTS model table when none was given.

	        A fresh dict is built per instance, so instances never share the
	        mapping.
	        """
	        if self.tts_models is None:
	            self.tts_models = {
	                "en": "microsoft/speecht5_tts",
	                "hi": "facebook/mms-tts-hin",
	                "es": "facebook/mms-tts-spa",
	                "fr": "facebook/mms-tts-fra",
	                "de": "facebook/mms-tts-deu",
	                "zh": "facebook/mms-tts-cmn",
	                "ar": "facebook/mms-tts-ara",
	                "default": "microsoft/speecht5_tts"
	            }

	@dataclass
	class VectorStoreConfig:
	    """Settings for the knowledge-base vector store."""

	    # FAISS index type
	    index_type: str = "Flat"
	    # Embedding dimension (for all-MiniLM-L6-v2)
	    dimension: int = 384
	    # Similarity metric
	    metric: str = "cosine"
	    # Number of results to retrieve
	    n_results: int = 5
	    # Text chunk size
	    chunk_size: int = 500
	    # Overlap between chunks
	    chunk_overlap: int = 50

	@dataclass
	class AudioConfig:
	    """Audio processing configuration."""

	    sample_rate: int = 16000
	    chunk_length: int = 30  # seconds
	    language_detection: bool = True
	    # Language codes; None means "use the built-in list" that
	    # __post_init__ fills in.
	    supported_languages: Optional[List[str]] = None

	    def __post_init__(self):
	        """Install the default language-code list when none was given.

	        A fresh list is built per instance, so instances never share it.
	        """
	        if self.supported_languages is None:
	            self.supported_languages = [
	                "en", "es", "fr", "de", "it", "pt", "ru", "zh",
	                "ja", "ko", "hi", "ar", "bn", "pa", "te", "mr",
	                "ta", "ur", "gu", "kn", "ml", "or"
	            ]

	@dataclass
	class CrewConfig:
	    """CrewAI specific configuration."""

	    max_iterations: int = 3
	    memory: bool = True
	    verbose: bool = True
	    temperature: float = 0.7
	    max_rpm: int = 10  # rate limiting

	    # Per-agent settings keyed by agent name; None means "use the built-in
	    # table" that __post_init__ fills in.
	    agent_settings: Optional[Dict[str, Dict]] = None

	    def __post_init__(self):
	        """Install the default per-agent settings when none were given.

	        A fresh nested dict is built per instance, so instances never share
	        it.
	        """
	        if self.agent_settings is None:
	            self.agent_settings = {
	                "conversation_handler": {
	                    "max_questions": 3,
	                    "empathy_level": "high",
	                    "response_style": "warm"
	                },
	                "knowledge_advisor": {
	                    "search_depth": 5,
	                    "context_window": 3,
	                    "wisdom_sources": ["all"]
	                },
	                "response_validator": {
	                    "safety_threshold": 0.9,
	                    "tone_check": True,
	                    "fact_check": False
	                },
	                "interaction_manager": {
	                    "voice_speed": 1.0,
	                    "voice_pitch": 1.0,
	                    "include_followup": True
	                }
	            }

	class Config:
	    """Main configuration class.

	    Collects filesystem paths, model / vector-store / audio / crew settings,
	    API tokens, feature flags, knowledge sources, prompt templates, response
	    guidelines and crisis resources in one object. Several scalar settings
	    can be overridden through environment variables read with ``os.getenv``.
	    """

	    @staticmethod
	    def _env_flag(name: str, default: str) -> bool:
	        """Return True when env var *name* (or *default*) is "true", ignoring case."""
	        return os.getenv(name, default).lower() == "true"

	    def __init__(self):
	        """Resolve paths, create the working directories and read overrides.

	        Raises:
	            ValueError: if a numeric environment variable (for example
	                ``TEMPERATURE`` or ``MAX_LENGTH``) cannot be parsed.
	            OSError: if one of the working directories cannot be created.
	        """
	        # Base paths: BASE_DIR is the parent of the directory holding this file
	        self.BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	        self.DATA_DIR = os.path.join(self.BASE_DIR, "data")
	        self.BOOKS_DIR = os.path.join(self.DATA_DIR, "books")
	        self.INDEX_DIR = os.path.join(self.DATA_DIR, "faiss_index")
	        self.CACHE_DIR = os.path.join(self.BASE_DIR, ".cache")
	        self.LOGS_DIR = os.path.join(self.BASE_DIR, "logs")

	        # Create necessary directories
	        for directory in [self.DATA_DIR, self.BOOKS_DIR, self.INDEX_DIR, self.CACHE_DIR, self.LOGS_DIR]:
	            os.makedirs(directory, exist_ok=True)

	        # Model configuration
	        # NOTE(review): the override for the tiny-gpt2 model id is still read
	        # from MISTRAL_MODEL; the variable name is kept so existing
	        # environments continue to work.
	        self.models = ModelConfig(
	            tinygpt2_model=os.getenv("MISTRAL_MODEL", ModelConfig.tinygpt2_model),
	            embedding_model=os.getenv("EMBEDDING_MODEL", ModelConfig.embedding_model),
	            whisper_model=os.getenv("WHISPER_MODEL", ModelConfig.whisper_model),
	            temperature=float(os.getenv("TEMPERATURE", "0.7")),
	            max_length=int(os.getenv("MAX_LENGTH", "2048"))
	        )

	        # Vector store configuration
	        self.vector_store = VectorStoreConfig(
	            chunk_size=int(os.getenv("CHUNK_SIZE", "500")),
	            n_results=int(os.getenv("N_RESULTS", "5"))
	        )

	        # Audio configuration
	        self.audio = AudioConfig(
	            sample_rate=int(os.getenv("SAMPLE_RATE", "16000")),
	            language_detection=self._env_flag("LANGUAGE_DETECTION", "true")
	        )

	        # CrewAI configuration
	        self.crew = CrewConfig(
	            verbose=self._env_flag("CREW_VERBOSE", "true"),
	            max_iterations=int(os.getenv("MAX_ITERATIONS", "3"))
	        )

	        # API tokens (empty string when the variable is not set)
	        self.tokens = {
	            "huggingface": os.getenv("HUGGINGFACE_TOKEN", ""),
	            "openai": os.getenv("OPENAI_API_KEY", "")
	        }

	        # Feature flags
	        self.features = {
	            "voice_enabled": self._env_flag("VOICE_ENABLED", "true"),
	            "multilingual": self._env_flag("MULTILINGUAL", "true"),
	            "save_history": self._env_flag("SAVE_HISTORY", "true"),
	            "debug_mode": self._env_flag("DEBUG_MODE", "false")
	        }

	        # Knowledge base books
	        self.knowledge_sources = {
	            "spiritual": [
	                "Bhagavad Gita",
	                "Autobiography of a Yogi",
	                "The Power of Now",
	                "Tao Te Ching",
	                "Dhyana Vahini",
	                "Gita Vahini",
	                "Prema Vahini",
	                "Prasnothra Vahini"
	            ],
	            "self_help": [
	                "Atomic Habits",
	                "The 7 Habits of Highly Effective People",
	                "Man's Search for Meaning",
	                "Mindset"
	            ],
	            "philosophy": [
	                "Meditations"
	            ]
	        }

	        # Prompt templates; {placeholders} are filled in by get_prompt()
	        self.prompts = {
	            "system_prompt": """You are a compassionate personal coach who draws wisdom from ancient texts and modern psychology.
	You listen deeply, ask thoughtful questions, and provide guidance that is both practical and profound.
	You speak with warmth and understanding, never judging, always supporting.""",

	            "conversation_prompt": """Based on what the user shared: {user_input}
	Their emotional state appears to be: {emotional_state}
	Generate {num_questions} empathetic, reflective questions to help them explore their feelings deeper.""",

	            "wisdom_prompt": """The user is dealing with: {situation}
	Their emotional state: {emotional_state}

	Drawing from these wisdom sources: {sources}
	Provide relevant guidance that:
	1. Acknowledges their feelings
	2. Shares applicable wisdom
	3. Offers practical steps
	4. Maintains a supportive tone""",

	            "validation_prompt": """Review this response for appropriateness:
	{response}

	Ensure it:
	1. Contains no medical/legal/financial advice
	2. Maintains supportive tone
	3. Includes practical guidance
	4. Avoids absolute statements""",

	            "meditation_prompt": """Create a {duration} minute meditation practice for someone feeling {emotion}.
	Include:
	1. Simple setup instructions
	2. Step-by-step guidance
	3. Focus technique
	4. Closing reflection"""
	        }

	        # Response guidelines
	        self.guidelines = {
	            "tone": ["empathetic", "supportive", "non-judgmental", "encouraging"],
	            "avoid": ["prescriptive", "absolute", "diagnostic", "dismissive"],
	            "include": ["validation", "practical steps", "hope", "empowerment"]
	        }

	        # Crisis resources
	        self.crisis_resources = {
	            "global": {
	                "name": "International Crisis Lines",
	                "url": "https://findahelpline.com",
	                "phone": "Various by country"
	            },
	            "us": {
	                "name": "988 Suicide & Crisis Lifeline",
	                "phone": "988",
	                "text": "Text HOME to 741741"
	            },
	            "uk": {
	                "name": "Samaritans",
	                "phone": "116 123",
	                "email": "jo@samaritans.org"
	            },
	            "india": {
	                "name": "Vandrevala Foundation",
	                "phone": "9999666555",
	                "languages": ["Hindi", "English", "Regional"]
	            }
	        }

	    def get_language_config(self, language_code: str) -> Dict:
	        """Get language-specific configuration.

	        Args:
	            language_code: Language code such as ``"en"`` or ``"hi"``.

	        Returns:
	            A dict with ``"name"`` and ``"tts_voice"`` keys. Unknown codes
	            fall back to the English entry.
	        """
	        language_configs = {
	            "en": {"name": "English", "tts_voice": "en-US-AriaNeural"},
	            "hi": {"name": "Hindi", "tts_voice": "hi-IN-SwaraNeural"},
	            "es": {"name": "Spanish", "tts_voice": "es-ES-ElviraNeural"},
	            "fr": {"name": "French", "tts_voice": "fr-FR-DeniseNeural"},
	            "de": {"name": "German", "tts_voice": "de-DE-KatjaNeural"},
	            "zh": {"name": "Chinese", "tts_voice": "zh-CN-XiaoxiaoNeural"},
	            "ar": {"name": "Arabic", "tts_voice": "ar-SA-ZariyahNeural"}
	        }

	        return language_configs.get(language_code, language_configs["en"])

	    def get_prompt(self, prompt_type: str, **kwargs) -> str:
	        """Get formatted prompt with variables.

	        Args:
	            prompt_type: Key into ``self.prompts``.
	            **kwargs: Values for the template's ``{placeholder}`` fields.

	        Returns:
	            The formatted template, or ``""`` when *prompt_type* is unknown.

	        Raises:
	            KeyError: if the template uses a placeholder missing from *kwargs*.
	        """
	        prompt_template = self.prompts.get(prompt_type, "")
	        return prompt_template.format(**kwargs)

	    def to_dict(self) -> Dict:
	        """Convert configuration to dictionary.

	        The ``models``, ``vector_store``, ``audio`` and ``crew`` entries are
	        the live attribute dictionaries of those objects, not copies, so
	        mutating them changes the configuration. Tokens, prompts, guidelines
	        and crisis resources are not included.
	        """
	        return {
	            "paths": {
	                "base": self.BASE_DIR,
	                "data": self.DATA_DIR,
	                "books": self.BOOKS_DIR,
	                "index": self.INDEX_DIR,
	                "cache": self.CACHE_DIR,
	                # LOGS_DIR is created in __init__ alongside the other directories
	                "logs": self.LOGS_DIR
	            },
	            "models": vars(self.models),
	            "vector_store": vars(self.vector_store),
	            "audio": vars(self.audio),
	            "crew": vars(self.crew),
	            "features": self.features,
	            "knowledge_sources": self.knowledge_sources
	        }