contract-guard-ai / services /risk_analyzer.py
satyakimitra's picture
Final Repor Updated
522f7a0
# DEPENDENCIES
import sys
from typing import Any
from typing import List
from typing import Dict
from typing import Tuple
from pathlib import Path
from typing import Optional
from dataclasses import field
from collections import defaultdict
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from utils.logger import log_info
from utils.logger import log_error
from config.risk_rules import RiskRules
from config.risk_rules import ContractType
from services.data_models import RiskScore
from services.term_analyzer import TermAnalyzer
from utils.logger import ContractAnalyzerLogger
from services.data_models import ExtractedClause
from services.data_models import UnfavorableTerm
from services.data_models import MissingProtection
from services.data_models import RiskBreakdownItem
from services.protection_checker import ProtectionChecker
from services.clause_extractor import RiskClauseExtractor
from services.contract_classifier import ContractCategory
from services.contract_classifier import ContractClassifier
from services.clause_extractor import ComprehensiveClauseExtractor
class RiskAnalyzer:
"""
Orchestrates all analysis components and calculates comprehensive risk scores
Analysis Pipeline:
1. Contract Classification
2. Clause Extraction
3. Term Analysis
4. Protection Checking
5. Risk Scoring
"""
def __init__(self, model_loader):
"""
Initialize the risk analyzer with all required components
Arguments:
----------
model_loader : ModelLoader instance for accessing AI models
"""
self.model_loader = model_loader
self.rules = RiskRules()
self.logger = ContractAnalyzerLogger.get_logger()
# Initialize all analysis components
self.contract_classifier = ContractClassifier(model_loader = model_loader)
self.risk_clause_extractor = None # Will be initialized with contract type
self.term_analyzer = TermAnalyzer()
self.protection_checker = ProtectionChecker()
log_info("RiskAnalyzer initialized - All components ready")
@ContractAnalyzerLogger.log_execution_time("analyze_contract_risk")
def analyze_contract_risk(self, contract_text: str) -> RiskScore:
"""
Comprehensive contract risk analysis
Arguments:
----------
contract_text { str } : Full contract text
Returns:
--------
{ RiskScore } : Complete risk assessment with 0-100 score and detailed breakdown
"""
log_info("Starting Comprehensive Contract Risk Analysis...", text_length = len(contract_text))
# Contract Classification
contract_category = self._classify_contract(contract_text = contract_text)
log_info("Contract classified", contract_type = contract_category.category)
# Clause Extraction: RiskClauseExtractor
clauses = self._extract_clauses(contract_text = contract_text,
contract_category = contract_category,
)
log_info("Clauses extracted", num_clauses = len(clauses))
# Unfavourable Term Analysis
unfavorable_terms = self._analyze_unfavorable_terms(contract_text = contract_text,
clauses = clauses,
contract_category = contract_category,
)
log_info("Unfavorable terms analyzed", num_unfavorable_terms = len(unfavorable_terms))
# MISSING PROTECTIONS ANALYSIS
missing_protections = self._analyze_missing_protections(contract_text = contract_text,
clauses = clauses,
contract_category = contract_category,
)
log_info("Missing protections analyzed", num_missing_protections = len(missing_protections))
# RISK SCORING & AGGREGATION
risk_score = self._calculate_comprehensive_risk(contract_category = contract_category,
clauses = clauses,
unfavorable_terms = unfavorable_terms,
missing_protections = missing_protections,
contract_text = contract_text,
)
log_info("Risk Analysis Complete",
overall_score = risk_score.overall_score,
risk_level = risk_score.risk_level,
contract_type = risk_score.contract_type,
)
return risk_score
def _classify_contract(self, contract_text: str):
"""
Classify contract type
"""
log_info("Classifying contract type...")
try:
classification = self.contract_classifier.classify_contract(contract_text = contract_text)
log_info("Contract classification successful",
category = classification.category,
confidence = classification.confidence,
subcategory = classification.subcategory)
return classification
except Exception as e:
log_error(e, context = {"component": "RiskAnalyzer", "operation": "contract_classification"})
# Fallback to general classification
return ContractCategory(category = "general",
subcategory = None,
confidence = 0.5,
reasoning = ["Classification failed, using general fallback"],
detected_keywords = [],
)
def _extract_clauses(self, contract_text: str, contract_category) -> List:
"""
Extract clauses from contract using RiskClauseExtractor
"""
log_info("Extracting RISK-FOCUSED clauses from contract...")
try:
# Get contract type enum
contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
# Initialize RiskClauseExtractor (NOT ComprehensiveClauseExtractor)
self.risk_clause_extractor = RiskClauseExtractor(model_loader = self.model_loader,
contract_type = contract_type_enum,
)
# Use RiskClauseExtractor which outputs risk categories
clauses = self.risk_clause_extractor.extract_risk_clauses(contract_text = contract_text,
max_clauses = 50,
)
log_info("Risk-focused clause extraction successful",
total_clauses = len(clauses),
categories = [c.category for c in clauses])
return clauses
except Exception as e:
log_error(e, context = {"component": "RiskAnalyzer", "operation": "clause_extraction"})
return []
def _analyze_unfavorable_terms(self, contract_text: str, clauses: List, contract_category) -> List[UnfavorableTerm]:
"""
Analyze for unfavorable terms (using risk categories from RiskClauseExtractor)
"""
log_info("Analyzing unfavorable terms...")
try:
# Initialize term analyzer with contract type
contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
self.term_analyzer = TermAnalyzer(contract_type = contract_type_enum)
unfavorable_terms = self.term_analyzer.analyze_unfavorable_terms(contract_text = contract_text,
clauses = clauses)
log_info("Unfavorable terms analysis successful",
total_terms = len(unfavorable_terms),
critical = sum(1 for t in unfavorable_terms if (t.severity == "critical")))
return unfavorable_terms
except Exception as e:
log_error(e, context = {"component": "RiskAnalyzer", "operation": "unfavorable_terms_analysis"})
return []
def _analyze_missing_protections(self, contract_text: str, clauses: List, contract_category) -> List[MissingProtection]:
"""
Analyze for missing protections
"""
log_info("Analyzing missing protections...")
try:
# Initialize protection checker with contract type
contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
self.protection_checker = ProtectionChecker(contract_type = contract_type_enum)
missing_protections = self.protection_checker.check_missing_protections(contract_text = contract_text,
clauses = clauses)
log_info("Missing protections analysis successful",
total_missing = len(missing_protections),
critical = sum(1 for p in missing_protections if (p.importance == "critical")))
return missing_protections
except Exception as e:
log_error(e, context = {"component": "RiskAnalyzer", "operation": "missing_protections_analysis"})
return []
def _calculate_comprehensive_risk(self, contract_category, clauses: List, unfavorable_terms: List[UnfavorableTerm], missing_protections: List[MissingProtection],
contract_text: str) -> RiskScore:
"""
Calculate comprehensive risk score using all analysis results
"""
log_info("Calculating comprehensive risk score...")
# Get contract type for risk rule adjustments
contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
adjusted_weights = self.rules.get_adjusted_weights(contract_type_enum)
# Initialize scoring containers
category_scores = defaultdict(int)
detailed_findings = defaultdict(list)
risk_factors = list()
# Calculate risk for each category
for risk_category in adjusted_weights.keys():
category_risk = self._calculate_category_risk(risk_category = risk_category,
contract_type = contract_type_enum,
clauses = clauses,
unfavorable_terms = unfavorable_terms,
missing_protections = missing_protections,
contract_text = contract_text,
)
category_scores[risk_category] = category_risk["score"]
detailed_findings[risk_category] = category_risk["findings"]
# Add to risk factors if high risk
if (category_risk["score"] >= self.rules.RISK_THRESHOLDS["high"]):
risk_factors.append(risk_category)
# Calculate weighted overall score
overall_score = self._calculate_weighted_score(category_scores = category_scores,
adjusted_weights = adjusted_weights)
risk_level = self._get_risk_level(score = overall_score)
# Create risk breakdown
risk_breakdown = self._create_risk_breakdown(category_scores = dict(category_scores),
detailed_findings = dict(detailed_findings))
# Benchmark comparison
benchmark_comparison = self._compare_to_benchmarks(category_scores = category_scores,
contract_type = contract_type_enum)
# Prepare output data
unfavorable_terms_dict = [term.to_dict() for term in unfavorable_terms]
missing_protections_dict = [protection.to_dict() for protection in missing_protections]
return RiskScore(overall_score = overall_score,
risk_level = risk_level,
category_scores = dict(category_scores),
risk_factors = risk_factors,
detailed_findings = dict(detailed_findings),
benchmark_comparison = benchmark_comparison,
risk_breakdown = risk_breakdown,
contract_type = contract_category.category,
unfavorable_terms = unfavorable_terms_dict,
missing_protections = missing_protections_dict,
)
def _calculate_category_risk(self, risk_category: str, contract_type: ContractType, clauses: List, unfavorable_terms: List[UnfavorableTerm],
missing_protections: List[MissingProtection], contract_text: str) -> Dict:
"""
Calculate risk score for a specific category using all available data
"""
base_score = 0
findings = list()
# Score from unfavorable terms in this category
category_terms = [t for t in unfavorable_terms if (t.category == risk_category)]
for term in category_terms:
# Scale appropriately
base_score += term.risk_score * 0.4
findings.append(f"{term.term}: {term.explanation}")
# Score from missing protections affecting this category
category_protections = [p for p in missing_protections if risk_category in p.categories]
for protection in category_protections:
base_score += protection.risk_score * 0.3
findings.append(f"Missing: {protection.protection}")
# Score from clauses in this category
category_clauses = self._get_clauses_for_risk_category(clauses = clauses,
risk_category = risk_category,
)
for clause in category_clauses:
clause_risk = self._analyze_clause_risk(clause = clause,
risk_category = risk_category,
contract_type = contract_type,
)
base_score += clause_risk["score"] * 0.3
findings.extend(clause_risk["findings"])
# Apply contract-type specific adjustments
category_weight = self.rules.CONTRACT_TYPE_ADJUSTMENTS.get(contract_type.value, {}).get(risk_category, 1.0)
adjusted_score = base_score * category_weight
# Cap score between 0-100
final_score = max(0, min(100, int(adjusted_score)))
# Top 25 findings
return {"score" : final_score,
"findings" : findings[:25]
}
def _get_clauses_for_risk_category(self, clauses: List, risk_category: str) -> List:
"""
Map clauses to risk categories (now clauses are already in risk categories)
"""
# clauses.category is already a risk category from RiskClauseExtractor
clauses_for_risk_category = [c for c in clauses if (c.category == risk_category)]
return clauses_for_risk_category
def _analyze_clause_risk(self, clause, risk_category: str, contract_type: ContractType) -> Dict:
"""
Analyze individual clause risk using RiskRules factors
"""
risk_factors = self.rules.CLAUSE_RISK_FACTORS
# Map RISK category (e.g., "restrictive_covenants") to CLAUSE category (e.g., "non_compete")
factor_mapping = {"restrictive_covenants" : "non_compete",
"termination_rights" : "termination",
"liability_indemnity" : "indemnification",
"compensation_benefits" : "compensation",
"intellectual_property" : "intellectual_property",
"confidentiality" : "confidentiality",
"penalties_liability" : "liability",
"warranties" : "warranty",
"dispute_resolution" : "dispute_resolution",
"assignment_change" : "assignment",
"insurance" : "insurance",
"force_majeure" : "force_majeure",
}
clause_category_key = factor_mapping.get(risk_category)
if not clause_category_key or clause_category_key not in risk_factors:
return {"score": 0, "findings": []}
factor_config = risk_factors[clause_category_key]
base_risk = factor_config.get("base_risk", 50)
text_lower = clause.text.lower()
risk_score = base_risk
findings = list()
# Check red flags
for red_flag, adjustment in factor_config["red_flags"].items():
if red_flag in text_lower:
risk_score += adjustment
severity = "increases" if adjustment > 0 else "decreases"
findings.append(f"Red flag: '{red_flag}' ({severity} risk by {abs(adjustment)})")
# Apply contract-type specific multiplier
type_adjustments = self.rules.CONTRACT_TYPE_ADJUSTMENTS.get(contract_type.value, {})
category_multiplier = type_adjustments.get(risk_category, 1.0)
risk_score *= category_multiplier
return {"score" : max(0, min(100, risk_score)),
"findings" : findings,
}
def _calculate_weighted_score(self, category_scores: Dict[str, int], adjusted_weights: Dict[str, float]) -> int:
"""
Calculate weighted overall risk score
"""
total_score = 0
total_weight = 0
for category, score in category_scores.items():
weight = adjusted_weights.get(category, 1.0)
total_score += score * weight
total_weight += weight
return int(total_score / total_weight) if (total_weight > 0) else 50
def _get_risk_level(self, score: int) -> str:
"""
Convert numeric score to risk level
"""
if (score >= self.rules.RISK_THRESHOLDS["critical"]):
return "CRITICAL"
elif (score >= self.rules.RISK_THRESHOLDS["high"]):
return "HIGH"
elif (score >= self.rules.RISK_THRESHOLDS["medium"]):
return "MEDIUM"
elif (score >= self.rules.RISK_THRESHOLDS["low"]):
return "LOW"
return "VERY LOW"
def _create_risk_breakdown(self, category_scores: Dict[str, int], detailed_findings: Dict[str, List[str]]) -> List[RiskBreakdownItem]:
"""
Create detailed risk breakdown for reporting
"""
breakdown = list()
category_descriptions = self.rules.CATEGORY_DESCRIPTIONS
for category, score in category_scores.items():
if category in category_descriptions:
# Get appropriate description based on score
if (score >= 70):
risk_level = "high"
elif (score >= 40):
risk_level = "medium"
else:
risk_level = "low"
summary = category_descriptions[category][risk_level]
else:
summary = f"Risk assessment for {category.replace('_', ' ')}"
findings = detailed_findings.get(category, [])
breakdown.append(RiskBreakdownItem(category = category.replace('_', ' ').title(),
score = score,
summary = summary,
findings = findings[:25], # Top 25 findings
)
)
# Sort by score (highest risk first)
breakdown.sort(key = lambda x: x.score, reverse = True)
return breakdown
def _compare_to_benchmarks(self, category_scores: Dict[str, int], contract_type: ContractType) -> Dict[str, str]:
"""
Compare risk scores to industry benchmarks
"""
comparisons = dict()
# Overall risk comparison
overall_score = sum(category_scores.values()) / len(category_scores) if category_scores else 50
if (overall_score >= 70):
comparisons["overall"] = "βœ— Significantly above market risk levels"
elif (overall_score >= 55):
comparisons["overall"] = "⚠ Above typical market risk levels"
elif (overall_score >= 45):
comparisons["overall"] = "βœ“ Within typical market risk range"
else:
comparisons["overall"] = "βœ“ Below market risk levels (favorable)"
# Key category comparisons
high_risk_categories = [cat for cat, score in category_scores.items() if score >= 60]
if high_risk_categories:
comparisons["high_risk_areas"] = f"High risk in: {', '.join(high_risk_categories)}"
return comparisons
def _get_contract_type_enum(self, category_str: str) -> ContractType:
"""
Convert category string to ContractType enum
"""
mapping = {"employment" : ContractType.EMPLOYMENT,
"consulting" : ContractType.CONSULTING,
"nda" : ContractType.NDA,
"software" : ContractType.SOFTWARE,
"service" : ContractType.SERVICE,
"partnership" : ContractType.PARTNERSHIP,
"lease" : ContractType.LEASE,
"purchase" : ContractType.PURCHASE,
"general" : ContractType.GENERAL,
}
return mapping.get(category_str, ContractType.GENERAL)