"""Compliance checklist helpers.

Provides regex-based checklist rules (``RULES`` / ``run_check``), an LLM
prompt template and report generator (``AI_REPORT_PROMPT`` /
``generate_report``), and a helper that downloads a web page and extracts
its visible text (``fetchText``).

NOTE: importing this module calls ``load_pipeline()`` once to create the
module-level ``generator``.
"""

import os
import re

import requests as req
import torch
from bs4 import BeautifulSoup
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Framework name -> list of (control label, regex searched for in the text).
# Patterns are matched case-insensitively by run_check().
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        ("72-hour breach notice plan", r"72\s*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        ("Annex A control list", r"annex\s*a"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}


def run_check(text, framework):
    """Check *text* against the regex rules of each requested framework.

    Args:
        text: Document text to scan.
        framework: Iterable of framework names (keys of ``RULES``). A single
            name passed as a plain string is treated as a one-element list
            instead of being iterated character by character.

    Returns:
        Dict mapping each requested framework name to a list of
        ``(label, passed)`` tuples, in the order the rules appear in
        ``RULES``. ``passed`` is True when the rule's pattern is found
        anywhere in *text* (case-insensitive).

    Raises:
        KeyError: If a requested framework is not a key of ``RULES``.
    """
    if isinstance(framework, str):
        framework = [framework]
    return {
        fw: [
            (label, bool(re.search(pattern, text, re.I)))
            for label, pattern in RULES[fw]
        ]
        for fw in framework
    }


# Prompt template; contains a single ``{bullet}`` placeholder for the raw
# checklist results.
# NOTE(review): the text is kept byte-for-byte as found. Its layout looks
# whitespace-collapsed (the only line break falls right after "4. ");
# confirm the intended markdown layout before re-flowing it.
AI_REPORT_PROMPT = (
    " You are an expert compliance consultant with deep experience in GDPR, "
    "the EU AI Act, ISO 27001, and related global data‑privacy and security "
    "standards. You have just received a concise checklist summary showing, "
    "for each framework, how many controls passed and which specific items "
    "failed. Your task is to produce a **clear, actionable report** tailored "
    "to a technical audience. Structure it as follows: "
    "1. **Executive Summary** "
    "- One or two sentences on overall compliance posture "
    "- Highest‑level takeaways "
    "2. **Key Issues Identified** "
    "- For each framework with failures, list: "
    "- The specific failed control(s) by label "
    "- A brief description of why that control matters "
    "- Use bullet points and group by framework "
    "3. **Redemption Strategies** "
    "- For each key issue above, recommend a **concrete next step** or "
    "mitigation strategy "
    "- Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data "
    "at rest to meet ISO 27001 A.10.1”) "
    "4. \n"
    "**Additional Resources & Next Steps** "
    "- A short paragraph on how deeper expert review can streamline "
    "remediation "
    "- A call‑out promoting AnkTechSol’s professional compliance consulting "
    "(e.g., “For a full policy audit, tailored gap analysis, and "
    "implementation roadmap, visit anktechsol.com or contact our team at "
    "[contact link].”) "
    "5. **Appendix (Optional)** "
    "- Raw bullet list of “Framework: X passed/total, Y failed/total” "
    "Make sure to: "
    "- Use clear headings (`## Executive Summary`, etc.) "
    "- Keep each section brief but substantive (no more than 4–6 bullets per "
    "section) "
    "- Write in a confident, consultative tone "
    "Here are the raw results to incorporate: "
    "{bullet} "
    "Generate the report as markdown. "
)

HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"


# Load the text generation pipeline
def load_pipeline():
    """Build and return a ``text-generation`` pipeline for ``HF_MODEL``.

    The tokenizer and model are loaded with ``from_pretrained``; the model
    is requested in float16 with ``device_map="auto"``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
    # NOTE(review): trust_remote_code=True allows code shipped in the model
    # repository to run locally -- confirm it is actually required here.
    model = AutoModelForCausalLM.from_pretrained(
        HF_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    # NOTE(review): device_map is passed again although the model object is
    # already loaded above; presumably redundant -- verify before removing.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
    )
    return pipe


# Created once at import time and reused by generate_report().
generator = load_pipeline()


def generate_report(prompt, max_tokens=600):
    """Generate report text for *prompt* with the module-level pipeline.

    Args:
        prompt: Full prompt text passed to the generator.
        max_tokens: Passed to the pipeline as ``max_new_tokens``.

    Returns:
        The ``generated_text`` of the first result. If anything raises
        during generation, the string ``"Error: <exception>"`` is returned
        instead of propagating the exception, so callers must inspect the
        returned text to detect failure.
    """
    try:
        response = generator(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            return_full_text=False,
        )
        return response[0]["generated_text"]
    except Exception as e:
        # Deliberate best-effort boundary: failures are reported as text.
        return f"Error: {e}"


def fetchText(url, timeout=10):
    """Download *url* and return the visible text of the page.

    The text is taken from the ``<main>`` element when present, otherwise
    from ``<body>``, otherwise from the whole parsed document (so responses
    without a ``<body>`` no longer fail with an ``AttributeError``).

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        A ``(text, error)`` tuple: ``(text, None)`` on success, or
        ``("", "Error fetching URL: <exception>")`` if the request, the
        HTTP status check, or the parsing raised.
    """
    try:
        response = req.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        container = soup.find('main')
        if container is None:
            container = soup.body
        if container is None:
            # html.parser does not synthesize <body>; use the whole document.
            container = soup

        text = container.get_text(separator='\n', strip=True)
        return text.strip(), None
    except Exception as e:
        # Deliberate best-effort boundary: errors are returned, not raised.
        return "", f"Error fetching URL: {e}"


# Exported functions
__all__ = ["RULES", "run_check", "AI_REPORT_PROMPT", "generate_report", "fetchText"]