import re
import os
import requests as req
from bs4 import BeautifulSoup
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch

# Compliance rule table: framework name -> list of (label, regex) pairs.
# run_check() applies each regex to the document with re.search and re.I,
# so patterns are written in lower case and are unanchored.
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        # Accept "72 hour", "72hour" and the hyphenated "72-hour" spelling.
        ("72-hour breach notice plan", r"72[\s-]*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        # The trailing \b keeps "annex and ..." / "annex applies" from
        # counting as a reference to Annex A.
        ("Annex A control list", r"annex\s*a\b"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}


def run_check(text, framework):
    """Check *text* against the regex rules of each requested framework.

    Args:
        text: Document text to scan.
        framework: Iterable of framework names (keys of ``RULES``). A single
            name passed as a plain string is treated as a one-element list
            instead of being iterated character by character.

    Returns:
        Dict mapping each requested framework name to a list of
        ``(label, passed)`` tuples, in the order the rules appear in
        ``RULES``. ``passed`` is True when the rule's pattern is found
        anywhere in *text* (case-insensitive).

    Raises:
        KeyError: If a requested framework is not a key of ``RULES``.
    """
    if isinstance(framework, str):
        framework = [framework]

    results = {}
    for fw in framework:
        results[fw] = [
            (label, bool(re.search(pattern, text, re.I)))
            for label, pattern in RULES[fw]
        ]
    return results


# Prompt template for the LLM-written compliance report. The only replacement
# field is ``{bullet}`` (the raw checklist summary); presumably callers fill it
# with ``str.format`` -- the call site is not visible in this file.
AI_REPORT_PROMPT = """
You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.

Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows:

1. **Executive Summary**  
   - One or two sentences on overall compliance posture  
   - Highest‑level takeaways

2. **Key Issues Identified**  
   - For each framework with failures, list:  
     - The specific failed control(s) by label  
     - A brief description of why that control matters  
   - Use bullet points and group by framework

3. **Remediation Strategies**  
   - For each key issue above, recommend a **concrete next step** or mitigation strategy  
   - Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)

4. **Additional Resources & Next Steps**  
   - A short paragraph on how deeper expert review can streamline remediation  
   - A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)

5. **Appendix (Optional)**  
   - Raw bullet list of “Framework: X passed/total, Y failed/total”

Make sure to:
- Use clear headings (`## Executive Summary`, etc.)  
- Keep each section brief but substantive (no more than 4–6 bullets per section)  
- Write in a confident, consultative tone  

Here are the raw results to incorporate:

{bullet}

Generate the report as markdown.  
"""

# Hugging Face Hub id of the model used for report generation.
HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"


def load_pipeline():
    """Build the text-generation pipeline for ``HF_MODEL``.

    The tokenizer and model are loaded with ``from_pretrained``; the model is
    loaded in float16 with ``device_map="auto"`` so the weights are placed
    automatically across the available devices.

    Returns:
        A transformers ``text-generation`` pipeline wrapping the loaded model
        and tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
    # trust_remote_code is intentionally left at its default (False): it
    # permits executing Python shipped in the Hub repository, and the Mixtral
    # architecture is implemented in transformers itself.
    model = AutoModelForCausalLM.from_pretrained(
        HF_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    # The model is already dispatched to devices above, so no device/device_map
    # argument is passed to the pipeline.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )

# Shared pipeline instance used by generate_report(), built once at import.
# NOTE(review): because this runs at module level, importing this file loads
# the full model even for callers that only need run_check() or fetchText();
# consider lazy initialisation if that becomes a problem.
generator = load_pipeline()


def generate_report(prompt, max_tokens=600):
    """Generate report text for *prompt* with the module-level pipeline.

    Args:
        prompt: Full prompt string passed to the text-generation pipeline.
        max_tokens: Upper bound on newly generated tokens (default 600).

    Returns:
        The generated text of the first result, without the prompt echoed
        back. If generation raises, the string ``"Error: <exception>"`` is
        returned instead of propagating the exception.
    """
    # Sampling configuration forwarded verbatim to the pipeline call.
    sampling_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "return_full_text": False,
    }
    try:
        outputs = generator(prompt, **sampling_options)
        first_result = outputs[0]
        return first_result["generated_text"]
    except Exception as exc:
        return f"Error: {exc}"


def fetchText(url, timeout=30):
    """Download *url* and return its visible text.

    The text is taken from the ``<main>`` element when present, otherwise
    from ``<body>``, otherwise from the whole parsed document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely on an unresponsive
            host.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the extracted text
        (one line per text node) and ``error`` is None. On any failure
        ``text`` is ``""`` and ``error`` is ``"Error fetching URL: <reason>"``.
    """
    try:
        response = req.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Pick the most specific container available; fragments without a
        # <body> fall back to the whole document instead of failing on None.
        container = soup.find('main')
        if container is None:
            container = soup.body
        if container is None:
            container = soup

        text = container.get_text(separator='\n', strip=True)
        return text.strip(), None
    except Exception as e:
        # Best-effort contract: callers receive the failure as a message
        # rather than an exception.
        return "", f"Error fetching URL: {e}"

# Public API: the names exported by a star-import of this module (two
# constants and three functions). HF_MODEL, load_pipeline and generator are
# not listed here.
__all__ = ["RULES", "run_check", "AI_REPORT_PROMPT", "generate_report", "fetchText"]