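# NOTE(review): "Spaces: Paused / Paused" appeared here as bare text. It looks
# like page-header residue from the hosting site rather than program text —
# confirm and remove.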
import re | |
import os | |
import requests as req | |
from bs4 import BeautifulSoup | |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# Compliance checklist: framework name -> list of (control label, regex) pairs.
# run_check() searches each regex case-insensitively in the supplied text and
# reports the control as passed when the regex is found anywhere in it.
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        # Accept "72 hour", "72hours" and the hyphenated "72-hour" spelling.
        ("72-hour breach notice plan", r"72[\s-]*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        # Word boundaries keep "annexation" / "annex and ..." from matching.
        ("Annex A control list", r"\bannex\s*a\b"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}
def run_check(text, framework, rules=None):
    """Check *text* against the checklist of each requested framework.

    Args:
        text: Text to scan. ``None`` is treated as an empty string, so every
            control is reported as not matched.
        framework: Iterable of framework names, or a single framework name
            given as a plain string.
        rules: Optional mapping of framework name to a list of
            ``(label, pattern)`` pairs. Defaults to the module-level ``RULES``.

    Returns:
        A dict mapping each requested framework name to a list of
        ``(label, matched)`` tuples, in the same order as the rules.

    Raises:
        KeyError: If a requested framework is not present in the rule table.
    """
    rule_table = RULES if rules is None else rules
    # A bare string would otherwise be iterated character by character.
    if isinstance(framework, str):
        framework = [framework]
    haystack = text or ""
    return {
        fw: [
            (label, re.search(pattern, haystack, re.I) is not None)
            for label, pattern in rule_table[fw]
        ]
        for fw in framework
    }
# Prompt template for the AI-written compliance report. `{bullet}` is the only
# placeholder in the text; presumably the caller fills it with the checklist
# summary via str.format() — confirm against the caller.
AI_REPORT_PROMPT = """
You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
Your task is to produce a **clear, actionable report** tailored to a technical audience. Structure it as follows:
1. **Executive Summary**
- One or two sentences on overall compliance posture
- Highest‑level takeaways
2. **Key Issues Identified**
- For each framework with failures, list:
- The specific failed control(s) by label
- A brief description of why that control matters
- Use bullet points and group by framework
3. **Remediation Strategies**
- For each key issue above, recommend a **concrete next step** or mitigation strategy
- Prioritize actions by risk/impact (e.g., “High‑priority: Encrypt data at rest to meet ISO 27001 A.10.1”)
4. **Additional Resources & Next Steps**
- A short paragraph on how deeper expert review can streamline remediation
- A call‑out promoting AnkTechSol’s professional compliance consulting (e.g., “For a full policy audit, tailored gap analysis, and implementation roadmap, visit anktechsol.com or contact our team at [contact link].”)
5. **Appendix (Optional)**
- Raw bullet list of “Framework: X passed/total, Y failed/total”
Make sure to:
- Use clear headings (`## Executive Summary`, etc.)
- Keep each section brief but substantive (no more than 4–6 bullets per section)
- Write in a confident, consultative tone
Here are the raw results to incorporate:
{bullet}
Generate the report as markdown.
"""
# Hugging Face Hub identifier of the model loaded by load_pipeline().
HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"


# Load the text generation pipeline
def load_pipeline():
    """Build a ``text-generation`` pipeline for ``HF_MODEL``.

    The tokenizer and model are both loaded from ``HF_MODEL``. The model is
    loaded with ``torch.float16`` weights and ``device_map="auto"``.

    ``trust_remote_code`` is deliberately left at its default (off): enabling
    it allows code shipped in the Hub repository to run locally.
    NOTE(review): this assumes the installed transformers version supports the
    model architecture natively — confirm.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)
    model = AutoModelForCausalLM.from_pretrained(
        HF_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    # The model object is already placed on devices by from_pretrained above,
    # so device_map is not repeated here.
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )


# Created eagerly at import time: importing this module loads the model.
generator = load_pipeline()
def generate_report(prompt, max_tokens=600):
    """Generate text for *prompt* with the module-level ``generator``.

    Args:
        prompt: Prompt string passed to the generator.
        max_tokens: Upper bound on newly generated tokens (``max_new_tokens``).

    Returns:
        The ``"generated_text"`` of the first result. Any exception raised
        along the way is not propagated; the function returns the string
        ``"Error: <exception>"`` instead.
    """
    sampling_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        # Only the continuation is wanted, not the prompt echoed back.
        "return_full_text": False,
    }
    try:
        outputs = generator(prompt, **sampling_options)
        first_result = outputs[0]
        return first_result["generated_text"]
    except Exception as err:
        return f"Error: {err}"
def fetchText(url, timeout=10):
    """Download *url* and return the visible text of the page.

    Text is taken from the ``<main>`` element when present, otherwise from
    ``<body>``, otherwise from the whole parsed document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        A ``(text, error)`` tuple. On success ``error`` is ``None``. On any
        failure ``text`` is ``""`` and ``error`` is a message starting with
        ``"Error fetching URL: "``. The function does not raise.
    """
    try:
        response = req.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        root = soup.find('main')
        if root is None:
            root = soup.body
        if root is None:
            # No <body> either (e.g. an HTML fragment): use the whole document
            # rather than failing with an AttributeError on None.
            root = soup
        text = root.get_text(separator='\n', strip=True)
        return text, None
    except Exception as e:
        # Deliberately broad: callers rely on the (text, error) contract.
        return "", f"Error fetching URL: {e}"
# Public API: the names exported by a star-import of this module.
__all__ = [
    "RULES",
    "run_check",
    "AI_REPORT_PROMPT",
    "generate_report",
    "fetchText",
]