# Spaces: Sleeping  (hosting-page status banner captured with the source; not part of the program)
import io
import re

import numpy as np
import pandas as pd
import streamlit as st
from docx import Document
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
class CarbonCreditDocGenerator:
    """Build a Carbon Credit Project Document (.docx) from structured text.

    For each fixed section heading the generator retrieves the most similar
    knowledge-base sentences (sentence embeddings + cosine similarity),
    prompts a GPT-2 text-generation pipeline, and then appends any input
    values or knowledge sentences that the generated text does not mention.
    """

    # Headings written to the document, in this order.
    SECTION_TITLES = (
        "Project Overview",
        "Seller/Proponent Information",
        "Carbon Credit Specifications",
        "Financial & Pricing Information",
        "Project Impact and Sustainability",
        "Risks & Mitigation Strategies",
        "Supporting Documentation",
        "Declarations and Acknowledgements",
    )

    # A numbered section marker ("1. ", "12. ") at the START of a line.
    # Anchoring matters: an unanchored pattern also fires inside values such
    # as "Completed in 2023. Expanded later" and cuts them into fake sections.
    _SECTION_MARKER = re.compile(r'^[ \t]*\d+\.\s+', re.MULTILINE)

    def __init__(self):
        """Load the embedding model, the generation pipeline and the knowledge base."""
        self.sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.nlg_pipeline = pipeline("text-generation", model="gpt2", max_length=1000)
        self.knowledge_base = self.load_knowledge_base()

    def load_knowledge_base(self):
        """Return the built-in list of carbon-credit background sentences."""
        return [
            "Carbon credits represent the reduction of one metric ton of carbon dioxide emissions.",
            "Afforestation projects involve planting trees in areas where there were none before.",
            "The Verified Carbon Standard (VCS) is a widely recognized certification for carbon credits.",
            "Carbon credit projects must demonstrate additionality, meaning the reductions wouldn't have occurred without the project.",
            "Monitoring, reporting, and verification (MRV) are crucial components of carbon credit projects.",
            "Project developers must provide detailed information about project location, type, and expected carbon sequestration.",
            "Carbon credit pricing can vary based on project type, location, and additional benefits.",
            "Environmental Impact Assessments (EIA) are often required for carbon credit projects.",
            "Community engagement and social benefits are important aspects of many carbon credit projects.",
            "Risk assessment and mitigation strategies are crucial for project success and credibility.",
        ]

    def process_input_data(self, input_text):
        """Parse numbered sections of ``key: value`` lines into nested dicts.

        A section starts at a line beginning with ``<number>. `` and its first
        line is the section title. Following lines containing a colon become
        ``key -> value`` entries; lines without a colon are treated as a
        continuation of the most recent value. Text before the first marker
        is ignored, as are blank lines and sections with an empty title.

        Args:
            input_text: Raw project description text.

        Returns:
            ``{section_title: {key: value}}`` with all strings stripped.
        """
        data = {}
        # Element 0 of the split is whatever precedes the first marker.
        for section in self._SECTION_MARKER.split(input_text)[1:]:
            lines = section.strip().split('\n')
            title = lines[0].strip()
            if not title:
                continue
            fields = data[title] = {}
            last_key = None
            for raw_line in lines[1:]:
                line = raw_line.strip()
                if not line:
                    continue
                if ':' in line:
                    key, value = line.split(':', 1)
                    last_key = key.strip()
                    fields[last_key] = value.strip()
                elif last_key is not None:
                    # No colon: continuation of the previous value.
                    fields[last_key] += " " + line
        return data

    def retrieve_relevant_knowledge(self, query, top_k=3):
        """Return up to ``top_k`` knowledge sentences most similar to ``query``.

        Args:
            query: Text to compare against the knowledge base.
            top_k: Maximum number of sentences to return; values <= 0 yield
                an empty list.

        Returns:
            Knowledge-base sentences ordered from most to least similar.
        """
        # Guard first: ``[-0:]`` would otherwise select the WHOLE array.
        if top_k <= 0 or not self.knowledge_base:
            return []

        # The knowledge base is constant across sections, so encode it once
        # and reuse the result; re-encode only if its contents change.
        kb_key = tuple(self.knowledge_base)
        cached = getattr(self, "_kb_embedding_cache", None)
        if cached is None or cached[0] != kb_key:
            cached = (kb_key, self.sbert_model.encode(list(kb_key)))
            self._kb_embedding_cache = cached
        knowledge_embeddings = cached[1]

        query_embedding = self.sbert_model.encode([query])[0]
        similarities = cosine_similarity([query_embedding], knowledge_embeddings)[0]
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [kb_key[i] for i in top_indices]

    @staticmethod
    def _strip_prompt(generated_text, prompt):
        """Remove the echoed ``prompt`` prefix from ``generated_text`` if present."""
        if generated_text.startswith(prompt):
            return generated_text[len(prompt):].lstrip()
        return generated_text

    def generate_section_content(self, section_title, input_data, max_length=1000):
        """Generate the paragraph text for one document section.

        Args:
            section_title: Heading of the section being written.
            input_data: Parsed input as returned by ``process_input_data``.
                If it has no entry for ``section_title`` the whole mapping is
                used as context instead.
            max_length: Forwarded to the generation pipeline as ``max_length``.

        Returns:
            Generated text with missing input values and knowledge appended.
        """
        query = f"Generate content for the '{section_title}' section of a carbon credit document."
        relevant_knowledge = self.retrieve_relevant_knowledge(query)
        section_data = input_data.get(section_title, input_data)
        context = f"Input data: {section_data}\n\nRelevant knowledge: {' '.join(relevant_knowledge)}"
        prompt = f"{context}\n\nTask: {query}\n\nContent:"
        # NOTE(review): ``max_length`` presumably counts prompt tokens as well,
        # so a very large ``section_data`` may leave little or no room for new
        # text - confirm against the pipeline documentation.
        generated_text = self.nlg_pipeline(
            prompt, max_length=max_length, num_return_sequences=1
        )[0]['generated_text']
        # The pipeline output begins with the prompt itself. Left in place it
        # would put the raw dict repr and task text into the document, and
        # would make every "already mentioned?" check below trivially true.
        generated_text = self._strip_prompt(generated_text, prompt)
        return self.apply_corrective_rag(generated_text, section_data, relevant_knowledge)

    def apply_corrective_rag(self, generated_text, input_data, relevant_knowledge):
        """Append input values and knowledge sentences missing from the text.

        Presence is tested case-insensitively by substring. Each missing input
        value is appended as `` key: value.`` and each missing knowledge
        sentence as `` sentence``.

        Args:
            generated_text: Text produced by the language model.
            input_data: Mapping of keys to values; a value may itself be a
                mapping (one level of nesting is flattened).
            relevant_knowledge: Sentences that should appear in the result.

        Returns:
            ``generated_text`` followed by the appended additions.
        """
        corrected_text = generated_text
        lowered = corrected_text.lower()

        def flat_items():
            for key, value in input_data.items():
                if isinstance(value, dict):
                    yield from value.items()
                else:
                    yield key, value

        for key, value in flat_items():
            # str() keeps non-string values (numbers, etc.) from crashing.
            if str(value).lower() not in lowered:
                corrected_text += f" {key}: {value}."
                lowered = corrected_text.lower()

        for knowledge in relevant_knowledge:
            if knowledge.lower() not in lowered:
                corrected_text += f" {knowledge}"
                lowered = corrected_text.lower()

        return corrected_text

    def create_document(self, input_text):
        """Build the document: a title plus one heading and paragraph per section.

        Args:
            input_text: Raw project description text.

        Returns:
            The populated ``Document`` object.
        """
        doc = Document()
        doc.add_heading('Carbon Credit Project Document', 0)
        input_data = self.process_input_data(input_text)
        for section in self.SECTION_TITLES:
            doc.add_heading(section, level=1)
            content = self.generate_section_content(section, input_data)
            doc.add_paragraph(content)
        return doc

    def generate_document(self, input_text):
        """Create the document and return it as an in-memory ``io.BytesIO``.

        The returned buffer is rewound to position 0 so it can be read
        immediately.
        """
        doc = self.create_document(input_text)
        doc_io = io.BytesIO()
        doc.save(doc_io)
        doc_io.seek(0)
        return doc_io
# Streamlit app | |
def main():
    """Render the Streamlit page: collect project details and offer the .docx.

    The user either uploads a UTF-8 ``.txt`` file or pastes text. Pressing
    "Generate Document" builds the document with ``CarbonCreditDocGenerator``
    and exposes it through a download button. Errors during generation are
    shown in the page instead of crashing the app.
    """
    # Emoji are written as escapes: the literals had been corrupted by a
    # wrong-codec round trip, and escapes cannot be mangled that way again.
    st.set_page_config(
        page_title="Carbon Credit Document Generator",
        page_icon="\U0001F33F",  # herb
    )
    st.title("Carbon Credit Document Generator")
    st.markdown(
        "This app generates a comprehensive Carbon Credit Project Document based on your input.\n"
        "Upload a text file or paste your project details below."
    )

    # Must be bound on every path: with "Upload File" selected and no file
    # chosen yet, the button handler below would otherwise hit an unbound name.
    input_text = ""

    input_method = st.radio("Choose input method:", ("Upload File", "Paste Text"))
    if input_method == "Upload File":
        uploaded_file = st.file_uploader("Choose a text file", type="txt")
        if uploaded_file is not None:
            try:
                input_text = uploaded_file.read().decode("utf-8")
            except UnicodeDecodeError:
                st.error("The uploaded file is not valid UTF-8 text. Please upload a UTF-8 encoded .txt file.")
            else:
                st.text_area("File Contents (Read-only)", input_text, height=300, disabled=True)
    else:
        input_text = st.text_area(
            "Paste your project details here:",
            height=400,
            help="Enter your project details in a structured format, similar to the Carbon Credit Project Submission Form.",
        )

    if st.button("Generate Document"):
        if not input_text or not input_text.strip():
            st.error("Please provide input data before generating the document.")
        else:
            # Broad catch is deliberate: this is the UI boundary, and model
            # loading or generation can fail in many library-specific ways.
            try:
                generator = CarbonCreditDocGenerator()
                with st.spinner("Generating document... This may take a few moments."):
                    doc_io = generator.generate_document(input_text)
                st.success("Document generated successfully!")
                st.download_button(
                    label="\U0001F4E5 Download Carbon Credit Document",  # inbox tray
                    data=doc_io.getvalue(),
                    file_name="carbon_credit_document.docx",
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                )
                st.info("Your document is ready for download. Click the button above to save it.")
            except Exception as e:
                st.error(f"An error occurred while generating the document: {str(e)}")
                st.info("Please try again or contact support if the problem persists.")

    st.markdown("---")
    st.markdown("Developed by Carbon Connect")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()