File size: 7,726 Bytes
b0b0f10
 
 
 
 
 
 
 
22dc2ca
b0b0f10
 
 
 
22dc2ca
b0b0f10
 
 
 
 
 
 
 
 
22dc2ca
 
 
 
 
b0b0f10
 
 
22dc2ca
b0b0f10
22dc2ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0b0f10
 
 
 
 
 
 
 
 
 
 
22dc2ca
b0b0f10
 
 
22dc2ca
 
b0b0f10
 
 
 
22dc2ca
b0b0f10
 
 
 
 
 
 
22dc2ca
 
 
 
 
b0b0f10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22dc2ca
 
 
 
 
 
 
 
b0b0f10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22dc2ca
b0b0f10
 
22dc2ca
 
 
 
 
 
 
 
 
 
 
 
 
 
b0b0f10
22dc2ca
 
 
 
 
 
 
 
 
b0b0f10
22dc2ca
b0b0f10
22dc2ca
 
 
 
 
 
 
 
 
 
 
b0b0f10
22dc2ca
8be9094
b0b0f10
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from docx import Document
import io
import re

class CarbonCreditDocGenerator:
    """Build a Carbon Credit Project Document (.docx) from structured text.

    For each document section the generator parses the numbered input form,
    retrieves the knowledge-base sentences most similar to the section query
    (SBERT embeddings + cosine similarity), lets the text-generation pipeline
    continue a prompt built from both, and finally appends any input value or
    knowledge sentence that the generated text does not mention.
    """

    # A section header is "<number>. <title>" at the start of a line. Anchoring
    # to the line start keeps a sentence such as "planted in 2020. The site ..."
    # inside a value from being mistaken for the start of a new section.
    _SECTION_HEADER = re.compile(r'^[ \t]*\d+\.\s+', re.MULTILINE)

    # Headings written by create_document, in document order.
    SECTION_TITLES = (
        "Project Overview",
        "Seller/Proponent Information",
        "Carbon Credit Specifications",
        "Financial & Pricing Information",
        "Project Impact and Sustainability",
        "Risks & Mitigation Strategies",
        "Supporting Documentation",
        "Declarations and Acknowledgements",
    )

    def __init__(self):
        """Instantiate the embedding model, the generation pipeline and the knowledge base."""
        self.sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.nlg_pipeline = pipeline("text-generation", model="gpt2", max_length=1000)
        self.knowledge_base = self.load_knowledge_base()
        # (knowledge snapshot, embeddings) pair, filled lazily on first retrieval.
        self._knowledge_cache = None

    def load_knowledge_base(self):
        """Return the built-in list of carbon-credit reference sentences."""
        return [
            "Carbon credits represent the reduction of one metric ton of carbon dioxide emissions.",
            "Afforestation projects involve planting trees in areas where there were none before.",
            "The Verified Carbon Standard (VCS) is a widely recognized certification for carbon credits.",
            "Carbon credit projects must demonstrate additionality, meaning the reductions wouldn't have occurred without the project.",
            "Monitoring, reporting, and verification (MRV) are crucial components of carbon credit projects.",
            "Project developers must provide detailed information about project location, type, and expected carbon sequestration.",
            "Carbon credit pricing can vary based on project type, location, and additional benefits.",
            "Environmental Impact Assessments (EIA) are often required for carbon credit projects.",
            "Community engagement and social benefits are important aspects of many carbon credit projects.",
            "Risk assessment and mitigation strategies are crucial for project success and credibility."
        ]

    def process_input_data(self, input_text):
        """Parse a numbered form into ``{section title: {field: value}}``.

        Each section starts with a line of the form ``"<n>. <title>"``; the
        following ``key: value`` lines become its fields. A non-blank line
        without a colon is treated as a continuation of the most recently
        parsed field. Text before the first numbered header is ignored.

        Args:
            input_text: Raw text of the submission form.

        Returns:
            dict mapping each section title to a dict of its fields; empty
            when the text contains no numbered section header.
        """
        # A UTF-8 byte-order mark in front of "1." would hide the first header
        # from the line-anchored pattern.
        text = input_text.lstrip('\ufeff')
        data = {}
        for section in self._SECTION_HEADER.split(text)[1:]:
            lines = section.strip().split('\n')
            fields = {}
            data[lines[0].strip()] = fields
            last_key = None
            for line in lines[1:]:
                stripped = line.strip()
                if not stripped:
                    # Blank lines carry no data; appending them would only pad
                    # the previous value with spaces.
                    continue
                key, separator, value = line.partition(':')
                if separator:
                    last_key = key.strip()
                    fields[last_key] = value.strip()
                elif last_key is not None:
                    fields[last_key] += " " + stripped
        return data

    def _knowledge_embeddings(self):
        """Return the knowledge-base embeddings, re-encoding only when the base changed."""
        snapshot = tuple(self.knowledge_base)
        # getattr: tolerate instances created without running __init__.
        cached = getattr(self, '_knowledge_cache', None)
        if cached is None or cached[0] != snapshot:
            cached = (snapshot, self.sbert_model.encode(list(snapshot)))
            self._knowledge_cache = cached
        return cached[1]

    def retrieve_relevant_knowledge(self, query, top_k=3):
        """Return up to ``top_k`` knowledge sentences most similar to ``query``.

        Args:
            query: Text to compare against the knowledge base.
            top_k: Maximum number of sentences to return.

        Returns:
            list of knowledge-base sentences, most similar first; empty when
            ``top_k`` is not positive or the knowledge base is empty.
        """
        if top_k <= 0 or not self.knowledge_base:
            return []

        query_embedding = self.sbert_model.encode([query])[0]
        similarities = cosine_similarity([query_embedding], self._knowledge_embeddings())[0]
        # Reverse first, then cut: a "[-top_k:]" slice would return everything
        # for top_k == 0.
        top_indices = np.argsort(similarities)[::-1][:top_k]

        return [self.knowledge_base[i] for i in top_indices]

    def generate_section_content(self, section_title, input_data, max_length=1000):
        """Generate the paragraph text for one document section.

        Args:
            section_title: Heading of the section; also the key looked up in
                ``input_data``.
            input_data: Parsed form as returned by ``process_input_data``.
                When it has no entry for ``section_title`` the whole mapping
                is used as context.
            max_length: ``max_length`` forwarded to the generation pipeline.

        Returns:
            The generated continuation followed by any input value or
            retrieved knowledge sentence it failed to mention.
        """
        query = f"Generate content for the '{section_title}' section of a carbon credit document."
        relevant_knowledge = self.retrieve_relevant_knowledge(query)

        section_data = input_data.get(section_title, input_data)
        context = f"Input data: {section_data}\n\nRelevant knowledge: {' '.join(relevant_knowledge)}"
        prompt = f"{context}\n\nTask: {query}\n\nContent:"

        generated_text = self.nlg_pipeline(prompt, max_length=max_length, num_return_sequences=1)[0]['generated_text']

        # The pipeline output begins with the prompt itself (return_full_text
        # default). Left in place it would put the raw dict/task text into the
        # document and make every "is this value mentioned?" check below
        # trivially true.
        if generated_text.startswith(prompt):
            generated_text = generated_text[len(prompt):]

        corrected_text = self.apply_corrective_rag(generated_text.strip(), section_data, relevant_knowledge)

        return corrected_text.strip()

    def apply_corrective_rag(self, generated_text, input_data, relevant_knowledge):
        """Append input values and knowledge sentences missing from the text.

        Membership is checked case-insensitively against the text as it grows,
        so a value already covered by an earlier addition is not repeated.

        Args:
            generated_text: Text produced by the generator.
            input_data: Mapping of field -> value, or of section -> mapping of
                field -> value (one level of nesting is flattened).
            relevant_knowledge: Sentences that must appear in the result.

        Returns:
            ``generated_text`` with `` key: value.`` and `` sentence`` pieces
            appended for everything that was missing.
        """
        facts = []
        for key, value in input_data.items():
            if isinstance(value, dict):
                facts.extend(value.items())
            else:
                facts.append((key, value))

        corrected_text = generated_text
        for label, value in facts:
            value_text = str(value)  # tolerate non-string values
            if value_text.lower() not in corrected_text.lower():
                corrected_text += f" {label}: {value_text}."

        for knowledge in relevant_knowledge:
            if knowledge.lower() not in corrected_text.lower():
                corrected_text += f" {knowledge}"

        return corrected_text

    def create_document(self, input_text):
        """Build the python-docx ``Document`` with one generated paragraph per section.

        Args:
            input_text: Raw text of the submission form.

        Returns:
            The populated ``Document`` object.
        """
        doc = Document()
        doc.add_heading('Carbon Credit Project Document', 0)

        input_data = self.process_input_data(input_text)

        for section in self.SECTION_TITLES:
            doc.add_heading(section, level=1)
            content = self.generate_section_content(section, input_data)
            doc.add_paragraph(content)

        return doc

    def generate_document(self, input_text):
        """Return the generated document as an in-memory ``io.BytesIO`` positioned at 0."""
        doc = self.create_document(input_text)
        doc_io = io.BytesIO()
        doc.save(doc_io)
        doc_io.seek(0)  # rewind so callers can read from the beginning
        return doc_io

# Streamlit app
def main():
    """Render the Streamlit page: collect project details and offer the .docx download.

    The user either uploads a UTF-8 text file or pastes the project details.
    Pressing "Generate Document" builds the document with
    ``CarbonCreditDocGenerator`` and exposes it through a download button;
    any failure during generation is reported on the page instead of raised.
    """
    # Emoji are written as escapes so the literals survive re-encoding of this
    # source file (U+1F33F herb, U+1F4E5 inbox tray).
    st.set_page_config(page_title="Carbon Credit Document Generator", page_icon="\U0001F33F")
    st.title("Carbon Credit Document Generator")

    st.markdown("""
    This app generates a comprehensive Carbon Credit Project Document based on your input.
    Upload a text file or paste your project details below.
    """)

    input_method = st.radio("Choose input method:", ("Upload File", "Paste Text"))

    # Bound up front: with "Upload File" selected and nothing uploaded yet, the
    # button handler below would otherwise hit an unbound local.
    input_text = ""

    if input_method == "Upload File":
        uploaded_file = st.file_uploader("Choose a text file", type="txt")
        if uploaded_file is not None:
            try:
                # utf-8-sig reads plain UTF-8 too and drops a leading BOM.
                input_text = uploaded_file.read().decode("utf-8-sig")
            except UnicodeDecodeError:
                st.error("The uploaded file is not valid UTF-8 text. Please re-save it as UTF-8 and try again.")
            else:
                st.text_area("File Contents (Read-only)", input_text, height=300, disabled=True)
    else:
        input_text = st.text_area("Paste your project details here:", height=400, help="Enter your project details in a structured format, similar to the Carbon Credit Project Submission Form.")

    if st.button("Generate Document"):
        if not input_text or not input_text.strip():
            st.error("Please provide input data before generating the document.")
        else:
            # UI boundary: report any generation failure to the user rather
            # than letting the script run abort.
            try:
                generator = CarbonCreditDocGenerator()

                with st.spinner("Generating document... This may take a few moments."):
                    doc_io = generator.generate_document(input_text)

                st.success("Document generated successfully!")

                st.download_button(
                    label="\U0001F4E5 Download Carbon Credit Document",
                    data=doc_io.getvalue(),
                    file_name="carbon_credit_document.docx",
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
                )

                st.info("Your document is ready for download. Click the button above to save it.")
            except Exception as e:
                st.error(f"An error occurred while generating the document: {str(e)}")
                st.info("Please try again or contact support if the problem persists.")

    st.markdown("---")
    st.markdown("Developed by Carbon Connect")

# Call main() only when this module is executed as a script, not when imported.
if __name__ == "__main__":
    main()