File size: 8,330 Bytes
f90d327
 
82a8657
f90d327
 
8ce4fcc
f90d327
1d95e91
8ce4fcc
f90d327
3285eb4
f90d327
 
1d95e91
f90d327
 
82a8657
c49f91a
 
82a8657
c49f91a
 
 
38399ff
c49f91a
 
87f6aa6
 
 
 
82a8657
 
 
 
 
 
f90d327
 
 
82a8657
f90d327
 
 
 
 
 
 
199272d
 
 
 
b85d243
 
 
 
199272d
82a8657
f90d327
 
82a8657
199272d
 
 
 
 
dabb465
f90d327
dabb465
 
 
 
f90d327
82a8657
dabb465
 
 
 
 
 
 
 
 
82a8657
c49f91a
 
82a8657
f90d327
 
82a8657
8ce4fcc
82a8657
 
f90d327
82a8657
f90d327
 
82a8657
833e1ba
82a8657
833e1ba
82a8657
8ce4fcc
 
 
 
87f6aa6
82a8657
8ce4fcc
82a8657
 
 
8ce4fcc
 
 
82a8657
8ce4fcc
 
82a8657
d372b37
82a8657
833e1ba
 
505cf15
833e1ba
 
82a8657
8ce4fcc
82a8657
f90d327
 
82a8657
505cf15
82a8657
 
 
 
505cf15
82a8657
1d95e91
 
 
82a8657
833e1ba
 
 
82a8657
833e1ba
82a8657
8ce4fcc
 
f90d327
 
8ce4fcc
f90d327
82a8657
f90d327
 
 
 
 
 
82a8657
 
c49f91a
82a8657
 
c49f91a
f90d327
 
82a8657
f90d327
 
82a8657
f90d327
82a8657
 
f90d327
 
82a8657
f90d327
8ce4fcc
f90d327
82a8657
f90d327
 
 
 
 
 
 
 
 
 
 
82a8657
f90d327
 
 
 
 
 
 
 
 
 
 
 
8877a66
f90d327
82a8657
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import os
from paddleocr import PaddleOCR
from PIL import Image
import gradio as gr
import re
from simple_salesforce import Salesforce
import pandas as pd

# Attribute mappings: readable names to Salesforce API names.
# Keys are the human-readable labels used in the extraction results and in the
# editable table; values are the custom field API names written to Salesforce.
ATTRIBUTE_MAPPING = {
    "Name": "Patient_Name__c",
    "Age": "Age__c",
    "Gender": "Gender__c",
    "Phone Number": "Phone_Number__c"
}

# Desired order of attributes for display.
# process_image emits one table row per entry here, in this order.
ATTRIBUTE_ORDER = ["Name", "Age", "Gender", "Phone Number"]

# Mapping for Gender__c picklist values: translates the gender captured from
# the OCR text into the value written to Gender__c. Note that "Other" is
# stored as "Others".
GENDER_MAPPING = {
    "Male": "Male",
    "Female": "Female",
    "Other": "Others"
}

# Salesforce credentials from environment variables.
# os.getenv returns None for any variable that is not set; the presence of all
# three is checked later, right before connecting.
SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME")
SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD")
SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN")

# Log the credentials being used (for debugging).
# Only the username is printed in clear; for the password and the security
# token just a Yes/No presence flag is shown. Runs once at import time.
print(f"Using Salesforce credentials - Username: {SALESFORCE_USERNAME}")
print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
print(f"Security token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")

# Initialize PaddleOCR once at import time; extract_text reuses this instance.
# NOTE(review): use_angle_cls=True presumably enables the text-angle
# classifier and lang='en' selects the English models - confirm against the
# PaddleOCR documentation for the installed version.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Function to extract text from an image using PaddleOCR
def extract_text(image):
    """Run OCR on *image* and return the recognized text, one fragment per line.

    Args:
        image: Image in a form accepted by ``ocr.ocr`` (the Gradio UI passes a
            NumPy array). ``None`` is treated as "nothing to read".

    Returns:
        The recognized text fragments joined with newlines, or an empty string
        when there is no image or no text was recognized.
    """
    if image is None:
        return ""

    result = ocr.ocr(image)
    # PaddleOCR reports an image without any detected text as ``[None]``
    # (or an empty result) instead of ``[[]]``; iterating that blindly raises
    # TypeError, so treat it as "no text".
    if not result or not result[0]:
        return ""

    # Each detection is ``[box, (text, confidence)]``; keep only the text.
    return "\n".join(line[1][0] for line in result[0])

# Function to clean extracted text
def clean_extracted_text(text):
    """Return *text* with unified line endings and tidy per-line whitespace.

    ``\\r\\n`` and bare ``\\r`` are converted to ``\\n``. Every resulting line
    is stripped at both ends and each internal run of whitespace is collapsed
    to a single space. Empty lines are kept, so the line count is unchanged.
    """
    unified = text.replace('\r\n', '\n').replace('\r', '\n')

    def _squash(raw_line):
        # Trim the edges first, then collapse whatever whitespace runs remain.
        return re.sub(r'\s+', ' ', raw_line.strip())

    return '\n'.join(map(_squash, unified.split('\n')))

# Function to extract attributes using regex
def extract_attributes(extracted_text):
    """Pull the patient attributes out of OCR text with per-line regexes.

    The text is cleaned with ``clean_extracted_text`` and scanned line by
    line. For each line the patterns are tried in the order Name, Age,
    Gender, Phone Number and the first one that matches wins; the remaining
    patterns are not tried on that line. If several lines match the same
    attribute, the last one is kept.

    Args:
        extracted_text: Raw OCR text, one recognized fragment per line.

    Returns:
        A dict keyed by readable attribute name ("Name", "Age", "Gender",
        "Phone Number") containing only the attributes that were found. The
        gender is normalized through ``GENDER_MAPPING``.
    """
    attributes = {}

    # Clean the extracted text
    cleaned_text = clean_extracted_text(extracted_text)
    print(f"Raw extracted text: '{extracted_text}'")
    print(f"Cleaned extracted text: '{cleaned_text}'")

    # Patterns for extracting personal information, compiled once per call
    # rather than looked up again for every line.
    patterns = {
        "Name": re.compile(r"Name\s*[:\-]?\s*([\w\s\-\.\',]+)", re.IGNORECASE),
        "Age": re.compile(r"Age\s*[:\-]?\s*(\d{1,3})", re.IGNORECASE),
        "Gender": re.compile(r"Gender\s*[:\-]?\s*(Male|Female|Other)", re.IGNORECASE),
        # The optional country code may be followed by a space or a hyphen
        # ("+91 98..." / "+91-98..."), which are the usual printed forms.
        "Phone Number": re.compile(
            r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)\s*[:\-]?\s*(?:\+91[\s\-]?)?([6-9]\d{9})",
            re.IGNORECASE,
        ),
    }

    # Process each line separately
    for line in cleaned_text.split('\n'):
        for readable_attr, pattern in patterns.items():
            match = pattern.search(line)
            if match:
                attributes[readable_attr] = match.group(1).strip()
                print(f"Extracted {readable_attr}: '{attributes[readable_attr]}' from line: '{line}'")
                break  # Move to the next line once a match is found

    gender = attributes.get("Gender")
    if gender is not None:
        # The regex is case-insensitive, so "male" or "OTHER" can be captured;
        # normalize the case before the (case-sensitive) mapping lookup.
        canonical = gender.capitalize()
        attributes["Gender"] = GENDER_MAPPING.get(canonical, canonical)

    return attributes

# Function to filter attributes for valid Salesforce fields
def filter_valid_attributes(attributes, valid_fields, mapping=None):
    """Translate readable attribute names to API names, keeping only valid fields.

    Args:
        attributes: Dict of readable attribute name -> value.
        valid_fields: Collection of field API names that exist on the target
            object.
        mapping: Optional readable-name -> API-name dict. Defaults to the
            module-level ``ATTRIBUTE_MAPPING``.

    Returns:
        A new dict keyed by API name. Attributes that have no entry in the
        mapping, or whose API name is not in *valid_fields*, are skipped
        instead of raising ``KeyError``.
    """
    if mapping is None:
        mapping = ATTRIBUTE_MAPPING

    filtered = {}
    for readable_name, value in attributes.items():
        api_name = mapping.get(readable_name)
        # Unknown labels can come from rows added or renamed in the editable table.
        if api_name is not None and api_name in valid_fields:
            filtered[api_name] = value
    return filtered

# Function to create a record in Salesforce
def interact_with_salesforce(attributes):
    """Create a ``Patient_Registration__c`` record in Salesforce.

    Args:
        attributes: Dict of readable attribute name -> value, e.g.
            ``{"Name": "...", "Age": "42", ...}``. Blank values (``None``,
            NaN, empty or whitespace-only strings) are treated as "not
            provided" and are left out of the request.

    Returns:
        A human-readable status string: a success message containing the new
        record ID, or an error message. Every exception is caught and
        reported through the returned string, because the result is shown
        directly in the UI.
    """

    def _is_blank(value):
        # NaN is the only value that differs from itself; it shows up when a
        # table cell is left empty.
        return value is None or value == "" or (isinstance(value, float) and value != value)

    try:
        # Validate that credentials are not empty
        if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
            raise ValueError("One or more Salesforce credentials are missing. Check environment variables.")

        # Initialize Salesforce connection
        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain="login",
            version="60.0"
        )
        print(f"Successfully connected to Salesforce as {SALESFORCE_USERNAME}")

        # Reference the Patient_Registration__c object
        object_name = "Patient_Registration__c"
        sf_object = getattr(sf, object_name)

        # Get the object's schema to validate fields
        schema = sf_object.describe()
        valid_fields = {field["name"] for field in schema["fields"]}
        print(f"Valid fields for {object_name}: {valid_fields}")

        # Collect field permissions and picklist values (logged for debugging;
        # the picklist values are used below to validate Gender__c)
        field_details = {field["name"]: {
            "createable": field["createable"],
            "required": not field["nillable"] and not field["defaultedOnCreate"],
            "picklist_values": [val["value"] for val in field.get("picklistValues", [])] if field.get("picklistValues") else None
        } for field in schema["fields"]}
        print(f"Field details: {field_details}")

        # Filter attributes to match valid Salesforce fields, trim stray
        # whitespace and drop blanks. Attributes that were not found arrive
        # as "" and would otherwise crash the Age conversion or fail the
        # Gender picklist check.
        filtered_attributes = {}
        for field_name, value in filter_valid_attributes(attributes, valid_fields).items():
            if isinstance(value, str):
                value = value.strip()
            if not _is_blank(value):
                filtered_attributes[field_name] = value

        # Ensure Patient_Name__c is provided (likely required)
        if "Patient_Name__c" not in filtered_attributes:
            raise ValueError("Patient_Name__c is required but was not provided.")

        # Log the attributes being sent for debugging
        print(f"Attributes being sent to Salesforce: {filtered_attributes}")

        # Ensure Age__c is a number
        if "Age__c" in filtered_attributes:
            raw_age = filtered_attributes["Age__c"]
            try:
                filtered_attributes["Age__c"] = int(raw_age)
            except (TypeError, ValueError):
                raise ValueError(f"Age__c must be a whole number, got '{raw_age}'.") from None

        # Validate Gender__c against picklist values
        if "Gender__c" in filtered_attributes:
            gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
            if gender_values and filtered_attributes["Gender__c"] not in gender_values:
                raise ValueError(f"Invalid value for Gender__c: '{filtered_attributes['Gender__c']}'. Allowed values: {gender_values}")

        # Create the record
        result = sf_object.create(filtered_attributes)
        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."

    except Exception as e:
        return f"❌ Error interacting with Salesforce: {str(e)}"

# Function to process the image and extract attributes
def process_image(image):
    """OCR an uploaded image and prepare its attributes for editing.

    Args:
        image: The uploaded image, or ``None`` when the button is pressed
            without an upload.

    Returns:
        A 3-tuple ``(text, table, result)`` matching the three UI outputs:
        ``text`` is the extracted text (or a status message), ``table`` is a
        two-column ``Attribute``/``Value`` DataFrame with one row per entry
        of ``ATTRIBUTE_ORDER`` (``None`` when nothing could be extracted),
        and ``result`` is always ``None``, which clears the result box.
    """
    # Guard first: running OCR on a missing image would raise instead of
    # giving the user something actionable.
    if image is None:
        return "No image provided. Please upload an image first.", None, None

    extracted_text = extract_text(image)
    if not extracted_text:
        return "No text detected in the image.", None, None

    attributes = extract_attributes(extracted_text)

    # Ensure all attributes are present, even if empty, in the desired order
    ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}

    # Convert attributes to DataFrame for display
    df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
    return f"Extracted Text:\n{extracted_text}", df, None

# Function to handle edited attributes and export to Salesforce
def export_to_salesforce(edited_df):
    """Send the (possibly user-edited) attribute table to Salesforce.

    Args:
        edited_df: DataFrame with ``Attribute`` and ``Value`` columns, as
            produced by ``process_image`` and edited in the UI. ``None`` or a
            table without those columns means nothing has been extracted yet.

    Returns:
        The status message to display: the result of
        ``interact_with_salesforce``, or an error message. Never raises.
    """
    try:
        # Pressing OK before an extraction yields no table (or one with
        # placeholder headers); report that clearly instead of surfacing a
        # bare KeyError.
        columns = getattr(edited_df, "columns", ())
        if edited_df is None or not {"Attribute", "Value"}.issubset(columns):
            return (
                "❌ Error exporting to Salesforce: no attributes to export. "
                "Extract attributes from an image first."
            )

        # Convert edited DataFrame back to dictionary
        edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))

        # Export to Salesforce
        return interact_with_salesforce(edited_attributes)

    except Exception as e:
        return f"❌ Error exporting to Salesforce: {str(e)}"

# Gradio Interface
def app():
    """Build the Gradio UI and wire up its two actions.

    The single tab contains an image upload, an "Extract" button that fills
    the text box and the editable attribute table via ``process_image``, and
    an "OK" button that sends the edited table to ``export_to_salesforce``
    and shows the returned status.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # The tab title and labels carry emoji; they are written as real Unicode
    # characters here because the previous mis-encoded byte sequences
    # rendered as garbage in the browser.
    with gr.Blocks() as demo:
        with gr.Tab("📥 OCR Processing"):
            with gr.Row():
                image_input = gr.Image(type="numpy", label="📄 Upload Image")
            extract_button = gr.Button("Extract Text and Attributes")
            extracted_text_output = gr.Text(label="📝 Extracted Image Data")
            editable_df_output = gr.Dataframe(label="✏️ Edit Attributes (Key-Value Pairs)", interactive=True)
            ok_button = gr.Button("OK")
            result_output = gr.Text(label="🚀 Result")

        # Define button actions
        extract_button.click(
            fn=process_image,
            inputs=[image_input],
            # process_image returns three values; the third resets the result box.
            outputs=[extracted_text_output, editable_df_output, result_output]
        )
        ok_button.click(
            fn=export_to_salesforce,
            inputs=[editable_df_output],
            outputs=[result_output]
        )

    return demo

# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    # NOTE(review): per Gradio's launch() documentation, share=True requests a
    # publicly reachable share link. This app handles patient data and writes
    # to Salesforce - confirm that public exposure is intended.
    app().launch(share=True)