# Scraped file-viewer residue (not part of the program): file size 8,330 bytes,
# followed by the per-line commit-hash column and the 1-210 line-number gutter.
import os
from paddleocr import PaddleOCR
from PIL import Image
import gradio as gr
import re
from simple_salesforce import Salesforce
import pandas as pd
# Human-readable attribute labels -> Salesforce API field names.
ATTRIBUTE_MAPPING = {
    "Name": "Patient_Name__c",
    "Age": "Age__c",
    "Gender": "Gender__c",
    "Phone Number": "Phone_Number__c",
}

# Order in which the attributes are listed in the editable table.
ATTRIBUTE_ORDER = [
    "Name",
    "Age",
    "Gender",
    "Phone Number",
]

# Extracted gender word -> value stored in the Gender__c picklist.
# Note that "Other" is stored as "Others".
GENDER_MAPPING = dict(
    Male="Male",
    Female="Female",
    Other="Others",
)
# Salesforce login details are read from the process environment; each one is
# None when the corresponding variable is unset.
SALESFORCE_USERNAME = os.environ.get("SALESFORCE_USERNAME")
SALESFORCE_PASSWORD = os.environ.get("SALESFORCE_PASSWORD")
SALESFORCE_SECURITY_TOKEN = os.environ.get("SALESFORCE_SECURITY_TOKEN")

# Startup diagnostics: the username is printed as-is, while for the password
# and security token only their presence is reported, never their values.
print("Using Salesforce credentials - Username:", SALESFORCE_USERNAME)
print("Password set: " + ("Yes" if SALESFORCE_PASSWORD else "No"))
print("Security token set: " + ("Yes" if SALESFORCE_SECURITY_TOKEN else "No"))

# Single English-language OCR engine, created once at import time and shared
# by every request.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
def extract_text(image):
    """Run OCR on an image and return the recognized text, one detection per line.

    Args:
        image: Image input passed unchanged to ``ocr.ocr``.

    Returns:
        The recognized text fragments joined with newlines, or an empty
        string when the OCR engine reports no detections.
    """
    result = ocr.ocr(image)
    # PaddleOCR may report "nothing found" as None, an empty list, or a list
    # whose first page is None; iterating that would raise TypeError, so all
    # of these are treated as "no text".
    if not result or not result[0]:
        return ""
    # Only the first page of results is used; detection[1][0] is the text of
    # each detection.
    return "\n".join(detection[1][0] for detection in result[0])
def clean_extracted_text(text):
    """Normalize line endings and collapse whitespace inside every line.

    ``\\r\\n`` and bare ``\\r`` are treated as line breaks just like ``\\n``.
    Each line is trimmed and every internal run of whitespace becomes a single
    space. Empty lines are preserved, so the number of lines does not change.

    Args:
        text: Raw text, possibly with mixed line endings and ragged spacing.

    Returns:
        The cleaned lines joined with ``\\n``.
    """
    raw_lines = re.split(r"\r\n|\r|\n", text)
    # split() without arguments drops leading/trailing whitespace and splits
    # on whitespace runs, so re-joining with one space trims and collapses.
    return "\n".join(" ".join(raw_line.split()) for raw_line in raw_lines)
# Per-attribute regexes, compiled once at import. Each pattern is applied to a
# single cleaned line and captures the attribute value in group 1.
_ATTRIBUTE_PATTERNS = {
    "Name": re.compile(r"Name\s*[:\-]?\s*([\w\s\-\.\',]+)", re.IGNORECASE),
    "Age": re.compile(r"Age\s*[:\-]?\s*(\d{1,3})", re.IGNORECASE),
    "Gender": re.compile(r"Gender\s*[:\-]?\s*(Male|Female|Other)", re.IGNORECASE),
    # Optional "+91" country code, optionally separated from the number by
    # spaces or hyphens, then a 10-digit number starting with 6-9.
    "Phone Number": re.compile(
        r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)\s*[:\-]?\s*(?:\+91[\s\-]*)?([6-9]\d{9})",
        re.IGNORECASE,
    ),
}


def extract_attributes(extracted_text):
    """Pull Name, Age, Gender and Phone Number out of OCR text.

    The text is cleaned with ``clean_extracted_text`` and scanned line by
    line. Each line contributes at most one attribute (the first pattern that
    matches, in the order Name, Age, Gender, Phone Number). When the same
    attribute matches on several lines, the last match wins.

    Args:
        extracted_text: Raw OCR text, one detection per line.

    Returns:
        Dict mapping readable attribute names to extracted string values.
        Attributes that were not found are absent. ``Gender`` is translated
        through ``GENDER_MAPPING``.
    """
    attributes = {}
    cleaned_text = clean_extracted_text(extracted_text)
    print(f"Raw extracted text: '{extracted_text}'")
    print(f"Cleaned extracted text: '{cleaned_text}'")

    for line in cleaned_text.split('\n'):
        for readable_attr, pattern in _ATTRIBUTE_PATTERNS.items():
            match = pattern.search(line)
            if match:
                attributes[readable_attr] = match.group(1).strip()
                print(f"Extracted {readable_attr}: '{attributes[readable_attr]}' from line: '{line}'")
                break  # Move to the next line once a match is found

    if "Gender" in attributes:
        # The regex is case-insensitive, so "male" / "FEMALE" can be captured;
        # normalize to the capitalized form used as GENDER_MAPPING keys before
        # the lookup, otherwise such values would bypass the mapping.
        gender = attributes["Gender"].capitalize()
        attributes["Gender"] = GENDER_MAPPING.get(gender, gender)
    return attributes
def filter_valid_attributes(attributes, valid_fields):
    """Translate readable attribute names to Salesforce fields, keeping valid ones.

    Args:
        attributes: Mapping of readable attribute names to values.
        valid_fields: Collection of API field names that exist on the target
            Salesforce object.

    Returns:
        Dict keyed by Salesforce API field name. Entries whose readable name
        has no entry in ``ATTRIBUTE_MAPPING``, or whose API name is not in
        ``valid_fields``, are omitted.
    """
    filtered = {}
    for readable_name, value in attributes.items():
        # .get() instead of indexing: the attribute table is user-editable, so
        # an unknown or renamed label must be skipped rather than raise KeyError.
        api_name = ATTRIBUTE_MAPPING.get(readable_name)
        if api_name is not None and api_name in valid_fields:
            filtered[api_name] = value
    return filtered
def _is_blank(value):
    """Return True for values that carry no data: None, NaN, or whitespace-only text."""
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return True
    return isinstance(value, str) and not value.strip()


def interact_with_salesforce(attributes):
    """Create a Patient_Registration__c record from extracted attributes.

    Connects with the module-level Salesforce credentials, reads the object's
    schema, keeps only attributes that map to existing fields, validates them
    and creates the record. Diagnostic details are printed along the way.

    Args:
        attributes: Mapping of readable attribute names (the keys of
            ``ATTRIBUTE_MAPPING``) to their values. Blank values are treated
            as "not provided".

    Returns:
        A status message: the new record ID on success, or a description of
        the failure. No exception is propagated; every error is converted
        into the returned message.
    """
    try:
        # Validate that credentials are not empty
        if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
            raise ValueError("One or more Salesforce credentials are missing. Check environment variables.")

        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain="login",
            version="60.0",
        )
        print(f"Successfully connected to Salesforce as {SALESFORCE_USERNAME}")

        # Reference the Patient_Registration__c object
        object_name = "Patient_Registration__c"
        sf_object = getattr(sf, object_name)

        # Get the object's schema to validate fields
        schema = sf_object.describe()
        valid_fields = {field["name"] for field in schema["fields"]}
        print(f"Valid fields for {object_name}: {valid_fields}")

        # Per-field creatability, requiredness and picklist values
        field_details = {field["name"]: {
            "createable": field["createable"],
            "required": not field["nillable"] and not field["defaultedOnCreate"],
            "picklist_values": [val["value"] for val in field.get("picklistValues", [])] if field.get("picklistValues") else None
        } for field in schema["fields"]}
        print(f"Field details: {field_details}")

        # Filter attributes to match valid Salesforce fields
        filtered_attributes = filter_valid_attributes(attributes, valid_fields)

        # The attribute table carries "" for anything that was not extracted;
        # drop such blanks so optional fields are simply omitted instead of
        # failing conversion (e.g. int("")) or being sent as empty strings.
        filtered_attributes = {
            field_name: value
            for field_name, value in filtered_attributes.items()
            if not _is_blank(value)
        }

        # Ensure Patient_Name__c is provided (likely required)
        if "Patient_Name__c" not in filtered_attributes:
            raise ValueError("Patient_Name__c is required but was not provided.")

        # Log the attributes being sent for debugging
        print(f"Attributes being sent to Salesforce: {filtered_attributes}")

        # Ensure Age__c is a number
        if "Age__c" in filtered_attributes:
            try:
                filtered_attributes["Age__c"] = int(filtered_attributes["Age__c"])
            except (TypeError, ValueError) as exc:
                raise ValueError(
                    f"Age__c must be a whole number, got '{filtered_attributes['Age__c']}'."
                ) from exc

        # Validate Gender__c against picklist values (skipped when the schema
        # lists none)
        if "Gender__c" in filtered_attributes:
            gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
            if gender_values and filtered_attributes["Gender__c"] not in gender_values:
                raise ValueError(f"Invalid value for Gender__c: '{filtered_attributes['Gender__c']}'. Allowed values: {gender_values}")

        # Create the record
        result = sf_object.create(filtered_attributes)
        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."
    except Exception as e:
        # UI boundary: report every failure as text instead of raising.
        return f"❌ Error interacting with Salesforce: {str(e)}"
def process_image(image):
    """OCR an uploaded image and prepare its attributes for editing.

    Args:
        image: The uploaded image, or None when nothing has been uploaded.

    Returns:
        A 3-tuple ``(text_message, attribute_table, result_message)``:
        ``text_message`` is the extracted text (or an explanatory message),
        ``attribute_table`` is a two-column DataFrame ("Attribute", "Value")
        listing every attribute in ``ATTRIBUTE_ORDER`` with "" for missing
        ones (None when there is nothing to show), and ``result_message`` is
        always None.
    """
    # Clicking the button without an upload yields None; answer with a clear
    # message instead of passing None to the OCR engine.
    if image is None:
        return "No image provided. Please upload an image first.", None, None

    extracted_text = extract_text(image)
    if not extracted_text:
        return "No text detected in the image.", None, None

    attributes = extract_attributes(extracted_text)
    # Ensure all attributes are present, even if empty, in the desired order
    ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}
    # Convert attributes to DataFrame for display
    df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
    return f"Extracted Text:\n{extracted_text}", df, None
def export_to_salesforce(edited_df):
    """Send the (possibly user-edited) attribute table to Salesforce.

    Args:
        edited_df: Table with "Attribute" and "Value" columns, as produced by
            ``process_image`` and edited in the UI; may be None or lack those
            columns when no extraction has been run yet.

    Returns:
        The status message from ``interact_with_salesforce``, or an error
        message when the table is unusable or the export fails.
    """
    try:
        # Before any extraction the table is None or has placeholder columns;
        # report that explicitly instead of surfacing a bare KeyError text.
        columns = getattr(edited_df, "columns", ())
        if edited_df is None or not {"Attribute", "Value"}.issubset(columns):
            return (
                "❌ Error exporting to Salesforce: No attributes to export. "
                "Extract attributes from an image first."
            )
        # Convert edited DataFrame back to dictionary
        edited_attributes = dict(zip(edited_df["Attribute"], edited_df["Value"]))
        return interact_with_salesforce(edited_attributes)
    except Exception as e:
        # UI boundary: report every failure as text instead of raising.
        return f"❌ Error exporting to Salesforce: {str(e)}"
def app():
    """Assemble the Gradio UI and return the Blocks object without launching it.

    The single tab holds an image upload, a button that runs ``process_image``
    and fills the text box, the editable attribute table and the result box,
    and an "OK" button that passes the table to ``export_to_salesforce`` and
    shows its message in the result box.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # NOTE(review): the nesting of the Row/Tab contexts is reconstructed; the
    # source lost its indentation, so confirm the intended layout.
    with gr.Blocks() as demo:
        with gr.Tab("π₯ OCR Processing"):
            with gr.Row():
                uploaded_image = gr.Image(type="numpy", label="π Upload Image")
            run_extraction = gr.Button("Extract Text and Attributes")
            ocr_text_box = gr.Text(label="π Extracted Image Data")
            attribute_table = gr.Dataframe(
                label="βοΈ Edit Attributes (Key-Value Pairs)",
                interactive=True,
            )
            confirm_export = gr.Button("OK")
            status_box = gr.Text(label="π Result")

            # Extraction fills the text box and the table and resets the status.
            run_extraction.click(
                process_image,
                inputs=[uploaded_image],
                outputs=[ocr_text_box, attribute_table, status_box],
            )
            # Confirmation exports the current table contents.
            confirm_export.click(
                export_to_salesforce,
                inputs=[attribute_table],
                outputs=[status_box],
            )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server; share=True is passed through
    # to launch() unchanged.
    app().launch(share=True)