# chatbot / app.py
# Pranay25's picture
# Update app.py
# dabb465 verified
import os
from paddleocr import PaddleOCR
from PIL import Image
import gradio as gr
import re
from simple_salesforce import Salesforce
import pandas as pd
# Display label -> Salesforce API field name. The insertion order of this
# dict is also the order in which attributes are displayed.
ATTRIBUTE_MAPPING = {
    "Name": "Patient_Name__c",
    "Age": "Age__c",
    "Gender": "Gender__c",
    "Phone Number": "Phone_Number__c",
}

# Display order of the attributes, taken from the mapping's key order so the
# two cannot drift apart.
ATTRIBUTE_ORDER = list(ATTRIBUTE_MAPPING)

# Extracted gender label -> value used for the Gender__c picklist.
GENDER_MAPPING = dict(Male="Male", Female="Female", Other="Others")
# Salesforce login details are read from the process environment; each name
# is None when its variable is not set.
SALESFORCE_USERNAME = os.environ.get("SALESFORCE_USERNAME")
SALESFORCE_PASSWORD = os.environ.get("SALESFORCE_PASSWORD")
SALESFORCE_SECURITY_TOKEN = os.environ.get("SALESFORCE_SECURITY_TOKEN")

# Startup diagnostics: the username is echoed as-is, while for the password
# and security token only their presence is reported.
print(f"Using Salesforce credentials - Username: {SALESFORCE_USERNAME}")
print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
print(f"Security token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")
# Initialize PaddleOCR
# The engine is constructed once at import time and shared by every
# extract_text() call instead of being rebuilt per request.
# NOTE(review): use_angle_cls / lang are passed straight through to PaddleOCR;
# presumably they enable the text-angle classifier and select the English
# model - confirm against the PaddleOCR documentation.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
# Function to extract text from an image using PaddleOCR
def extract_text(image):
    """Run OCR on *image* and return the recognised text, one entry per line.

    Args:
        image: Image in a form accepted by ``ocr.ocr`` (the UI supplies a
            NumPy array), or ``None`` when nothing was uploaded.

    Returns:
        The recognised text fragments joined with newlines, or an empty
        string when there is no image or no text was detected.
    """
    if image is None:
        return ""

    result = ocr.ocr(image)

    # When no text is found the first page entry can be None (or the result
    # can be empty); iterating it directly would raise TypeError. Callers
    # treat an empty string as "no text detected".
    if not result or not result[0]:
        return ""

    # Each detection is indexed as line[1][0] to obtain its text.
    return "\n".join(line[1][0] for line in result[0])
# Function to clean extracted text
def clean_extracted_text(text):
    """Normalise line endings and tidy the whitespace of every line.

    Windows (CRLF) and bare CR line endings become LF. Each resulting line
    is stripped and its internal whitespace runs are collapsed to a single
    space. Blank lines are kept, so the line count is preserved.
    """
    unix_text = text.replace('\r\n', '\n')
    unix_text = unix_text.replace('\r', '\n')

    collapse_runs = re.compile(r'\s+').sub
    return '\n'.join(
        collapse_runs(' ', piece.strip()) for piece in unix_text.split('\n')
    )
# Function to extract attributes using regex
def extract_attributes(extracted_text):
    """Pull patient attributes out of OCR text with per-line regex matching.

    The text is cleaned with ``clean_extracted_text`` and then scanned line
    by line. For each line the patterns are tried in the order Name, Age,
    Gender, Phone Number; the first one that matches claims the line. A
    later line matching the same attribute overwrites the earlier value.

    Args:
        extracted_text: Raw OCR output, one detection per line.

    Returns:
        dict mapping the display labels that were found ("Name", "Age",
        "Gender", "Phone Number") to their extracted string values. Gender
        is translated through ``GENDER_MAPPING``.
    """
    attributes = {}

    cleaned_text = clean_extracted_text(extracted_text)
    print(f"Raw extracted text: '{extracted_text}'")
    print(f"Cleaned extracted text: '{cleaned_text}'")

    # Patterns for extracting personal information (one line at a time).
    # The phone pattern tolerates a space or hyphen after the +91 prefix,
    # e.g. "+91 9876543210" or "+91-9876543210".
    patterns = {
        "Name": r"Name\s*[:\-]?\s*([\w\s\-\.\',]+)",
        "Age": r"Age\s*[:\-]?\s*(\d{1,3})",
        "Gender": r"Gender\s*[:\-]?\s*(Male|Female|Other)",
        "Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)\s*[:\-]?\s*(?:\+91[\s\-]?)?([6-9]\d{9})",
    }
    # Compile once instead of on every line.
    compiled = {
        label: re.compile(pattern, re.IGNORECASE)
        for label, pattern in patterns.items()
    }

    for line in cleaned_text.split('\n'):
        for readable_attr, regex in compiled.items():
            match = regex.search(line)
            if match:
                attributes[readable_attr] = match.group(1).strip()
                print(f"Extracted {readable_attr}: '{attributes[readable_attr]}' from line: '{line}'")
                break  # Move to the next line once a match is found

    gender = attributes.get("Gender")
    if gender is not None:
        # The regex is case-insensitive, so the capture may be "male" or
        # "FEMALE"; look it up case-insensitively so it still maps to the
        # canonical picklist value.
        canonical = {label.lower(): value for label, value in GENDER_MAPPING.items()}
        attributes["Gender"] = canonical.get(gender.lower(), gender)

    return attributes
# Function to filter attributes for valid Salesforce fields
def filter_valid_attributes(attributes, valid_fields):
    """Translate display labels to Salesforce field names, keeping valid ones.

    Args:
        attributes: Mapping of display label to value.
        valid_fields: Collection of field API names that exist on the target
            Salesforce object.

    Returns:
        dict keyed by Salesforce API name containing only the entries whose
        label is known to ``ATTRIBUTE_MAPPING`` and whose API name is in
        ``valid_fields``.
    """
    filtered = {}
    for label, value in attributes.items():
        api_name = ATTRIBUTE_MAPPING.get(label)
        # A label without a mapping (for example a row added or renamed in
        # the editable table) is filtered out rather than raising KeyError.
        if api_name is not None and api_name in valid_fields:
            filtered[api_name] = value
    return filtered
# Function to create a record in Salesforce
def interact_with_salesforce(attributes):
    """Create a Patient_Registration__c record from the given attributes.

    Args:
        attributes: Mapping of display label (keys of ``ATTRIBUTE_MAPPING``)
            to the value to store.

    Returns:
        A status string for the UI: a success message containing the new
        record id, or an error message. Exceptions raised while connecting,
        validating or creating are caught and reported in the returned
        string rather than propagated.
    """
    try:
        # Validate that credentials are not empty
        if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
            raise ValueError("One or more Salesforce credentials are missing. Check environment variables.")

        # Initialize Salesforce connection
        sf = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain="login",
            version="60.0",
        )
        print(f"Successfully connected to Salesforce as {SALESFORCE_USERNAME}")

        # Reference the Patient_Registration__c object
        object_name = "Patient_Registration__c"
        sf_object = getattr(sf, object_name)

        # Get the object's schema to validate fields
        schema = sf_object.describe()
        valid_fields = {field["name"] for field in schema["fields"]}
        print(f"Valid fields for {object_name}: {valid_fields}")

        # Per-field permissions and picklist values (used for Gender__c below)
        field_details = {
            field["name"]: {
                "createable": field["createable"],
                "required": not field["nillable"] and not field["defaultedOnCreate"],
                "picklist_values": (
                    [val["value"] for val in field.get("picklistValues", [])]
                    if field.get("picklistValues")
                    else None
                ),
            }
            for field in schema["fields"]
        }
        print(f"Field details: {field_details}")

        # Filter attributes to match valid Salesforce fields
        filtered_attributes = filter_valid_attributes(attributes, valid_fields)

        # Attributes that were not found are represented as empty strings by
        # the UI table. Drop blanks so an absent optional field is simply
        # omitted instead of failing int("") or the picklist check.
        filtered_attributes = {
            name: value
            for name, value in filtered_attributes.items()
            if not _is_blank(value)
        }

        # Ensure Patient_Name__c is provided (likely required)
        if "Patient_Name__c" not in filtered_attributes:
            raise ValueError("Patient_Name__c is required but was not provided.")

        # Log the attributes being sent for debugging
        print(f"Attributes being sent to Salesforce: {filtered_attributes}")

        # Ensure Age__c is a number
        if "Age__c" in filtered_attributes:
            filtered_attributes["Age__c"] = int(filtered_attributes["Age__c"])

        # Validate Gender__c against picklist values
        if "Gender__c" in filtered_attributes:
            gender_values = field_details.get("Gender__c", {}).get("picklist_values", [])
            if gender_values and filtered_attributes["Gender__c"] not in gender_values:
                raise ValueError(f"Invalid value for Gender__c: '{filtered_attributes['Gender__c']}'. Allowed values: {gender_values}")

        # Create the record
        result = sf_object.create(filtered_attributes)
        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."
    except Exception as e:
        # UI boundary: report every failure as text instead of raising.
        return f"❌ Error interacting with Salesforce: {str(e)}"


def _is_blank(value):
    """Return True for None, NaN, or a string that is empty once stripped."""
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return True
    return isinstance(value, str) and not value.strip()
# Function to process the image and extract attributes
def process_image(image):
    """Extract text and attributes from an uploaded image for the UI.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(text_message, attributes_table, result)``:
        ``text_message`` is the extracted text or a notice explaining why
        nothing was extracted; ``attributes_table`` is a DataFrame with
        "Attribute" and "Value" columns in ``ATTRIBUTE_ORDER`` (missing
        attributes have an empty string value), or ``None`` when nothing was
        extracted; ``result`` is always ``None``.
    """
    # Nothing uploaded: return a notice instead of passing None to the OCR
    # engine.
    if image is None:
        return "No image provided. Please upload an image first.", None, None

    extracted_text = extract_text(image)
    if not extracted_text:
        return "No text detected in the image.", None, None

    attributes = extract_attributes(extracted_text)

    # Ensure all attributes are present, even if empty, in the desired order
    ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}

    # Convert attributes to DataFrame for display
    df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
    return f"Extracted Text:\n{extracted_text}", df, None
# Function to handle edited attributes and export to Salesforce
def export_to_salesforce(edited_df):
    """Send the (possibly user-edited) attribute table to Salesforce.

    The "Attribute" and "Value" columns of *edited_df* are paired into a
    dict, which is handed to ``interact_with_salesforce``. Its status
    message is returned unchanged. Any exception raised along the way is
    turned into an error string instead of propagating.
    """
    try:
        labels = edited_df["Attribute"]
        values = edited_df["Value"]
        edited_attributes = dict(zip(labels, values))
        return interact_with_salesforce(edited_attributes)
    except Exception as e:
        return f"❌ Error exporting to Salesforce: {str(e)}"
# Gradio Interface
def app():
    """Build the Gradio Blocks UI and return it without launching.

    The UI has a single tab with an image upload, an extract button, a text
    box for the OCR output, an editable attribute table, an OK button and a
    result box. "Extract" runs ``process_image``; "OK" sends the table to
    ``export_to_salesforce``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    with gr.Blocks() as demo:
        with gr.Tab("📥 OCR Processing"):
            # NOTE(review): the source's indentation was lost, so which
            # components sit inside this Row is a best guess - confirm the
            # intended layout.
            with gr.Row():
                image_input = gr.Image(type="numpy", label="📄 Upload Image")
                extract_button = gr.Button("Extract Text and Attributes")
            extracted_text_output = gr.Text(label="📝 Extracted Image Data")
            editable_df_output = gr.Dataframe(
                label="✏️ Edit Attributes (Key-Value Pairs)",
                interactive=True,
            )
            ok_button = gr.Button("OK")
            result_output = gr.Text(label="🚀 Result")

        # Define button actions
        extract_button.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[extracted_text_output, editable_df_output, result_output],
        )
        ok_button.click(
            fn=export_to_salesforce,
            inputs=[editable_df_output],
            outputs=[result_output],
        )
    return demo
if __name__ == "__main__":
    # Only start the server when the file is run as a script.
    # NOTE(review): share=True presumably requests a public Gradio share
    # link - confirm this is intended for an app that handles patient data.
    demo_app = app()
    demo_app.launch(share=True)