Spaces:

Pranay25
/

chatbot

Sleeping

App Files Files Community

chatbot / app.py

Pranay25

Update app.py

dabb465 verified 3 months ago

raw

history blame contribute delete

8.33 kB

	import os
	from paddleocr import PaddleOCR
	from PIL import Image
	import gradio as gr
	import re
	from simple_salesforce import Salesforce
	import pandas as pd

	# Readable attribute label -> Salesforce API field name
	ATTRIBUTE_MAPPING = {
	    "Name": "Patient_Name__c",
	    "Age": "Age__c",
	    "Gender": "Gender__c",
	    "Phone Number": "Phone_Number__c",
	}

	# Order in which the attributes are laid out for display
	ATTRIBUTE_ORDER = [
	    "Name",
	    "Age",
	    "Gender",
	    "Phone Number",
	]

	# Extracted gender label -> value stored in the Gender__c picklist
	# (note that "Other" is stored as "Others")
	GENDER_MAPPING = {
	    "Male": "Male",
	    "Female": "Female",
	    "Other": "Others",
	}

	# Salesforce login details are read from the process environment;
	# each one is None when its variable is not set.
	SALESFORCE_USERNAME = os.environ.get("SALESFORCE_USERNAME")
	SALESFORCE_PASSWORD = os.environ.get("SALESFORCE_PASSWORD")
	SALESFORCE_SECURITY_TOKEN = os.environ.get("SALESFORCE_SECURITY_TOKEN")

	# Debug output: the username is printed as-is, while the password and
	# security token are only reported as present / absent.
	print(f"Using Salesforce credentials - Username: {SALESFORCE_USERNAME}")
	print(f"Password set: {'Yes' if SALESFORCE_PASSWORD else 'No'}")
	print(f"Security token set: {'Yes' if SALESFORCE_SECURITY_TOKEN else 'No'}")

	# Module-level PaddleOCR engine used by extract_text(); configured for
	# lang='en' with use_angle_cls switched on.
	ocr = PaddleOCR(
	    lang='en',
	    use_angle_cls=True,
	)

	# Function to extract text from an image using PaddleOCR
	def extract_text(image):
	    """Run the module-level OCR engine on *image* and return its text.

	    Args:
	        image: The image to recognise, in whatever form ``ocr.ocr``
	            accepts; ``None`` is treated as "no image".

	    Returns:
	        The recognised strings joined with newlines, or an empty string
	        when there is no image or nothing was detected.
	    """
	    if image is None:
	        return ""

	    result = ocr.ocr(image)

	    # PaddleOCR reports a page without detections as None (result == [None]),
	    # which cannot be iterated; treat that the same as "no text".
	    if not result or not result[0]:
	        return ""

	    # The recognised string is read from position [1][0] of each entry.
	    return "\n".join(line[1][0] for line in result[0])

	# Function to clean extracted text
	def clean_extracted_text(text):
	    """Normalise line endings and collapse whitespace inside every line.

	    Carriage returns (alone or as CRLF) become newlines; each resulting
	    line is stripped and its internal whitespace runs are reduced to a
	    single space. The number of lines is preserved.
	    """
	    unified = text.replace('\r\n', '\n').replace('\r', '\n')

	    tidy_lines = []
	    for raw_line in unified.split('\n'):
	        trimmed = raw_line.strip()
	        tidy_lines.append(re.sub(r'\s+', ' ', trimmed))

	    return '\n'.join(tidy_lines)

	# Function to extract attributes using regex
	def extract_attributes(extracted_text):
	    """Pull Name, Age, Gender and Phone Number out of OCR text.

	    The text is cleaned with ``clean_extracted_text`` and scanned line by
	    line. Each line contributes at most one attribute (the first pattern
	    that matches it); a later line overwrites an earlier value for the
	    same attribute.

	    Args:
	        extracted_text: Raw OCR text with one recognised entry per line.

	    Returns:
	        A dict mapping readable attribute names to the extracted string
	        values. Attributes that were not found are absent. A Gender value
	        is translated through ``GENDER_MAPPING``.
	    """
	    attributes = {}

	    # Clean the extracted text
	    cleaned_text = clean_extracted_text(extracted_text)
	    print(f"Raw extracted text: '{extracted_text}'")
	    print(f"Cleaned extracted text: '{cleaned_text}'")

	    # Between a label and its value: an optional ':' or '-' with optional
	    # whitespace on either side, so "Name: X", "Name : X", "Name - X" and
	    # "Name X" are all accepted.
	    separator = r"\s*[:\-]?\s*"

	    # Labels are wrapped in word boundaries so that e.g. "Page 12" or
	    # "Surname" do not trigger the Age / Name patterns. Alternatives use
	    # a plain '|' (an escaped '\|' would only match a literal pipe).
	    pattern_sources = {
	        "Name": r"\bName\b" + separator + r"([\w\s\-\.\',]+)",
	        "Age": r"\bAge\b" + separator + r"(\d{1,3})",
	        "Gender": r"\bGender\b" + separator + r"(Male|Female|Other)",
	        "Phone Number": (
	            r"\b(?:Phone Number|Phone|Mobile|Phonenumber)\b"
	            + separator
	            + r"(?:\+91)?([6-9]\d{9})"
	        ),
	    }
	    # Compile once, outside the per-line loop.
	    patterns = {
	        readable_attr: re.compile(source, re.IGNORECASE)
	        for readable_attr, source in pattern_sources.items()
	    }

	    # Process each line separately
	    for line in cleaned_text.split('\n'):
	        for readable_attr, pattern in patterns.items():
	            match = pattern.search(line)
	            if match:
	                attributes[readable_attr] = match.group(1).strip()
	                print(f"Extracted {readable_attr}: '{attributes[readable_attr]}' from line: '{line}'")
	                break  # Move to the next line once a match is found

	    if "Gender" in attributes:
	        # Matching is case-insensitive but the mapping keys are capitalised,
	        # so normalise "male" / "MALE" to "Male" before the lookup.
	        canonical_gender = attributes["Gender"].capitalize()
	        attributes["Gender"] = GENDER_MAPPING.get(canonical_gender, canonical_gender)

	    return attributes

	# Function to filter attributes for valid Salesforce fields
	def filter_valid_attributes(attributes, valid_fields):
	    """Translate readable attribute names to Salesforce fields and filter.

	    Args:
	        attributes: dict keyed by readable attribute names (the keys of
	            ``ATTRIBUTE_MAPPING``); other keys are ignored.
	        valid_fields: Collection of field API names that exist on the
	            target Salesforce object.

	    Returns:
	        A new dict keyed by Salesforce API field name, containing only the
	        attributes that have a mapping and whose mapped field is in
	        *valid_fields*.
	    """
	    filtered = {}
	    for key, value in attributes.items():
	        api_name = ATTRIBUTE_MAPPING.get(key)
	        # Labels without a mapping (for example rows added or renamed by
	        # hand in the editable table) are skipped rather than raising
	        # KeyError.
	        if api_name is not None and api_name in valid_fields:
	            filtered[api_name] = value
	    return filtered

	# Helper: decide whether an attribute value carries no information
	def _is_blank(value):
	    """Return True for None, empty/whitespace-only strings and float NaN."""
	    if value is None:
	        return True
	    if isinstance(value, str):
	        return not value.strip()
	    if isinstance(value, float):
	        return value != value  # NaN is the only float unequal to itself
	    return False


	# Helper: convert an Age value to int with a readable error message
	def _parse_age(value):
	    """Return *value* as an int; raise ValueError if it is not a whole number."""
	    message = f"Invalid value for Age__c: '{value}'. Expected a whole number."
	    try:
	        number = float(str(value).strip())
	    except ValueError:
	        raise ValueError(message) from None
	    # Rejects fractions as well as inf / nan.
	    if not number.is_integer():
	        raise ValueError(message)
	    return int(number)


	# Function to create a record in Salesforce
	def interact_with_salesforce(attributes):
	    """Create a Patient_Registration__c record from the given attributes.

	    Connects with the module-level Salesforce credentials, reads the
	    object's schema, keeps only attributes that map to existing fields,
	    drops blank values, validates Name / Age / Gender and creates the
	    record.

	    Args:
	        attributes: dict keyed by readable attribute names (see
	            ``ATTRIBUTE_MAPPING``) with the values to store.

	    Returns:
	        A status message: a success text containing the new record id, or
	        an error text describing what went wrong. Exceptions are caught
	        and reported through the message rather than propagated.
	    """
	    try:
	        # Validate that credentials are not empty
	        if not all([SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN]):
	            raise ValueError("One or more Salesforce credentials are missing. Check environment variables.")

	        # Initialize Salesforce connection
	        sf = Salesforce(
	            username=SALESFORCE_USERNAME,
	            password=SALESFORCE_PASSWORD,
	            security_token=SALESFORCE_SECURITY_TOKEN,
	            domain="login",
	            version="60.0"
	        )
	        print(f"Successfully connected to Salesforce as {SALESFORCE_USERNAME}")

	        # Reference the Patient_Registration__c object
	        object_name = "Patient_Registration__c"
	        sf_object = getattr(sf, object_name)

	        # Get the object's schema to validate fields
	        schema = sf_object.describe()
	        valid_fields = {field["name"] for field in schema["fields"]}
	        print(f"Valid fields for {object_name}: {valid_fields}")

	        # Check field permissions and picklist values for Gender__c
	        field_details = {field["name"]: {
	            "createable": field["createable"],
	            "required": not field["nillable"] and not field["defaultedOnCreate"],
	            "picklist_values": [val["value"] for val in field.get("picklistValues", [])] if field.get("picklistValues") else None
	        } for field in schema["fields"]}
	        print(f"Field details: {field_details}")

	        # Filter attributes to match valid Salesforce fields
	        filtered_attributes = filter_valid_attributes(attributes, valid_fields)

	        # Attributes that were not found arrive as empty strings (or NaN /
	        # None after editing the table). Leave them out of the payload so
	        # they neither break the Age conversion nor fail picklist
	        # validation; surrounding whitespace is trimmed from text values.
	        filtered_attributes = {
	            field_name: value.strip() if isinstance(value, str) else value
	            for field_name, value in filtered_attributes.items()
	            if not _is_blank(value)
	        }

	        # Ensure Patient_Name__c is provided (likely required)
	        if "Patient_Name__c" not in filtered_attributes or not filtered_attributes["Patient_Name__c"]:
	            raise ValueError("Patient_Name__c is required but was not provided.")

	        # Log the attributes being sent for debugging
	        print(f"Attributes being sent to Salesforce: {filtered_attributes}")

	        # Ensure Age__c is a number
	        if "Age__c" in filtered_attributes:
	            filtered_attributes["Age__c"] = _parse_age(filtered_attributes["Age__c"])

	        # Validate Gender__c against picklist values
	        if "Gender__c" in filtered_attributes:
	            gender_values = field_details.get("Gender__c", {}).get("picklist_values") or []
	            if gender_values and filtered_attributes["Gender__c"] not in gender_values:
	                raise ValueError(f"Invalid value for Gender__c: '{filtered_attributes['Gender__c']}'. Allowed values: {gender_values}")

	        # Create the record
	        result = sf_object.create(filtered_attributes)
	        return f"✅ Successfully created Patient Registration record with ID: {result['id']}."

	    except Exception as e:
	        # UI boundary: report every failure as a message for the result box.
	        return f"❌ Error interacting with Salesforce: {str(e)}"

	# Function to process the image and extract attributes
	def process_image(image):
	    """Run OCR on an uploaded image and prepare the editable attribute table.

	    Args:
	        image: The uploaded image as delivered by the UI, or ``None`` when
	            nothing has been uploaded.

	    Returns:
	        A 3-tuple ``(text, table, result)`` matching the three output
	        components wired to the extract button:
	        * text   - the extracted text prefixed with "Extracted Text:", or
	                   a notice when there is no image / no text;
	        * table  - a DataFrame with "Attribute" and "Value" columns, one
	                   row per entry of ``ATTRIBUTE_ORDER`` (empty string for
	                   attributes that were not found), or ``None``;
	        * result - always ``None``.
	    """
	    # Extract was clicked without an image: report it instead of handing
	    # None to the OCR engine.
	    if image is None:
	        return "Please upload an image first.", None, None

	    extracted_text = extract_text(image)
	    if not extracted_text:
	        return "No text detected in the image.", None, None

	    attributes = extract_attributes(extracted_text)

	    # Ensure all attributes are present, even if empty, in the desired order
	    ordered_attributes = {attr: attributes.get(attr, "") for attr in ATTRIBUTE_ORDER}

	    # Convert attributes to DataFrame for display
	    df = pd.DataFrame(list(ordered_attributes.items()), columns=["Attribute", "Value"])
	    return f"Extracted Text:\n{extracted_text}", df, None

	# Function to handle edited attributes and export to Salesforce
	def export_to_salesforce(edited_df):
	    """Send the (possibly edited) attribute table to Salesforce.

	    The "Attribute" and "Value" columns of *edited_df* are paired up into
	    a dict, which is handed to ``interact_with_salesforce``. Its status
	    message is returned; any exception raised along the way is turned
	    into an error message instead of propagating.
	    """
	    try:
	        labels = edited_df["Attribute"]
	        values = edited_df["Value"]

	        # Rebuild the attribute dictionary row by row
	        edited_attributes = {label: value for label, value in zip(labels, values)}

	        # Hand over to the Salesforce layer and pass its message through
	        return interact_with_salesforce(edited_attributes)

	    except Exception as e:
	        return f"❌ Error exporting to Salesforce: {str(e)}"

	# Gradio Interface
	# Builds the Blocks UI (a single "📥 OCR Processing" tab) and returns it
	# without launching it.
	# NOTE(review): indentation was lost in this copy of the file, so which
	# widgets sit inside the Row / Tab `with` blocks cannot be read from the
	# text below — restore the nesting from the original app.py before running.
	def app():
	with gr.Blocks() as demo:
	with gr.Tab("📥 OCR Processing"):
	with gr.Row():
	# Input image is requested with type="numpy"; it is passed to process_image.
	image_input = gr.Image(type="numpy", label="📄 Upload Image")
	extract_button = gr.Button("Extract Text and Attributes")
	extracted_text_output = gr.Text(label="📝 Extracted Image Data")
	# Editable table; its current contents are the input to export_to_salesforce.
	editable_df_output = gr.Dataframe(label="✏️ Edit Attributes (Key-Value Pairs)", interactive=True)
	ok_button = gr.Button("OK")
	result_output = gr.Text(label="🚀 Result")

	# Define button actions
	# process_image returns three values, one per output component listed here.
	extract_button.click(
	fn=process_image,
	inputs=[image_input],
	outputs=[extracted_text_output, editable_df_output, result_output]
	)
	# export_to_salesforce returns a single status message for the result box.
	ok_button.click(
	fn=export_to_salesforce,
	inputs=[editable_df_output],
	outputs=[result_output]
	)

	return demo

	if __name__ == "__main__":
	    # Script entry point: build the interface, then launch it with share=True.
	    interface = app()
	    interface.launch(share=True)