Sakshiw1 committed on
Commit
0dac856
·
verified ·
1 Parent(s): 729a749

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import os
import re

import gradio as gr
import pytesseract
from PIL import Image
5
+
6
# Location of the Tesseract binary in a default Windows installation.
TESSERACT_WINDOWS_PATH = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Only override pytesseract's command when that binary is actually present.
# On hosts without it (e.g. Linux deployments) the override is skipped so
# pytesseract keeps its own default command instead of pointing at a
# non-existent Windows path.
if os.path.isfile(TESSERACT_WINDOWS_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_WINDOWS_PATH
7
+
8
def perform_ocr(image):
    """Run Tesseract OCR on *image* and return the recognized text.

    The recognition is requested with the combined ``'hin+eng'`` language
    setting, and the string produced by ``pytesseract.image_to_string`` is
    returned unchanged.
    """
    return pytesseract.image_to_string(image, lang='hin+eng')
11
+
12
def search_first_keyword_in_text(text, keyword):
    """Return the first sentence of *text* that contains *keyword*, as HTML.

    Newlines in *text* are replaced by spaces, the text is split into
    sentences after ``.``, ``!`` or ``?`` followed by spaces, and the first
    sentence containing *keyword* (case-insensitive, matched literally) is
    returned with every occurrence wrapped in ``<b>...</b>``.

    Args:
        text: The text to search (e.g. OCR output).
        keyword: The literal phrase to look for. Regex metacharacters have
            no special meaning.

    Returns:
        The highlighted sentence as an HTML fragment, or
        ``"No matching sentence found."`` when nothing matches, or
        ``"Please enter a keyword to search."`` when *keyword* is empty.
    """
    if not keyword:
        return "Please enter a keyword to search."

    # Escape the keyword once and use the same pattern for both finding and
    # highlighting, so input such as "(" or "c++" neither raises re.error
    # nor selects a sentence that the highlighter then fails to mark.
    pattern = re.compile(f'({re.escape(keyword)})', re.IGNORECASE)

    flattened = text.replace('\n', ' ')
    for sentence in re.split(r'(?<=[.!?]) +', flattened):
        # With a capturing group, split() yields the matched keyword at the
        # odd indices and the surrounding text at the even ones.
        pieces = pattern.split(sentence)
        if len(pieces) == 1:
            continue  # keyword not present in this sentence

        # The result is rendered as HTML, so escape every piece of the
        # source text; only the <b> tags added here are real markup.
        rendered = ''.join(
            f'<b>{html.escape(piece, quote=False)}</b>' if index % 2
            else html.escape(piece, quote=False)
            for index, piece in enumerate(pieces)
        )
        return rendered.strip()

    return "No matching sentence found."
23
+
24
def ocr_and_search(image, keyword):
    """Run OCR on *image* and search the extracted text for *keyword*.

    Args:
        image: The image to recognize, or ``None`` when no image was
            supplied.
        keyword: The keyword to look for in the recognized text.

    Returns:
        A ``(extracted_text, search_result)`` tuple. When no image is given
        or any step raises, both elements carry the same message instead.
    """
    # Without an image there is nothing to recognize; report that directly
    # rather than surfacing an internal error from the OCR library.
    if image is None:
        message = "Please upload an image."
        return message, message

    try:
        extracted_text = perform_ocr(image)
        search_result = search_first_keyword_in_text(extracted_text, keyword)
        return extracted_text, search_result
    except Exception as e:
        # Deliberately broad: this is the UI callback boundary, so failures
        # are shown in both output panes instead of crashing the request.
        return str(e), str(e)
31
+
32
def web_app():
    """Build the Gradio interface around ``ocr_and_search`` and launch it.

    The UI takes an uploaded image (delivered as a PIL image) and a keyword,
    and shows the extracted text in a textbox and the search result in an
    HTML pane.
    """
    image_input = gr.Image(type="pil", label="Upload Image")
    keyword_input = gr.Textbox(
        placeholder="Enter keyword to search",
        label="Keyword Search",
    )

    text_output = gr.Textbox(label="Extracted Text", lines=10)
    result_output = gr.HTML(label="Search Result (First Matching Sentence)")

    app = gr.Interface(
        fn=ocr_and_search,
        inputs=[image_input, keyword_input],
        outputs=[text_output, result_output],
        title="OCR and Keyword Search Application",
    )
    app.launch()
46
+
47
# Start the web UI only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    web_app()