ddxorg committed on
Commit
e234587
·
1 Parent(s): b6bad66

add redact

Browse files
Files changed (3) hide show
  1. app.py +8 -2
  2. packages.txt +1 -0
  3. requirements.txt +2 -5
app.py CHANGED
@@ -5,7 +5,12 @@ import gradio as gr
5
  from mistralai import Mistral, TextChunk, ImageURLChunk
6
  from PIL import Image
7
  import requests
 
8
 
 
 
 
 
9
 
10
  def process_msg(client, target):
11
 
@@ -113,9 +118,10 @@ with gr.Blocks(
113
 
114
  submit_btn = gr.Button("Extract Text and Images")
115
 
116
- with gr.Column(scale=2):
 
117
  ocr_result = gr.Textbox(label="Annoymized Text", show_copy_button=True)
118
- cleaned_output = gr.Textbox(label="Extracted Plain Text", lines=10, show_copy_button=True)
119
 
120
  def update_visibility(choice):
121
  return gr.update(visible=(choice == "URL")), gr.update(visible=(choice == "Upload file"))
 
5
  from mistralai import Mistral, TextChunk, ImageURLChunk
6
  from PIL import Image
7
  import requests
8
+ from presidio_image_redactor import ImageRedactorEngine
9
 
10
+ def redact_imag(img):
11
+ redactor = ImageRedactorEngine()
12
+ redact_img = redactor.redact(image=img, entities=["PERSON", "LOCATION", "DATE_TIME", "PHONE_NUMBER","MEDICAL_LICENSE"])
13
+ return redact_img
14
 
15
  def process_msg(client, target):
16
 
 
118
 
119
  submit_btn = gr.Button("Extract Text and Images")
120
 
121
+ with gr.Column(scale=3):
122
+ image_output = gr.Gallery(label="Redacted Image", height="contain")
123
  ocr_result = gr.Textbox(label="Annoymized Text", show_copy_button=True)
124
+ cleaned_output = gr.Textbox(label="Extracted JSON", lines=10, show_copy_button=True)
125
 
126
  def update_visibility(choice):
127
  return gr.update(visible=(choice == "URL")), gr.update(visible=(choice == "Upload file"))
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr-all
requirements.txt CHANGED
@@ -1,6 +1,3 @@
1
  mistralai
2
- pillow
3
- fastai
4
- transformers
5
- paddlepaddle
6
- paddleocr
 
1
  mistralai
2
+ presidio-image-redactor
3
+ pytesseract