ddxorg committed on
Commit
65f7fc3
·
1 Parent(s): 1db90b6

add redact

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. requirements.txt +2 -1
app.py CHANGED
@@ -31,6 +31,8 @@ def process_msg(client, image):
31
  "image_url": f"data:image/jpeg;base64,{base_encode}"
32
  }
33
  )
 
 
34
 
35
  processed_output=ocr_response.pages[0].markdown
36
 
@@ -50,8 +52,8 @@ def process_msg(client, image):
50
  "and frequency (in medical short forms),"
51
  "dispense quantity,"
52
  "and number of refills"
53
- "prescriber name, phone number and ID."
54
  "return in json message only"
 
55
  )
56
  )
57
  ]
 
31
  "image_url": f"data:image/jpeg;base64,{base_encode}"
32
  }
33
  )
34
+ # in real model need to run Document AI first then redact after
35
+ # risk of AI seeing redacted info under black visual boxes
36
 
37
  processed_output=ocr_response.pages[0].markdown
38
 
 
52
  "and frequency (in medical short forms),"
53
  "dispense quantity,"
54
  "and number of refills"
 
55
  "return in json message only"
56
+ "run this query twice and only return consistent information"
57
  )
58
  )
59
  ]
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  mistralai
2
  presidio-image-redactor
3
- pytesseract
 
 
1
  mistralai
2
  presidio-image-redactor
3
+ pytesseract
4
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl