luck210 committed on
Commit
ae6c842
·
verified ·
1 Parent(s): 66ea69c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -21
app.py CHANGED
@@ -1,25 +1,44 @@
1
- from fastapi import FastAPI
2
- from transformers import pipeline
3
-
4
- ## create a new FASTAPI app instance
5
- app=FastAPI()
6
-
7
- # Initialize the text generation pipeline
8
- pipe = pipeline("text2text-generation", model="google/flan-t5-small")
9
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  @app.get("/")
12
  def home():
13
- return {"message":"Hello World"}
14
-
15
- # Define a function to handle the GET request at `/generate`
16
-
17
-
18
- @app.get("/generate")
19
- def generate(text:str):
20
- ## use the pipeline to generate text from given input text
21
- output=pipe(text)
22
-
23
- ## return the generated text in the JSON response
24
- return {"output":output[0]['generated_text']}
25
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ from transformers import MarianMTModel, MarianTokenizer
3
+ import PyPDF2
4
+ import docx
5
+ import io
6
+
7
+ app = FastAPI()
8
+
9
+ # Load the MarianMT model used for translation (e.g. English → French)
10
+ MODEL_NAME = "Helsinki-NLP/opus-mt-en-fr"
11
+ tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
12
+ model = MarianMTModel.from_pretrained(MODEL_NAME)
13
+
14
def translate_text(text, src_lang="en", tgt_lang="fr"):
    """Translate ``text`` with the module-level MarianMT tokenizer and model.

    The text is split on sentence boundaries and regrouped into chunks that
    are translated in small batches. Tokenizing a whole document in a single
    call with ``truncation=True`` cuts the input at the model's maximum
    length, so everything after that point would be missing from the result.

    Args:
        text: Source text. ``None``, empty or whitespace-only input returns
            ``""`` without calling the model.
        src_lang: Source language code. Accepted for interface compatibility
            but not used here: the direction is fixed by the loaded model.
        tgt_lang: Target language code. Not used, same as ``src_lang``.

    Returns:
        The translated chunks joined by single spaces.
    """
    import re  # local import so the block does not depend on file-level imports

    if not text or not text.strip():
        return ""

    # Heuristic sizes: chunks of a few hundred characters are expected to
    # stay under the tokenizer limit; batching bounds padding/memory use.
    max_chars = 400
    batch_size = 8

    chunks = []
    current = ""
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sentence:
            continue
        candidate = f"{current} {sentence}" if current else sentence
        if current and len(candidate) > max_chars:
            chunks.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        chunks.append(current)

    translated_chunks = []
    for start in range(0, len(chunks), batch_size):
        batch = chunks[start:start + batch_size]
        # truncation=True stays as a safety net for a single sentence that is
        # still longer than the model accepts.
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)
        generated = model.generate(**inputs)
        translated_chunks.extend(
            tokenizer.batch_decode(generated, skip_special_tokens=True)
        )
    return " ".join(translated_chunks)
18
+
19
def extract_text_from_pdf(pdf_file):
    """Return the text of all pages of a PDF joined by single spaces.

    Args:
        pdf_file: Object passed straight to ``PyPDF2.PdfReader`` (the caller
            in this file supplies an ``io.BytesIO`` of the uploaded bytes).

    Returns:
        The page texts joined with ``" "``. Pages for which
        ``extract_text()`` returns an empty/falsy value are skipped, so a
        document with no extractable text yields ``""``.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Extract each page exactly once; the previous version called
    # extract_text() twice per page (once to filter, once for the value).
    page_texts = (page.extract_text() for page in reader.pages)
    return " ".join(page_text for page_text in page_texts if page_text)
23
+
24
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX document joined by single spaces.

    Args:
        docx_file: Object passed straight to ``docx.Document`` (the caller in
            this file supplies an ``io.BytesIO`` of the uploaded bytes).

    Returns:
        The text of every non-blank entry in ``doc.paragraphs`` joined with
        ``" "``. Only ``doc.paragraphs`` is read; any text stored elsewhere
        in the document is not collected by this function.
    """
    doc = docx.Document(docx_file)
    # Skip blank paragraphs so spacer lines do not turn into runs of spaces,
    # matching the PDF extractor, which also drops empty pages.
    return " ".join(
        paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()
    )
28
+
29
@app.post("/translate/")
async def translate_file(file: UploadFile = File(...), src_lang: str = "en", tgt_lang: str = "fr"):
    """Translate the text content of an uploaded PDF or DOCX file.

    Args:
        file: The uploaded document; its type is chosen from the filename
            extension (``.pdf`` or ``.docx``, compared case-insensitively).
        src_lang: Source language code, forwarded to ``translate_text``.
        tgt_lang: Target language code, forwarded to ``translate_text``.

    Returns:
        ``{"translated_text": ...}`` on success, or ``{"error": ...}`` when
        the extension is not supported (the upload is not read in that case).
    """
    # The filename can be absent on an upload; treat that as "unsupported"
    # instead of failing with AttributeError, and accept upper-case suffixes.
    filename = (file.filename or "").lower()
    if filename.endswith(".pdf"):
        extract = extract_text_from_pdf
    elif filename.endswith(".docx"):
        extract = extract_text_from_docx
    else:
        return {"error": "Format non supporté. Utilise PDF ou DOCX."}

    # NOTE(review): extraction and translation run synchronously inside this
    # coroutine; consider offloading them if request latency becomes an issue.
    text = extract(io.BytesIO(await file.read()))
    translated_text = translate_text(text, src_lang, tgt_lang)
    return {"translated_text": translated_text}
40
 
41
  @app.get("/")
42
  def home():
43
+ return {"message": "Bienvenue sur l'API de traduction de fichiers !"}
 
 
 
 
 
 
 
 
 
 
 
44