luck210 committed on
Commit
fa0996d
·
verified ·
1 Parent(s): 81b8811

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -32
app.py CHANGED
@@ -1,44 +1,52 @@
1
- from fastapi import FastAPI, UploadFile, File
2
- from transformers import MarianMTModel, MarianTokenizer
3
  import PyPDF2
4
  import docx
5
- import io
 
 
6
 
7
  app = FastAPI()
8
 
9
- # Charger le modèle MarianMT pour la traduction (ex: anglais -> français)
10
- MODEL_NAME = "Helsinki-NLP/opus-mt-en-fr"
11
- tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
12
- model = MarianMTModel.from_pretrained(MODEL_NAME)
13
 
14
- def translate_text(text, src_lang="en", tgt_lang="fr"):
15
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
16
- translated = model.generate(**inputs)
17
- return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
18
-
19
- def extract_text_from_pdf(pdf_file):
20
- reader = PyPDF2.PdfReader(pdf_file)
21
- text = " ".join(page.extract_text() for page in reader.pages if page.extract_text())
22
  return text
23
 
24
- def extract_text_from_docx(docx_file):
25
- doc = docx.Document(docx_file)
26
- text = " ".join([p.text for p in doc.paragraphs])
 
27
  return text
28
 
29
  @app.post("/translate/")
30
- async def translate_file(file: UploadFile = File(...), src_lang: str = "en", tgt_lang: str = "fr"):
31
- if file.filename.endswith(".pdf"):
32
- text = extract_text_from_pdf(io.BytesIO(await file.read()))
33
- elif file.filename.endswith(".docx"):
34
- text = extract_text_from_docx(io.BytesIO(await file.read()))
35
- else:
36
- return {"error": "Format non supporté. Utilise PDF ou DOCX."}
37
-
38
- translated_text = translate_text(text, src_lang, tgt_lang)
39
- return {"translated_text": translated_text}
40
-
41
- @app.get("/")
42
- def home():
43
- return {"message": "Bienvenue sur l'API de traduction de fichiers !"}
 
 
 
 
 
 
 
 
 
 
 
44
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from transformers import pipeline
3
  import PyPDF2
4
  import docx
5
+ import os
6
+ import uvicorn
7
+ from io import BytesIO
8
 
9
  app = FastAPI()
10
 
11
+ # Charger le modèle de traduction depuis Hugging Face (Exemple : Anglais -> Français)
12
+ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
 
 
13
 
14
+ def extract_text_from_pdf(file: BytesIO) -> str:
15
+ """Extrait le texte d'un fichier PDF."""
16
+ reader = PyPDF2.PdfReader(file)
17
+ text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
 
 
 
 
18
  return text
19
 
20
+ def extract_text_from_docx(file: BytesIO) -> str:
21
+ """Extrait le texte d'un fichier DOCX."""
22
+ doc = docx.Document(file)
23
+ text = "\n".join([para.text for para in doc.paragraphs])
24
  return text
25
 
26
  @app.post("/translate/")
27
+ async def translate_file(file: UploadFile = File(...)):
28
+ """Endpoint pour traduire un fichier PDF ou DOCX."""
29
+ try:
30
+ contents = await file.read()
31
+ file_io = BytesIO(contents)
32
+ file_extension = file.filename.split(".")[-1]
33
+
34
+ if file_extension == "pdf":
35
+ text = extract_text_from_pdf(file_io)
36
+ elif file_extension == "docx":
37
+ text = extract_text_from_docx(file_io)
38
+ else:
39
+ return {"error": "Format non supporté. Utilisez PDF ou DOCX."}
40
+
41
+ # Traduire le texte
42
+ translation = translator(text, max_length=1000)
43
+ translated_text = " ".join([t["translation_text"] for t in translation])
44
+
45
+ return {"original_text": text[:500], "translated_text": translated_text[:500]} # Limite pour affichage
46
+ except Exception as e:
47
+ return {"error": str(e)}
48
+
49
+ if __name__ == "__main__":
50
+ port = int(os.environ.get("PORT", 7860))
51
+ uvicorn.run(app, host="0.0.0.0", port=port)
52