File size: 2,452 Bytes
5806e12 04360c2 5806e12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
import tempfile
import json
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from transcription import translate_audio_file
from segmentation import segment_batchalign
from segmentation import reorganize_transcription_c_unit
from annotation import annotate_maze_for_mazewhisper
from morpheme import stanza_v1
from morpheme import annotate_morpheme
# Hugging Face Hub client, imported next to its only use in this module.
from huggingface_hub import login

# WSGI application object; the route handlers in this module register on it.
app = Flask(__name__)

# Log in to the Hugging Face Hub only when a token is supplied through the
# environment; without one the login step is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
@app.route('/process', methods=['POST'])
def process_audio():
    """Run the transcription and annotation pipeline on an uploaded audio file.

    Expects a multipart POST containing an ``audio_file`` part. Optional form
    fields are ``device`` (default ``'cuda'``), ``pause_threshold`` (float,
    default 0.5) and ``num_speakers`` (int, default 2).

    Returns:
        A ``(response, status)`` tuple:
        * 200 with the parsed contents of
          ``session_data/<session_id>/transcription_cunit.json``;
        * 400 when the upload is missing or a numeric field is malformed;
        * 500 when the pipeline did not produce the annotation file.

    The uploaded audio is written to a temporary file that is removed on every
    exit path, including when a pipeline stage raises.
    """
    if 'audio_file' not in request.files:
        return jsonify({'error': 'Missing audio file '}), 400

    # Validate the numeric fields before touching the disk so a malformed
    # request is rejected with 400 instead of surfacing as an unhandled 500.
    # NOTE: both values are only consumed by annotation stages that are
    # currently disabled; they are still validated to keep the API contract.
    try:
        pause_threshold = float(request.form.get('pause_threshold', 0.5))
        num_speakers = int(request.form.get('num_speakers', 2))
    except ValueError:
        return jsonify({
            'error': 'pause_threshold must be a number and '
                     'num_speakers must be an integer'
        }), 400

    device = request.form.get('device', 'cuda')
    audio_file = request.files['audio_file']
    # Guard against a missing filename; secure_filename() requires a string.
    filename = secure_filename(audio_file.filename or '')
    suffix = os.path.splitext(filename)[1] or '.wav'

    # Reserve a unique path, then close the handle before writing so the save
    # below also works on platforms that forbid reopening an open temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        audio_path = tmp.name

    try:
        audio_file.save(audio_path)
        app.logger.info("Processing uploaded audio: %s", audio_path)

        _, session_id = translate_audio_file(
            model="mazeWhisper",
            audio_path=audio_path,
            device=device,
            original_filename=filename,
        )
        cunit_count, ignored_count = reorganize_transcription_c_unit(
            session_id, segment_batchalign
        )
        app.logger.info(
            "Created %s C-units, ignored %s boundaries",
            cunit_count, ignored_count,
        )
        annotate_maze_for_mazewhisper(session_id)
        annotate_morpheme(session_id=session_id, morpheme_function=stanza_v1)
        # NOTE: the pause, repetition, syllable, filler-word, transcript and
        # morpheme-omission annotation stages are currently disabled.

        json_path = f"session_data/{session_id}/transcription_cunit.json"
        if not os.path.isfile(json_path):
            return jsonify(
                {'error': f"Annotation file {json_path} not found"}
            ), 500

        with open(json_path, 'r', encoding='utf-8') as f:
            transcription = json.load(f)
    finally:
        # Best-effort cleanup: a failure to delete must not mask the real
        # outcome of the request, but it should be visible in the logs.
        try:
            os.remove(audio_path)
        except OSError as exc:
            app.logger.warning(
                "Could not remove temporary file %s: %s", audio_path, exc
            )

    return jsonify(transcription), 200
if __name__ == "__main__":
    # The server binds to every interface, so the Werkzeug interactive
    # debugger (which allows arbitrary code execution) must never be on by
    # default. Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    if not debug_enabled:
        # Outside debug mode the app logger would fall back to WARNING;
        # keep the request-level INFO messages visible.
        app.logger.setLevel("INFO")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|