"""Flask service that runs the mazeWhisper annotation pipeline on uploaded audio.

A single endpoint, ``POST /process``, accepts an audio upload, runs
transcription, C-unit segmentation, maze annotation and morpheme annotation,
and returns the contents of the session's ``transcription_cunit.json``.
"""

import json
import os
import tempfile

from flask import Flask, request, jsonify
from huggingface_hub import login
from werkzeug.utils import secure_filename

from transcription import translate_audio_file
from segmentation import segment_batchalign
from segmentation import reorganize_transcription_c_unit
from annotation import annotate_maze_for_mazewhisper
from morpheme import stanza_v1
from morpheme import annotate_morpheme

app = Flask(__name__)

# Authenticate with the Hugging Face Hub only when a token is configured.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)


def _run_annotation_pipeline(audio_path, filename, device):
    """Run transcription and every enabled annotation stage; return the session id."""
    _result, session_id = translate_audio_file(
        model="mazeWhisper",
        audio_path=audio_path,
        device=device,
        original_filename=filename,
    )

    cunit_count, ignored_count = reorganize_transcription_c_unit(
        session_id, segment_batchalign
    )
    app.logger.info(
        "Created %s C-units, ignored %s boundaries", cunit_count, ignored_count
    )

    annotate_maze_for_mazewhisper(session_id)
    annotate_morpheme(session_id=session_id, morpheme_function=stanza_v1)

    # Pipeline stages that are currently disabled:
    # annotate_pauses(session_id, pause_threshold)
    # annotate_repetitions(session_id)
    # # annotate_syllables(session_id)
    # annotate_fillerwords(session_id)
    # # annotate_transcript(session_id)
    # annotate_morpheme(session_id)
    # annotate_morpheme_omission(session_id)

    return session_id


@app.route('/process', methods=['POST'])
def process_audio():
    """Process an uploaded audio file and return its C-unit transcription.

    Form fields:
        audio_file: the uploaded audio (required, multipart file part).
        device: device string forwarded to ``translate_audio_file``
            (default ``'cuda'``).
        pause_threshold: float, default ``0.5``. Validated but not used by
            any currently enabled stage.
        num_speakers: int, default ``2``. Validated but not used by any
            currently enabled stage.

    Returns:
        ``(json, 200)`` with the parsed ``transcription_cunit.json`` on
        success; ``(json, 400)`` when the upload is missing or a numeric
        field is malformed; ``(json, 500)`` when the pipeline did not
        produce the expected annotation file.
    """
    if 'audio_file' not in request.files:
        return jsonify({'error': 'Missing audio file '}), 400

    # Validate the cheap form fields before touching the disk, so a bad
    # request is rejected as 400 instead of surfacing as an unhandled 500.
    device = request.form.get('device', 'cuda')
    try:
        pause_threshold = float(request.form.get('pause_threshold', 0.5))
        num_speakers = int(request.form.get('num_speakers', 2))
    except ValueError:
        return jsonify({
            'error': 'pause_threshold must be a number and '
                     'num_speakers must be an integer'
        }), 400

    audio_file = request.files['audio_file']
    # ``filename`` may be None/empty; secure_filename requires a string.
    filename = secure_filename(audio_file.filename or '')
    suffix = os.path.splitext(filename)[1] or '.wav'

    # Reserve a unique path, close the handle, then let the upload write
    # to that path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        audio_path = tmp.name

    try:
        audio_file.save(audio_path)
        app.logger.info(f"Processing uploaded audio: {audio_path}")

        session_id = _run_annotation_pipeline(audio_path, filename, device)

        json_path = f"session_data/{session_id}/transcription_cunit.json"
        if not os.path.isfile(json_path):
            return jsonify(
                {'error': f"Annotation file {json_path} not found"}
            ), 500

        with open(json_path, 'r', encoding='utf-8') as f:
            transcription = json.load(f)

        return jsonify(transcription), 200
    finally:
        # Always remove the temporary upload, including on early returns and
        # pipeline failures. Removal stays best-effort: a failure is logged,
        # never raised.
        try:
            os.remove(audio_path)
        except OSError as exc:
            app.logger.warning(
                "Could not remove temporary file %s: %s", audio_path, exc
            )


if __name__ == "__main__":
    # SECURITY: the Werkzeug debugger allows arbitrary code execution, so it
    # must not be enabled by default on a server bound to all interfaces.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true"}
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)