|
import os |
|
import tempfile |
|
import json |
|
from flask import Flask, request, jsonify |
|
from werkzeug.utils import secure_filename |
|
|
|
from transcription import translate_audio_file |
|
|
|
from segmentation import segment_batchalign |
|
from segmentation import reorganize_transcription_c_unit |
|
from annotation import annotate_maze_for_mazewhisper |
|
from morpheme import stanza_v1 |
|
from morpheme import annotate_morpheme |
|
|
|
|
|
|
|
# Flask application object; the route handler defined below registers on it.
app = Flask(__name__)

# Imported here, right before its only use in the optional login step.
from huggingface_hub import login

# Log in to the Hugging Face Hub only when a token is supplied through the
# environment; with HF_TOKEN unset or empty the login call is skipped.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
|
|
|
@app.route('/process', methods=['POST'])
def process_audio():
    """Run the transcription/annotation pipeline on an uploaded audio file.

    Expects a multipart POST containing an ``audio_file`` part. Optional form
    fields: ``device`` (default ``'cuda'``), ``pause_threshold`` (default
    ``0.5``) and ``num_speakers`` (default ``2``).

    The upload is written to a temporary file, passed through transcription,
    C-unit segmentation, maze annotation and morpheme annotation, and the
    resulting ``session_data/<session_id>/transcription_cunit.json`` is
    returned as the response body.

    Returns:
        ``(response, status)``: the parsed JSON with 200 on success; a JSON
        error with 400 when the upload is missing or a numeric form field is
        malformed; a JSON error with 500 when the expected output file was
        not produced.
    """
    if 'audio_file' not in request.files:
        return jsonify({'error': 'Missing audio file '}), 400

    # Reject malformed numeric options as a client error before any file is
    # written, instead of letting float()/int() surface as a 500.
    # NOTE(review): these two values are validated but not forwarded to any
    # pipeline step in this handler — confirm whether they should be.
    try:
        pause_threshold = float(request.form.get('pause_threshold', 0.5))
        num_speakers = int(request.form.get('num_speakers', 2))
    except ValueError:
        return jsonify({
            'error': 'pause_threshold must be a number and '
                     'num_speakers must be an integer'
        }), 400

    device = request.form.get('device', 'cuda')

    audio_file = request.files['audio_file']
    # Guard against a missing filename so secure_filename never sees None.
    filename = secure_filename(audio_file.filename or '')
    suffix = os.path.splitext(filename)[1] or '.wav'

    # Only reserve a unique path here. delete=False keeps the file after the
    # handle is closed, and closing it first lets the upload be written by
    # path on platforms that forbid opening the same file twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        audio_path = tmp.name

    try:
        audio_file.save(audio_path)
        app.logger.info("Processing uploaded audio: %s", audio_path)

        _, session_id = translate_audio_file(
            model="mazeWhisper",
            audio_path=audio_path,
            device=device,
            original_filename=filename,
        )

        cunit_count, ignored_count = reorganize_transcription_c_unit(
            session_id, segment_batchalign
        )
        app.logger.info(
            "Created %s C-units, ignored %s boundaries",
            cunit_count,
            ignored_count,
        )

        annotate_maze_for_mazewhisper(session_id)
        annotate_morpheme(session_id=session_id, morpheme_function=stanza_v1)

        json_path = f"session_data/{session_id}/transcription_cunit.json"
        if not os.path.isfile(json_path):
            return jsonify(
                {'error': f"Annotation file {json_path} not found"}
            ), 500

        with open(json_path, 'r', encoding='utf-8') as f:
            transcription = json.load(f)

        return jsonify(transcription), 200
    finally:
        # Always remove the temporary upload, including on early returns and
        # pipeline exceptions. Cleanup stays best-effort but is no longer
        # silent.
        try:
            os.remove(audio_path)
        except OSError:
            app.logger.warning(
                "Could not remove temporary file %s", audio_path
            )
|
|
|
|
|
if __name__ == "__main__":
    # The server binds to every interface, so the Werkzeug interactive
    # debugger (which executes arbitrary code from the browser) must not be
    # enabled unconditionally. Debug mode is opt-in via FLASK_DEBUG, using
    # the same truthiness rule Flask applies to that variable.
    debug_flag = os.environ.get("FLASK_DEBUG", "")
    debug_enabled = bool(debug_flag) and debug_flag.lower() not in {
        "0", "false", "no"
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|
|