File size: 2,452 Bytes
5806e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04360c2
 
 
 
 
5806e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import tempfile
import json
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename

from transcription import translate_audio_file

from segmentation import segment_batchalign
from segmentation import reorganize_transcription_c_unit
from annotation import annotate_maze_for_mazewhisper
from morpheme import stanza_v1
from morpheme import annotate_morpheme



# Flask application object; the route handler below registers on it.
app = Flask(__name__)

from huggingface_hub import login

# Log in to the Hugging Face Hub only when a token is supplied through the
# HF_TOKEN environment variable; when it is unset or empty, no login call
# is made.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

@app.route('/process', methods=['POST'])
def process_audio():
    """Run the transcription/annotation pipeline on an uploaded audio file.

    Expects a multipart POST with these fields:
        audio_file: the audio upload (required).
        device: forwarded to ``translate_audio_file`` (default ``'cuda'``).
        pause_threshold: float, default 0.5. Validated, but not consumed by
            the pipeline steps that are currently enabled.
        num_speakers: int, default 2. Validated, but not consumed by the
            pipeline steps that are currently enabled.

    Returns:
        A ``(response, status)`` tuple:
        * 200 with the parsed contents of
          ``session_data/<session_id>/transcription_cunit.json``;
        * 400 when the upload is missing or a numeric field is malformed;
        * 500 when the pipeline did not leave the expected JSON file behind.

    Exceptions raised by the pipeline steps propagate to Flask unchanged; the
    temporary copy of the upload is removed on every exit path.
    """
    if 'audio_file' not in request.files:
        return jsonify({'error': 'Missing audio file '}), 400
    audio_file = request.files['audio_file']
    # A file part with an empty filename carries no usable upload (browsers
    # send this when the form is submitted without choosing a file).
    if not audio_file.filename:
        return jsonify({'error': 'Missing audio file '}), 400
    filename = secure_filename(audio_file.filename)

    # Validate the form fields before anything is written to disk so a bad
    # request is answered with 400 instead of an unhandled ValueError.
    device = request.form.get('device', 'cuda')
    try:
        # Parsed for validation only; the steps that would use these values
        # are disabled below.
        pause_threshold = float(request.form.get('pause_threshold', 0.5))
        num_speakers = int(request.form.get('num_speakers', 2))
    except ValueError:
        return jsonify({
            'error': 'pause_threshold must be a number and '
                     'num_speakers must be an integer'
        }), 400

    # Keep the upload's extension on the temp file, falling back to '.wav'
    # when the sanitized name has none.
    suffix = os.path.splitext(filename)[1] or '.wav'
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        audio_path = tmp.name

    try:
        audio_file.save(audio_path)
        app.logger.info("Processing uploaded audio: %s", audio_path)

        _, session_id = translate_audio_file(
            model="mazeWhisper",
            audio_path=audio_path,
            device=device,
            original_filename=filename,
        )

        cunit_count, ignored_count = reorganize_transcription_c_unit(
            session_id, segment_batchalign
        )
        app.logger.info(
            "Created %s C-units, ignored %s boundaries",
            cunit_count,
            ignored_count,
        )

        annotate_maze_for_mazewhisper(session_id)
        annotate_morpheme(session_id=session_id, morpheme_function=stanza_v1)

        # Disabled steps, kept for reference:
        #   annotate_pauses(session_id, pause_threshold)
        #   annotate_repetitions(session_id)
        #   annotate_syllables(session_id)
        #   annotate_fillerwords(session_id)
        #   annotate_transcript(session_id)
        #   annotate_morpheme(session_id)
        #   annotate_morpheme_omission(session_id)

        json_path = f"session_data/{session_id}/transcription_cunit.json"
        if not os.path.isfile(json_path):
            return jsonify({'error': f"Annotation file {json_path} not found"}), 500

        with open(json_path, 'r', encoding='utf-8') as f:
            transcription = json.load(f)

        return jsonify(transcription), 200
    finally:
        # Best-effort cleanup: a failed removal must not mask the response or
        # the original exception, but it should be visible in the logs.
        try:
            os.remove(audio_path)
        except OSError as exc:
            app.logger.warning(
                "Could not remove temporary file %s: %s", audio_path, exc
            )


if __name__ == "__main__":
    # The server listens on all interfaces, so the Werkzeug debugger (which
    # lets a client execute arbitrary Python) must not be on by default.
    # Opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in (
        "1", "true", "yes", "on"
    )
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)