"""Command-line driver: align a transcript to an audio file using gentle.

Parses the command line, reads the transcript, runs
``gentle.ForcedAligner`` on the resampled audio, and writes the result as
JSON either to the file given by ``-o/--output`` or to standard output.
"""
import argparse
import logging
import multiprocessing
import os
import sys

import gentle

parser = argparse.ArgumentParser(
    description='Align a transcript to audio by generating a new language model.  Outputs JSON')
parser.add_argument(
    '--nthreads', default=multiprocessing.cpu_count(), type=int,
    help='number of alignment threads')
parser.add_argument(
    '-o', '--output', metavar='output', type=str,
    help='output filename')
parser.add_argument(
    '--conservative', dest='conservative', action='store_true',
    help='conservative alignment')
parser.set_defaults(conservative=False)
parser.add_argument(
    '--disfluency', dest='disfluency', action='store_true',
    help='include disfluencies (uh, um) in alignment')
parser.set_defaults(disfluency=False)
parser.add_argument(
    '--log', default="INFO",
    help='the log level (DEBUG, INFO, WARNING, ERROR, or CRITICAL)')
parser.add_argument(
    'audiofile', type=str,
    help='audio file')
parser.add_argument(
    'txtfile', type=str,
    help='transcript text file')
args = parser.parse_args()

# Level names are matched case-sensitively by logging, so normalise the
# user's spelling ("debug" -> "DEBUG") before applying it to the root logger.
log_level = args.log.upper()
logging.getLogger().setLevel(log_level)

# Filler words handed to the aligner as its ``disfluencies`` argument.
disfluencies = {'uh', 'um'}


def on_progress(p):
    """Log every key/value pair of a progress report at DEBUG level.

    Passed to ``aligner.transcribe`` as ``progress_cb``.

    Args:
        p: Mapping of progress field names to their current values.
    """
    for k, v in p.items():
        # Lazy %-style arguments: the message is only rendered when DEBUG
        # logging is actually enabled.
        logging.debug("%s: %s", k, v)


with open(args.txtfile, encoding="utf-8") as fh:
    transcript = fh.read()

resources = gentle.Resources()
logging.info("converting audio to 8K sampled wav")

with gentle.resampled(args.audiofile) as wavfile:
    logging.info("starting alignment")
    aligner = gentle.ForcedAligner(
        resources,
        transcript,
        nthreads=args.nthreads,
        disfluency=args.disfluency,
        conservative=args.conservative,
        disfluencies=disfluencies)
    result = aligner.transcribe(wavfile, progress_cb=on_progress, logging=logging)

result_json = result.to_json(indent=2)
if args.output:
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of being left open until interpreter exit.
    with open(args.output, 'w', encoding="utf-8") as out_fh:
        out_fh.write(result_json)
    logging.info("output written to %s", args.output)
else:
    # stdout is owned by the interpreter; write to it but never close it.
    sys.stdout.write(result_json)