Aryan Wadhawan committed on
Commit
a8a95c8
·
1 Parent(s): acc8e3b
.history/app_20230718132721.py DELETED
File without changes
.history/app_20230718133117.py DELETED
@@ -1,24 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
-
7
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
8
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
9
-
10
- waveform, sample_rate = librosa.load('harvard.wav', sr=16000) # Resample to 16kHz
11
-
12
- input_values = processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_values
13
-
14
- with torch.no_grad():
15
- logits = model(input_values).logits
16
-
17
- predicted_ids = torch.argmax(logits, dim=-1)
18
- transcription = processor.batch_decode(predicted_ids)
19
-
20
- def showTranscription(transcription):
21
- return transcription
22
-
23
- iface = gr.Interface(fn=showTranscription, inputs="text", outputs="text")
24
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/app_20230718133128.py DELETED
@@ -1,24 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
-
7
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
8
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
9
-
10
- waveform, sample_rate = librosa.load('harvard.wav', sr=16000) # Resample to 16kHz
11
-
12
- input_values = processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_values
13
-
14
- with torch.no_grad():
15
- logits = model(input_values).logits
16
-
17
- predicted_ids = torch.argmax(logits, dim=-1)
18
- transcription = processor.batch_decode(predicted_ids)
19
-
20
- def showTranscription(transcription):
21
- return transcription
22
-
23
- iface = gr.Interface(fn=showTranscription, inputs="text", outputs="text")
24
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/app_20230718133340.py DELETED
@@ -1,32 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
- import base64
7
-
8
-
9
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
10
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
11
-
12
- waveform, sample_rate = librosa.load(
13
- "harvard.wav", sr=16000
14
- ) # Resample to 16kHz
15
-
16
- input_values = processor(
17
- waveform, sampling_rate=sample_rate, return_tensors="pt"
18
- ).input_values
19
-
20
- with torch.no_grad():
21
- logits = model(input_values).logits
22
-
23
- predicted_ids = torch.argmax(logits, dim=-1)
24
- transcription = processor.batch_decode(predicted_ids)
25
-
26
-
27
- def showTranscription(transcription):
28
- return transcription
29
-
30
-
31
- iface = gr.Interface(fn=showTranscription, inputs="text", outputs="text")
32
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/app_20230718133558.py DELETED
@@ -1,33 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
- import base64
7
-
8
-
9
- def lark(audioAsB64):
10
- with open("audio.wav", "wb") as preWaveform:
11
- preWaveform.write(base64.b64encode())
12
-
13
-
14
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
15
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
16
-
17
- waveform, sample_rate = librosa.load(
18
- "harvard.wav", sr=16000
19
- ) # Resample to 16kHz
20
-
21
- input_values = processor(
22
- waveform, sampling_rate=sample_rate, return_tensors="pt"
23
- ).input_values
24
-
25
- with torch.no_grad():
26
- logits = model(input_values).logits
27
-
28
- predicted_ids = torch.argmax(logits, dim=-1)
29
- transcription = processor.batch_decode(predicted_ids)
30
-
31
-
32
- iface = gr.Interface(fn=lark, inputs="text", outputs="text")
33
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/app_20230718133701.py DELETED
@@ -1,36 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
- import base64
7
-
8
-
9
- def lark(audioAsB64):
10
- # convert b64 audio to wav
11
- with open("audio.wav", "wb") as preWaveform:
12
- preWaveform.write(base64.b64encode())
13
-
14
- # processing
15
- processor = Wav2Vec2Processor.from_pretrained(
16
- "facebook/wav2vec2-xlsr-53-espeak-cv-ft"
17
- )
18
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
19
-
20
- waveform, sample_rate = librosa.load(
21
- "harvard.wav", sr=16000
22
- ) # Resample to 16kHz
23
-
24
- input_values = processor(
25
- waveform, sampling_rate=sample_rate, return_tensors="pt"
26
- ).input_values
27
-
28
- with torch.no_grad():
29
- logits = model(input_values).logits
30
-
31
- predicted_ids = torch.argmax(logits, dim=-1)
32
- transcription = processor.batch_decode(predicted_ids)
33
-
34
-
35
- iface = gr.Interface(fn=lark, inputs="text", outputs="text")
36
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/app_20230718133728.py DELETED
@@ -1,38 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
- import base64
7
-
8
-
9
- def lark(audioAsB64):
10
- # convert b64 audio to wav
11
- with open("audio.wav", "wb") as preWaveform:
12
- preWaveform.write(base64.b64encode())
13
-
14
- # processing
15
- processor = Wav2Vec2Processor.from_pretrained(
16
- "facebook/wav2vec2-xlsr-53-espeak-cv-ft"
17
- )
18
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
19
-
20
- waveform, sample_rate = librosa.load(
21
- "harvard.wav", sr=16000
22
- ) # Resample to 16kHz
23
-
24
- input_values = processor(
25
- waveform, sampling_rate=sample_rate, return_tensors="pt"
26
- ).input_values
27
-
28
- with torch.no_grad():
29
- logits = model(input_values).logits
30
-
31
- predicted_ids = torch.argmax(logits, dim=-1)
32
- transcription = processor.batch_decode(predicted_ids)
33
-
34
- return transcription
35
-
36
-
37
- iface = gr.Interface(fn=lark, inputs="text", outputs="text")
38
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/app_20230718134339.py DELETED
@@ -1,38 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
- import torch
4
- import phonemizer
5
- import librosa
6
- import base64
7
-
8
-
9
- def lark(audioAsB64):
10
- # convert b64 audio to wav
11
- with open("audio.wav", "wb") as preWaveform:
12
- preWaveform.write(base64.b64encode())
13
-
14
- # processing
15
- processor = Wav2Vec2Processor.from_pretrained(
16
- "facebook/wav2vec2-xlsr-53-espeak-cv-ft"
17
- )
18
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-xlsr-53-espeak-cv-ft")
19
-
20
- waveform, sample_rate = librosa.load(
21
- "harvard.wav", sr=16000
22
- ) # Resample to 16kHz
23
-
24
- input_values = processor(
25
- waveform, sampling_rate=sample_rate, return_tensors="pt"
26
- ).input_values
27
-
28
- with torch.no_grad():
29
- logits = model(input_values).logits
30
-
31
- predicted_ids = torch.argmax(logits, dim=-1)
32
- transcription = processor.batch_decode(predicted_ids)
33
-
34
- return transcription
35
-
36
-
37
- iface = gr.Interface(fn=lark, inputs="text", outputs="text")
38
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.history/packages_20230718132731.txt DELETED
File without changes
.history/packages_20230718132746.txt DELETED
File without changes
.history/packages_20230718132842.txt DELETED
@@ -1 +0,0 @@
1
- espeak
 
 
.history/requirements_20230718132726.txt DELETED
File without changes
.history/requirements_20230718132835.txt DELETED
@@ -1,4 +0,0 @@
1
- phonemizer
2
- librosa
3
- transformers
4
- torch
 
 
 
 
 
.history/requirements_20230718133331.txt DELETED
@@ -1,5 +0,0 @@
1
- phonemizer
2
- librosa
3
- transformers
4
- torch
5
- base64
 
 
 
 
 
 
.history/requirements_20230718134813.txt DELETED
@@ -1,4 +0,0 @@
1
- phonemizer
2
- librosa
3
- transformers
4
- torch
 
 
 
 
 
.history/requirements_20230718134828.txt DELETED
@@ -1,4 +0,0 @@
1
- phonemizer
2
- librosa
3
- transformers
4
- torch