Spaces:
Sleeping
Sleeping
Update app (7).py
Browse files
- app (7).py +8 -33
app (7).py
CHANGED
|
@@ -1,18 +1,3 @@
|
|
| 1 |
-
# Copyright (c) 2022 Horizon Robotics. (authors: Binbin Zhang)
|
| 2 |
-
# 2022 Chengdong Liang (liangchengdong@mail.nwpu.edu.cn)
|
| 3 |
-
#
|
| 4 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
-
# you may not use this file except in compliance with the License.
|
| 6 |
-
# You may obtain a copy of the License at
|
| 7 |
-
#
|
| 8 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
-
#
|
| 10 |
-
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
-
# See the License for the specific language governing permissions and
|
| 14 |
-
# limitations under the License.
|
| 15 |
-
|
| 16 |
import gradio as gr
|
| 17 |
import torch
|
| 18 |
from wenet.cli.model import load_model
|
|
@@ -43,7 +28,7 @@ model = download_rev_models()
|
|
| 43 |
def recognition(audio, style=0):
|
| 44 |
if audio is None:
|
| 45 |
return "Input Error! Please enter one audio!"
|
| 46 |
-
|
| 47 |
|
| 48 |
cat_embs = ','.join([str(s) for s in (style, 1-style)])
|
| 49 |
cat_embs = process_cat_embs(cat_embs)
|
|
@@ -62,29 +47,21 @@ inputs = [
|
|
| 62 |
gr.Slider(0, 1, value=0, label="Verbatimicity - from non-verbatim (0) to verbatim (1)", info="Choose a transcription style between non-verbatim and verbatim"),
|
| 63 |
]
|
| 64 |
|
| 65 |
-
examples = [
|
| 66 |
-
['examples/POD1000000012_S0000335.wav'],
|
| 67 |
-
['examples/POD1000000013_S0000062.wav'],
|
| 68 |
-
['examples/POD1000000032_S0000020.wav'],
|
| 69 |
-
['examples/POD1000000032_S0000038.wav'],
|
| 70 |
-
['examples/POD1000000032_S0000050.wav'],
|
| 71 |
-
['examples/POD1000000032_S0000058.wav'],
|
| 72 |
-
]
|
| 73 |
-
|
| 74 |
|
| 75 |
output = gr.outputs.Textbox(label="Output Text")
|
| 76 |
|
| 77 |
-
text = "
|
| 78 |
|
| 79 |
# description
|
| 80 |
description = (
|
| 81 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
)
|
| 83 |
|
| 84 |
-
|
| 85 |
-
"<p style='text-align: center'>"
|
| 86 |
-
"<a href='https://rev.com' target='_blank'>Learn more about Rev</a>" # noqa
|
| 87 |
-
"</p>")
|
| 88 |
|
| 89 |
interface = gr.Interface(
|
| 90 |
fn=recognition,
|
|
@@ -92,8 +69,6 @@ interface = gr.Interface(
|
|
| 92 |
outputs=output,
|
| 93 |
title=text,
|
| 94 |
description=description,
|
| 95 |
-
article=article,
|
| 96 |
-
examples=examples,
|
| 97 |
theme='huggingface',
|
| 98 |
)
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from wenet.cli.model import load_model
|
|
|
|
| 28 |
def recognition(audio, style=0):
|
| 29 |
if audio is None:
|
| 30 |
return "Input Error! Please enter one audio!"
|
| 31 |
+
|
| 32 |
|
| 33 |
cat_embs = ','.join([str(s) for s in (style, 1-style)])
|
| 34 |
cat_embs = process_cat_embs(cat_embs)
|
|
|
|
| 47 |
gr.Slider(0, 1, value=0, label="Verbatimicity - from non-verbatim (0) to verbatim (1)", info="Choose a transcription style between non-verbatim and verbatim"),
|
| 48 |
]
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
output = gr.outputs.Textbox(label="Output Text")
|
| 52 |
|
| 53 |
+
text = "ASR Transcription Opensource Demo"
|
| 54 |
|
| 55 |
# description
|
| 56 |
description = (
|
| 57 |
+
" Opensource Automatic Speech Recognition in English
|
| 58 |
+
|
| 59 |
+
Verbatim Transcript style(1) refers to word to word-to-word transcription of an audio
|
| 60 |
+
Non Verbatim Transcript style(0) refers to just conserving the message of the original audio
|
| 61 |
+
"
|
| 62 |
)
|
| 63 |
|
| 64 |
+
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
interface = gr.Interface(
|
| 67 |
fn=recognition,
|
|
|
|
| 69 |
outputs=output,
|
| 70 |
title=text,
|
| 71 |
description=description,
|
|
|
|
|
|
|
| 72 |
theme='huggingface',
|
| 73 |
)
|
| 74 |
|