-
Notifications
You must be signed in to change notification settings - Fork 80
Expand file tree
/
Copy pathdecode.py
More file actions
59 lines (48 loc) · 1.71 KB
/
decode.py
File metadata and controls
59 lines (48 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import hydra
import torch
from omegaconf import DictConfig, ListConfig, OmegaConf
def _to_plain(cfg_item):
    """Recursively convert OmegaConf containers into plain Python objects.

    ``ListConfig`` values are converted with ``OmegaConf.to_container``
    (interpolations resolved), ``DictConfig`` values become plain dicts whose
    values are converted the same way, and anything else is returned as is.
    """
    if isinstance(cfg_item, ListConfig):
        return OmegaConf.to_container(cfg_item, resolve=True)
    if isinstance(cfg_item, DictConfig):
        return {key: _to_plain(value) for key, value in cfg_item.items()}
    return cfg_item


def _pick_device() -> str:
    """Return the torch device string to use: CUDA first, then MPS, then CPU."""
    if torch.cuda.is_available():
        return "cuda:0"
    # Some torch builds do not ship the ``torch.backends.mps`` module at all,
    # so look it up defensively instead of assuming the attribute exists.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"


@hydra.main(config_name=None, version_base=None)
def main_hydra(cfg: DictConfig) -> None:
    """Run the ASR model over every entry of an scp file and write the results.

    Configuration keys read from ``cfg``:

    * ``model_dir`` -- optional; model identifier or path handed to
      ``AutoModel``. Defaults to ``"FunAudioLLM/Fun-ASR-Nano-2512"``.
    * ``scp_file`` -- required; UTF-8 text file with one entry per line in the
      form ``<key><whitespace><input>``. The second field is passed verbatim
      as the model input (presumably an audio path -- confirm against the
      data preparation step).
    * ``output_file`` -- required; UTF-8 file that receives one
      ``<key>\\t<text>`` line per decoded entry. Its parent directory is
      created when missing.

    No config file name is given to ``hydra.main``, so these values are
    presumably supplied as command-line overrides -- confirm against the
    launch script.

    Blank lines in the scp file are ignored. Lines that do not contain both a
    key and an input are skipped with a logged warning.

    Raises:
        KeyError: if ``scp_file`` or ``output_file`` is missing from ``cfg``.
    """
    import logging

    logger = logging.getLogger(__name__)

    kwargs = _to_plain(cfg)
    model_dir = kwargs.get("model_dir", "FunAudioLLM/Fun-ASR-Nano-2512")
    scp_file = kwargs["scp_file"]
    output_file = kwargs["output_file"]

    device = _pick_device()

    # Imported lazily, as in the original script, so the configuration is
    # validated before the model library is loaded.
    from funasr import AutoModel

    model = AutoModel(
        model=model_dir,
        trust_remote_code=True,
        vad_model="fsmn-vad",
        vad_kwargs={"max_single_segment_time": 30000},
        # NOTE(review): relative path -- presumably the script has to be
        # launched from the directory that contains model.py; confirm.
        remote_code="./model.py",
        device=device,
    )

    # dirname() is empty when output_file has no directory component; in that
    # case there is nothing to create. exist_ok=True makes a separate
    # existence check unnecessary.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The scp file is opened first so that a missing input does not leave an
    # empty output file behind.
    with open(scp_file, "r", encoding="utf-8") as scp, open(
        output_file, "w", encoding="utf-8"
    ) as out:
        for line_no, raw_line in enumerate(scp, start=1):
            entry = raw_line.strip()
            if not entry:
                continue
            parts = entry.split(maxsplit=1)
            if len(parts) != 2:
                # Report instead of dropping silently, so a short output file
                # can be traced back to the offending input line.
                logger.warning(
                    "%s:%d: skipping malformed line (expected '<key> <input>'): %r",
                    scp_file,
                    line_no,
                    entry,
                )
                continue
            key, source = parts
            text = model.generate(input=[source], cache={}, batch_size=1)[0]["text"]
            out.write(f"{key}\t{text}\n")
# Script entry point: only start decoding when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main_hydra()