Commit 6cf78d8
Changed files (5)
src/asr/gemini_asr.py
@@ -83,8 +83,7 @@ async def gemini_stream_asr(client: Client, message: Message, path: str | Path,
"""
prompt = """请转录这段音频, 要求:
1. 以 `[mm:ss] sentence` 格式输出句子内容, 包括标点符号。其中mm:ss为此句话开始时间的分钟和秒
- 2. 请使用简体中文输出
- 3. 直接输出音频转录内容, 不要输出任何与音频内容无关的寒暄问候
+ 2. 直接输出音频转录内容, 不要输出任何与音频内容无关的寒暄问候
输出实例:
[00:02] 大家好, 我是小明, 欢迎来到我的频道。
src/llm/utils.py
@@ -7,7 +7,7 @@ import tiktoken
from loguru import logger
from config import DOWNLOAD_DIR, GPT, PREFIX
-from utils import number_to_emoji, remove_consecutive_newlines, remove_dash, remove_pound
+from utils import number_to_emoji, remove_consecutive_newlines, remove_dash, remove_pound, zhcn
BOT_TIPS = "回复以继续"
@@ -100,6 +100,7 @@ def beautify_llm_response(text: str, newline_level: int = 3) -> str:
clean_text = clean_source_marks(text)
clean_text = remove_pound(clean_text)
clean_text = remove_dash(clean_text)
+ clean_text = zhcn(clean_text)
return remove_consecutive_newlines(clean_text, newline_level)
src/utils.py
@@ -11,6 +11,7 @@ from pathlib import Path
from typing import Any
from zoneinfo import ZoneInfo
+import zhconv
from bs4.element import PageElement
from glom import PathAccessError, glom
from loguru import logger
@@ -366,6 +367,11 @@ def cleanup_old_files(root: Path | str | None = None, duration: int = 7200) -> N
path.unlink(missing_ok=True)
+def zhcn(text: str) -> str:
+ """Convert Chinese text to Simplified Chinese (zh-cn locale) using zhconv's MediaWiki-aware converter."""
+ return zhconv.convert_for_mw(text, locale="zh-cn")
+
+
if __name__ == "__main__":
print(rand_string())
print(rand_number())
pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
"uvloop>=0.21.0",
"youtube-transcript-api>=0.6.3",
"yt-dlp>=2025.1.12rc",
+ "zhconv>=1.4.3",
]
name = "bennybot"
requires-python = ">=3.11"
uv.lock
@@ -239,6 +239,7 @@ dependencies = [
{ name = "uvloop" },
{ name = "youtube-transcript-api" },
{ name = "yt-dlp" },
+ { name = "zhconv" },
]
[package.dev-dependencies]
@@ -274,6 +275,7 @@ requires-dist = [
{ name = "uvloop", specifier = ">=0.21.0" },
{ name = "youtube-transcript-api", specifier = ">=0.6.3" },
{ name = "yt-dlp", specifier = ">=2025.1.12rc0" },
+ { name = "zhconv", specifier = ">=1.4.3" },
]
[package.metadata.requires-dev]
@@ -1990,3 +1992,9 @@ sdist = { url = "https://files.pythonhosted.org/packages/f1/97/c9866e58dd9686446
wheels = [
{ url = "https://files.pythonhosted.org/packages/09/f7/4b2fb219733d8a0a5cef48ff49e08e6e2d2e7a0b98ee29404cb54c809266/yt_dlp-2025.4.26.232923.dev0-py3-none-any.whl", hash = "sha256:7c5f6af7922ab57543b71bd13661faead7424e9235b6e97d00e8939cf50c1e6d", size = 3234967 },
]
+
+[[package]]
+name = "zhconv"
+version = "1.4.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/25/47/c8ae2d5d4025e253211ff3d8c163f457db1da94976cb582337a5ab76cb87/zhconv-1.4.3.tar.gz", hash = "sha256:ad42d9057ca0605f8e41d62b67ca797f879f58193ee6840562c51459b2698c45", size = 211571 }