Commit 4c86efd

benny-dou <60535774+benny-dou@users.noreply.github.com>
2025-10-06 14:32:41
feat(tts): support MS Edge TTS
1 parent 08afe24
Changed files (4)
src/tts/edge.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from pathlib import Path
+
+import anyio
+from loguru import logger
+
+from asr.utils import audio_duration
+from config import CAPTION_LENGTH, DOWNLOAD_DIR, TTS
+from messages.utils import blockquote
+from networking import hx_req
+from utils import markdown_to_text, rand_string
+
+
+async def edge_tts(texts: str, model: str = "", voice_name: str = "") -> dict:
+    """Edge TTS.
+
+    https://github.com/wangwangit/tts
+
+    Returns:
+        {"voice": str, "duration": int, "caption": str}
+    """
+    model = model or TTS.EDGE_MODEL
+    voice_name = voice_name or TTS.EDGE_VOICE
+    raw_texts = markdown_to_text(texts)
+    logger.debug(f"TTS via {model}, voice: {voice_name}, texts: {texts}")
+    response = await hx_req(
+        f"{TTS.EDGE_DOMAIN}/v1/audio/speech",
+        "POST",
+        headers={"Content-Type": "application/json"},
+        json_data={"input": raw_texts, "voice": model, "speed": 1.0, "pitch": "0", "style": "general"},
+        proxy=TTS.EDGE_PROXY,
+        rformat="content",
+    )
+    if not isinstance(response.get("content"), bytes):
+        return {}
+    save_path = Path(DOWNLOAD_DIR) / f"{rand_string(8)}.mp3"
+    async with await anyio.open_file(save_path, "wb") as f:
+        await f.write(response["content"])
+    caption = f"🗣音色: {voice_name}\n🤖引擎: {model}\n{blockquote(texts[: CAPTION_LENGTH - 20])}"
+    return {"voice": save_path, "duration": audio_duration(save_path), "caption": caption}
src/tts/engines.py
@@ -45,7 +45,7 @@ ENGINES = [
     {"name": "Sunny", "desc": "【方言】四川话", "engine": "qwen", "sex": "female"},
     # Sambert
     {"name": "知楠", "desc": "通用场景, 广告男声", "engine": "sambert", "sex": "male"},
-    {"name": "知琪", "desc": "通用场景, 温柔女声", "engine": "sambert", "sex": "f emale"},
+    {"name": "知琪", "desc": "通用场景, 温柔女声", "engine": "sambert", "sex": "female"},
     {"name": "知厨", "desc": "新闻播报, 舌尖男声", "engine": "sambert", "sex": "male"},
     {"name": "知德", "desc": "新闻播报, 新闻男声", "engine": "sambert", "sex": "male"},
     {"name": "知佳", "desc": "新闻播报, 标准女声", "engine": "sambert", "sex": "female"},
@@ -74,6 +74,28 @@ ENGINES = [
     {"name": "知伦", "desc": "配音解说, 悬疑解说", "engine": "sambert", "sex": "male"},
     {"name": "知飞", "desc": "配音解说, 激昂解说", "engine": "sambert", "sex": "male"},
     {"name": "知达", "desc": "新闻播报, 标准男声", "engine": "sambert", "sex": "male"},
+    # EdgeTTS
+    {"name": "晓晓", "desc": "温柔", "engine": "edge", "sex": "female"},
+    {"name": "晓伊", "desc": "甜美", "engine": "edge", "sex": "female"},
+    {"name": "晓辰", "desc": "知性", "engine": "edge", "sex": "female"},
+    {"name": "晓涵", "desc": "优雅", "engine": "edge", "sex": "female"},
+    {"name": "晓梦", "desc": "梦幻", "engine": "edge", "sex": "female"},
+    {"name": "晓墨", "desc": "文艺", "engine": "edge", "sex": "female"},
+    {"name": "晓秋", "desc": "成熟", "engine": "edge", "sex": "female"},
+    {"name": "晓睿", "desc": "智慧", "engine": "edge", "sex": "female"},
+    {"name": "晓双", "desc": "活泼", "engine": "edge", "sex": "female"},
+    {"name": "晓萱", "desc": "清新", "engine": "edge", "sex": "female"},
+    {"name": "晓颜", "desc": "柔美", "engine": "edge", "sex": "female"},
+    {"name": "晓悠", "desc": "悠扬", "engine": "edge", "sex": "female"},
+    {"name": "晓甄", "desc": "端庄", "engine": "edge", "sex": "female"},
+    {"name": "云希", "desc": "清朗", "engine": "edge", "sex": "male"},
+    {"name": "云扬", "desc": "阳光", "engine": "edge", "sex": "male"},
+    {"name": "云健", "desc": "稳重", "engine": "edge", "sex": "male"},
+    {"name": "云枫", "desc": "磁性", "engine": "edge", "sex": "male"},
+    {"name": "云皓", "desc": "豪迈", "engine": "edge", "sex": "male"},
+    {"name": "云夏", "desc": "热情", "engine": "edge", "sex": "male"},
+    {"name": "云野", "desc": "野性", "engine": "edge", "sex": "male"},
+    {"name": "云泽", "desc": "深沉", "engine": "edge", "sex": "male"},
 ]
 
 LIMIT_FOR_MODEL = {
@@ -110,6 +132,27 @@ LIMIT_FOR_MODEL = {
     "知伦": ["sambert-zhilun-v1"],
     "知飞": ["sambert-zhifei-v1"],
     "知达": ["sambert-zhida-v1"],
+    "晓晓": ["zh-CN-XiaoxiaoNeural"],
+    "晓伊": ["zh-CN-XiaoyiNeural"],
+    "晓辰": ["zh-CN-XiaochenNeural"],
+    "晓涵": ["zh-CN-XiaohanNeural"],
+    "晓梦": ["zh-CN-XiaomengNeural"],
+    "晓墨": ["zh-CN-XiaomoNeural"],
+    "晓秋": ["zh-CN-XiaoqiuNeural"],
+    "晓睿": ["zh-CN-XiaoruiNeural"],
+    "晓双": ["zh-CN-XiaoshuangNeural"],
+    "晓萱": ["zh-CN-XiaoxuanNeural"],
+    "晓颜": ["zh-CN-XiaoyanNeural"],
+    "晓悠": ["zh-CN-XiaoyouNeural"],
+    "晓甄": ["zh-CN-XiaozhenNeural"],
+    "云希": ["zh-CN-YunxiNeural"],
+    "云扬": ["zh-CN-YunyangNeural"],
+    "云健": ["zh-CN-YunjianNeural"],
+    "云枫": ["zh-CN-YunfengNeural"],
+    "云皓": ["zh-CN-YunhaoNeural"],
+    "云夏": ["zh-CN-YunxiaNeural"],
+    "云野": ["zh-CN-YunyeNeural"],
+    "云泽": ["zh-CN-YunzeNeural"],
 }
 
 
@@ -181,6 +224,10 @@ def get_tts_config(texts: str) -> tuple[str, str, str, str]:
         info = get_random_one(engine="sambert")
         model = random.choice(LIMIT_FOR_MODEL.get(info["name"], [""]))
         return info["name"], info["engine"], model, texts[8:].lstrip()
+    if texts.lower().startswith("@edge"):
+        info = get_random_one(engine="edge")
+        model = random.choice(LIMIT_FOR_MODEL.get(info["name"], [""]))
+        return info["name"], info["engine"], model, texts[8:].lstrip()
 
     texts = texts.removeprefix("@").lstrip()
 
src/tts/tts.py
@@ -9,6 +9,7 @@ from config import DOWNLOAD_DIR, PREFIX, TTS
 from messages.parser import parse_msg
 from messages.sender import send2tg
 from messages.utils import blockquote, equal_prefix, set_reaction, startswith_prefix
+from tts.edge import edge_tts
 from tts.engines import get_tts_config, list_engines
 from tts.gemini import gemini_tts
 from tts.qwen import qwen_tts
@@ -27,6 +28,7 @@ HELP = f"""🗣**文字转语音**
 - `{PREFIX.TTS} @gemini`: 随机一款Gemini音色
 - `{PREFIX.TTS} @qwen`: 随机一款通义千问音色
 - `{PREFIX.TTS} @sambert`: 随机一款阿里Sambert音色
+- `{PREFIX.TTS} @edge`: 随机一款MS Edge音色
 {blockquote(list_engines())}
 """
 
@@ -63,6 +65,8 @@ async def text_to_speech(client: Client, message: Message, **kwargs):
         resp = await qwen_tts(texts, model, voice_name)
     elif engine == "sambert":
         resp = await sambert_tts(texts, model, voice_name)
+    elif engine == "edge":
+        resp = await edge_tts(texts, model, voice_name)
     else:
         msg = f"Unknown engine: {engine}"
         raise ValueError(msg)
src/config.py
@@ -355,6 +355,10 @@ class TTS:
     QWEN_VOICE = os.getenv("TTS_QWEN_VOICE", "Chelsie")
     SAMBERT_MODEL = os.getenv("TTS_SAMBERT_MODEL", "ramdom")  # comma separated models for load balance. use "random" to randomly choose a model
     SAMBERT_LENGTH_LIMIT = int(os.getenv("TTS_SAMBERT_LENGTH_LIMIT", "20000"))  # token limit of the tts model
+    EDGE_DOMAIN = os.getenv("TTS_EDGE_DOMAIN", "https://tts.wangwangit.com")
+    EDGE_VOICE = os.getenv("TTS_EDGE_VOICE", "zh-CN-XiaoxiaoNeural")
+    EDGE_MODEL = os.getenv("TTS_EDGE_MODEL", "zh-CN-XiaoxiaoNeural")
+    EDGE_PROXY = os.getenv("TTS_EDGE_PROXY", None)
 
 
 class GPT: