Commit 0059f10
Changed files (1)
src
llm
src/llm/utils.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
+import tempfile
from pathlib import Path
-from typing import BinaryIO
import tiktoken
from loguru import logger
@@ -209,19 +209,22 @@ def clean_gemini_sourcemarks(contexts: list[dict]) -> None:
part.text = clean_source_marks(part.text)
-def convert_md(path: str | Path | BinaryIO) -> str:
- """Convert file to markdown format."""
- if isinstance(path, (str, Path)):
+def convert_md(path: str | Path | None = None, html: str | None = None) -> str:
+ """Convert to markdown format."""
+ md = MarkItDown()
+ if path is not None:
path = Path(path).expanduser().resolve()
if not path.is_file():
return ""
- md = MarkItDown()
- try:
result = md.convert(path)
- except Exception as e:
- logger.error(f"Failed to convert to markdown: {e}")
- return ""
- return result.text_content
+ return result.text_content
+ if html is not None:
+ with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
+ f.write(html)
+ result = md.convert(f.name)
+ Path(f.name).unlink(missing_ok=True)
+ return result.text_content
+ return ""
def split_reasoning(text: str) -> tuple[str, str]: