refactor: replace Next.js server with Uvicorn for backend, remove backfill script
Build and Push Reader API Image / docker (push) Successful in 17s
Build and Push Reader API Image / docker (push) Successful in 17s
This commit is contained in:
@@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import html2text
|
||||
from ebooklib import ITEM_DOCUMENT
|
||||
from ebooklib import epub as epublib
|
||||
|
||||
|
||||
def _html_to_text(html_content: str) -> str:
    """Render an HTML string as text using ``html2text``.

    A fresh ``HTML2Text`` converter is configured with links and images
    ignored, emphasis markup kept, and ``body_width`` set to 0, then the
    converted output is returned with surrounding whitespace stripped.

    Args:
        html_content: The HTML markup to convert.

    Returns:
        The converted text, stripped of leading and trailing whitespace.
    """
    converter = html2text.HTML2Text()
    settings = {
        "ignore_links": True,
        "ignore_images": True,
        "ignore_emphasis": False,
        # NOTE(review): per html2text docs, 0 should disable line wrapping.
        "body_width": 0,
    }
    for option, value in settings.items():
        setattr(converter, option, value)
    rendered = converter.handle(html_content)
    return rendered.strip()
|
||||
|
||||
|
||||
def build_chapters_from_epub(epub_path: Path) -> list[dict[str, Any]]:
    """Extract chapter records from the document items of an EPUB file.

    Every ``ITEM_DOCUMENT`` item in the book is decoded as UTF-8 (undecodable
    bytes are replaced) and converted to text with ``_html_to_text``. Items
    whose converted text is empty are skipped and do not consume a chapter
    number.

    Args:
        epub_path: Location of the EPUB file to read.

    Returns:
        A list of dicts, one per kept document, with the keys:
        ``"number"`` (1-based position among kept documents),
        ``"title"`` (the item's name, or ``"Chapter <number>"`` when the
        name is empty), ``"content"`` (the decoded markup) and ``"txt"``
        (the converted text).
    """
    book = epublib.read_epub(str(epub_path), options={"ignore_ncx": False})
    chapters: list[dict[str, Any]] = []
    for document in book.get_items_of_type(ITEM_DOCUMENT):
        markup = document.get_content().decode("utf-8", errors="replace")
        plain = _html_to_text(markup)
        if plain:
            # Numbering follows the kept chapters only, so skipped
            # (empty) documents leave no gaps in the sequence.
            number = len(chapters) + 1
            chapters.append(
                {
                    "number": number,
                    "title": document.get_name() or f"Chapter {number}",
                    "content": markup,
                    "txt": plain,
                }
            )
    return chapters
|
||||
Reference in New Issue
Block a user