mirror of
https://github.com/JefferyHcool/BiliNote.git
synced 2026-05-11 18:10:06 +08:00
🐞 fix: 增加错误之后对已解析段落的缓存功能,再次重试时不再重头开始
解析长视频时,当附件过大时不再在调用后直接报错,而是将附件分批次发送;在每篇笔记开头默认增加来源地址链接,使模糊之处可以溯源
This commit is contained in:
0
backend/tests/__init__.py
Normal file
0
backend/tests/__init__.py
Normal file
35
backend/tests/test_note_helper.py
Normal file
35
backend/tests/test_note_helper.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import importlib.util
import pathlib
import unittest

# The backend "app" package is not importable from the tests directory, so the
# module under test is loaded straight from its source file on disk.
ROOT = pathlib.Path(__file__).resolve().parents[1]
MODULE_PATH = ROOT / "app" / "utils" / "note_helper.py"


def _load_note_helper():
    """Import app/utils/note_helper.py directly, bypassing sys.path."""
    module_spec = importlib.util.spec_from_file_location("note_helper", MODULE_PATH)
    if module_spec is None or module_spec.loader is None:
        raise ImportError("note_helper module spec not found")
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module


note_helper = _load_note_helper()
|
||||
|
||||
|
||||
class TestNoteHelper(unittest.TestCase):
    """Unit tests for note_helper.prepend_source_link."""

    def test_prepend_source_link_adds_header_at_top(self):
        """A source-link quote block is inserted before the note body."""
        url = "https://www.bilibili.com/video/BV1xx411c7mD"
        body = "## 标题\n\n内容"

        output = note_helper.prepend_source_link(body, url)

        expected_header = f"> 来源链接:{url}\n\n"
        self.assertTrue(output.startswith(expected_header))
        self.assertIn("## 标题", output)

    def test_prepend_source_link_does_not_duplicate_when_header_exists(self):
        """Calling again on already-prefixed markdown leaves it unchanged."""
        url = "https://www.youtube.com/watch?v=abc123"
        body = f"> 来源链接:{url}\n\n## 标题\n\n内容"

        self.assertEqual(note_helper.prepend_source_link(body, url), body)


if __name__ == "__main__":  # pragma: no cover - direct CLI invocation
    unittest.main()
|
||||
97
backend/tests/test_request_chunker.py
Normal file
97
backend/tests/test_request_chunker.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import importlib.util
import pathlib
import unittest
from dataclasses import dataclass

# The backend "app" package is not importable from the tests directory, so the
# module under test is loaded straight from its source file on disk.
ROOT = pathlib.Path(__file__).resolve().parents[1]
MODULE_PATH = ROOT / "app" / "gpt" / "request_chunker.py"


def _load_request_chunker():
    """Import app/gpt/request_chunker.py directly, bypassing sys.path."""
    module_spec = importlib.util.spec_from_file_location("request_chunker", MODULE_PATH)
    if module_spec is None or module_spec.loader is None:
        raise ImportError("request_chunker module spec not found")
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module


request_chunker = _load_request_chunker()
RequestChunker = request_chunker.RequestChunker
|
||||
|
||||
|
||||
@dataclass
class DummySeg:
    """Minimal stand-in for a transcript segment as consumed by the chunker.

    Only the attributes the tests touch are modelled: a start/end timestamp
    pair and the text payload.
    """

    start: float
    end: float
    text: str
|
||||
|
||||
|
||||
def build_messages(segments, image_urls, **_):
    """Build a single-user-message payload in chat-completion shape.

    The message content starts with one text part holding the concatenated
    segment texts, followed by one image_url part per screenshot URL.
    Extra keyword arguments are accepted and ignored.
    """
    joined_text = "".join(segment.text for segment in segments)
    image_parts = [
        {"type": "image_url", "image_url": {"url": item, "detail": "auto"}}
        for item in image_urls
    ]
    content = [{"type": "text", "text": joined_text}] + image_parts
    return [{"role": "user", "content": content}]
|
||||
|
||||
|
||||
def size_estimator(messages):
    """Approximate request size for tests: total characters of all text
    parts plus all image URLs in the first message's content."""
    return sum(
        len(part["text"]) if part["type"] == "text" else len(part["image_url"]["url"])
        for part in messages[0]["content"]
    )
|
||||
|
||||
|
||||
class TestRequestChunker(unittest.TestCase):
    """Behavioural tests for RequestChunker's batch-splitting logic."""

    def test_chunk_segments_preserves_order_and_content(self):
        # Splitting must never reorder or drop transcript text, and no chunk
        # may come back empty.
        segments = [
            DummySeg(0, 1, "aaaa"),
            DummySeg(1, 2, "bbbb"),
            DummySeg(2, 3, "cccc"),
        ]
        chunker = RequestChunker(build_messages, max_bytes=8, size_estimator=size_estimator)
        chunks = chunker.chunk(segments, [])
        texts = ["".join(seg.text for seg in c.segments) for c in chunks]
        self.assertEqual("".join(texts), "aaaabbbbcccc")
        self.assertTrue(all(texts))

    def test_chunk_images_distributed_across_batches(self):
        # All screenshot URLs must survive chunking, in their original order.
        segments = [DummySeg(0, 1, "aa")]
        images = ["i" * 6, "j" * 6, "k" * 6]
        chunker = RequestChunker(build_messages, max_bytes=10, size_estimator=size_estimator)
        chunks = chunker.chunk(segments, images)
        all_images = [img for c in chunks for img in c.image_urls]
        self.assertEqual(all_images, images)

    def test_chunk_images_are_not_front_loaded_when_multiple_segment_chunks(self):
        # With several segment chunks, images should be spread across them
        # rather than all attached to the first chunk.
        segments = [
            DummySeg(0, 1, "aaaaaa"),
            DummySeg(1, 2, "bbbbbb"),
            DummySeg(2, 3, "cccccc"),
        ]
        images = ["11111", "22222", "33333"]
        chunker = RequestChunker(build_messages, max_bytes=12, size_estimator=size_estimator)
        chunks = chunker.chunk(segments, images)

        self.assertGreaterEqual(len(chunks), 3)
        image_counts = [len(c.image_urls) for c in chunks]
        # Later chunks must also receive images, not just chunk 0.
        self.assertGreater(image_counts[1], 0)
        self.assertGreater(image_counts[2], 0)
        all_images = [img for c in chunks for img in c.image_urls]
        self.assertEqual(all_images, images)

    def test_split_oversized_segment(self):
        # A single segment larger than max_bytes must be split, with all of
        # its text preserved across the resulting chunks.
        segments = [DummySeg(0, 1, "x" * 25)]
        chunker = RequestChunker(build_messages, max_bytes=10, size_estimator=size_estimator)
        chunks = chunker.chunk(segments, [])
        combined = "".join(seg.text for c in chunks for seg in c.segments)
        self.assertEqual(combined, "x" * 25)

    def test_group_texts_by_budget(self):
        # Plain strings are packed greedily into groups that fit max_bytes.
        chunker = RequestChunker(build_messages, max_bytes=10, size_estimator=size_estimator)

        def build_text_messages(texts, *_args, **_kwargs):
            # Minimal builder: one user message with the joined texts.
            content = [{"type": "text", "text": "".join(texts)}]
            return [{"role": "user", "content": content}]

        groups = chunker.group_texts_by_budget(["aaaaa", "bbbbb", "ccccc"], build_text_messages)
        self.assertEqual(groups, [["aaaaa", "bbbbb"], ["ccccc"]])


if __name__ == "__main__":  # pragma: no cover - direct CLI invocation
    unittest.main()
|
||||
35
backend/tests/test_screenshot_marker.py
Normal file
35
backend/tests/test_screenshot_marker.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import importlib.util
import pathlib
import unittest

# The backend "app" package is not importable from the tests directory, so the
# module under test is loaded straight from its source file on disk.
ROOT = pathlib.Path(__file__).resolve().parents[1]
MODULE_PATH = ROOT / "app" / "utils" / "screenshot_marker.py"


def _load_screenshot_marker():
    """Import app/utils/screenshot_marker.py directly, bypassing sys.path."""
    module_spec = importlib.util.spec_from_file_location("screenshot_marker", MODULE_PATH)
    if module_spec is None or module_spec.loader is None:
        raise ImportError("screenshot_marker module spec not found")
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module


screenshot_marker = _load_screenshot_marker()
extract_screenshot_timestamps = screenshot_marker.extract_screenshot_timestamps
|
||||
|
||||
|
||||
class TestScreenshotMarker(unittest.TestCase):
    """Tests for extract_screenshot_timestamps.

    Each expected match is a (marker_text, seconds) pair, where seconds is
    the MM:SS timestamp converted to total seconds (e.g. 01:02 -> 62).
    """

    def test_extract_accepts_star_bracket_format(self):
        # Canonical "*Screenshot-[MM:SS]" marker on its own line.
        markdown = "A\n*Screenshot-[01:02]\nB"
        matches = extract_screenshot_timestamps(markdown)
        self.assertEqual(matches, [("*Screenshot-[01:02]", 62)])

    def test_extract_accepts_legacy_formats(self):
        # Older variants: bare "*Screenshot-MM:SS" and bracketed without the
        # leading star; both must still be recognised.
        markdown = "*Screenshot-03:04 and Screenshot-[05:06]"
        matches = extract_screenshot_timestamps(markdown)
        self.assertEqual(
            matches,
            [
                ("*Screenshot-03:04", 184),
                ("Screenshot-[05:06]", 306),
            ],
        )


if __name__ == "__main__":  # pragma: no cover - direct CLI invocation
    unittest.main()
|
||||
42
backend/tests/test_task_serial_executor.py
Normal file
42
backend/tests/test_task_serial_executor.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import importlib.util
import pathlib
import threading
import time
import unittest

# The backend "app" package is not importable from the tests directory, so the
# module under test is loaded straight from its source file on disk.
ROOT = pathlib.Path(__file__).resolve().parents[1]
MODULE_PATH = ROOT / "app" / "services" / "task_serial_executor.py"


def _load_task_serial_executor():
    """Import app/services/task_serial_executor.py directly, bypassing sys.path."""
    module_spec = importlib.util.spec_from_file_location("task_serial_executor", MODULE_PATH)
    if module_spec is None or module_spec.loader is None:
        raise ImportError("task_serial_executor module spec not found")
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module


task_serial_executor = _load_task_serial_executor()
SerialTaskExecutor = task_serial_executor.SerialTaskExecutor
|
||||
|
||||
|
||||
class TestTaskSerialExecutor(unittest.TestCase):
    """Verify that SerialTaskExecutor serialises concurrent submissions."""

    def test_executor_runs_tasks_one_by_one(self):
        executor = SerialTaskExecutor()
        state_lock = threading.Lock()
        # "active" counts tasks currently inside critical_work; "peak_active"
        # records the highest concurrency ever observed.
        state = {"active": 0, "peak_active": 0}

        def critical_work():
            with state_lock:
                state["active"] += 1
                state["peak_active"] = max(state["peak_active"], state["active"])
            # Hold the task long enough that two truly-parallel runs would
            # overlap and push peak_active to 2.
            time.sleep(0.05)
            with state_lock:
                state["active"] -= 1

        # Submit the same task from two threads simultaneously.
        threads = [threading.Thread(target=lambda: executor.run(critical_work)) for _ in range(2)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # Serial execution means at most one task was ever active at a time.
        self.assertEqual(state["peak_active"], 1)


if __name__ == "__main__":  # pragma: no cover - direct CLI invocation
    unittest.main()
|
||||
147
backend/tests/test_universal_gpt_checkpoint.py
Normal file
147
backend/tests/test_universal_gpt_checkpoint.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _install_stubs():
    """Register lightweight stand-ins for universal_gpt's imports in sys.modules.

    universal_gpt.py is loaded from its source file, so every app.* module it
    imports must already be resolvable. Package placeholders are registered
    with setdefault (a real, already-imported package is left untouched),
    while leaf modules are assigned unconditionally so the stubs always win.
    """
    app_mod = types.ModuleType("app")
    gpt_pkg = types.ModuleType("app.gpt")
    models_pkg = types.ModuleType("app.models")

    # app.gpt.base: only the GPT base class symbol is needed.
    base_mod = types.ModuleType("app.gpt.base")

    class _GPT:
        pass

    base_mod.GPT = _GPT

    # app.gpt.prompt_builder: returns a fixed prompt regardless of arguments.
    prompt_builder_mod = types.ModuleType("app.gpt.prompt_builder")

    def _generate_base_prompt(**_kwargs):
        return "prompt"

    prompt_builder_mod.generate_base_prompt = _generate_base_prompt

    # app.gpt.prompt: prompt-text constants; only MERGE_PROMPT is non-empty.
    prompt_mod = types.ModuleType("app.gpt.prompt")
    prompt_mod.BASE_PROMPT = ""
    prompt_mod.AI_SUM = ""
    prompt_mod.SCREENSHOT = ""
    prompt_mod.LINK = ""
    prompt_mod.MERGE_PROMPT = "merge"

    # app.gpt.utils: fix_markdown becomes an identity function.
    utils_mod = types.ModuleType("app.gpt.utils")

    def _fix_markdown(text):
        return text

    utils_mod.fix_markdown = _fix_markdown

    # app.gpt.request_chunker: a chunker that never splits anything.
    request_chunker_mod = types.ModuleType("app.gpt.request_chunker")

    class _RequestChunker:
        def __init__(self, *_args, **_kwargs):
            pass

        def group_texts_by_budget(self, texts, _builder, **_kwargs):
            # All texts fit into a single group.
            return [texts]

    request_chunker_mod.RequestChunker = _RequestChunker

    # app.models.gpt_model: bare GPTSource placeholder.
    gpt_model_mod = types.ModuleType("app.models.gpt_model")

    class _GPTSource:
        pass

    gpt_model_mod.GPTSource = _GPTSource

    # app.models.transcriber_model: keyword-constructed segment with defaults.
    transcriber_model_mod = types.ModuleType("app.models.transcriber_model")

    class _TranscriptSegment:
        def __init__(self, **kwargs):
            self.start = kwargs.get("start", 0)
            self.end = kwargs.get("end", 0)
            self.text = kwargs.get("text", "")

    transcriber_model_mod.TranscriptSegment = _TranscriptSegment

    # Packages: keep any already-imported real package (setdefault).
    sys.modules.setdefault("app", app_mod)
    sys.modules.setdefault("app.gpt", gpt_pkg)
    sys.modules.setdefault("app.models", models_pkg)
    # Leaf modules: always replaced so the stubs are guaranteed to be used.
    sys.modules["app.gpt.base"] = base_mod
    sys.modules["app.gpt.prompt_builder"] = prompt_builder_mod
    sys.modules["app.gpt.prompt"] = prompt_mod
    sys.modules["app.gpt.utils"] = utils_mod
    sys.modules["app.gpt.request_chunker"] = request_chunker_mod
    sys.modules["app.models.gpt_model"] = gpt_model_mod
    sys.modules["app.models.transcriber_model"] = transcriber_model_mod
|
||||
|
||||
|
||||
def _load_universal_gpt_class():
    """Install import stubs, then load UniversalGPT from its source file."""
    _install_stubs()
    backend_root = pathlib.Path(__file__).resolve().parents[1]
    source_file = backend_root / "app" / "gpt" / "universal_gpt.py"
    module_spec = importlib.util.spec_from_file_location("universal_gpt", source_file)
    if module_spec is None or module_spec.loader is None:
        raise ImportError("universal_gpt module spec not found")
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module.UniversalGPT


UniversalGPT = _load_universal_gpt_class()
|
||||
|
||||
|
||||
class _FailingCompletions:
    """chat.completions stub whose create() always fails."""

    def create(self, **_kwargs):
        # Mimics a Cloudflare-style 524 timeout error message; the test
        # exercises the retry/checkpoint path triggered by this failure.
        raise Exception("Error code: 524 - bad_response_status_code")
|
||||
|
||||
|
||||
class _DummyChat:
    """OpenAI-client ``.chat`` namespace stub exposing failing completions."""

    def __init__(self):
        self.completions = _FailingCompletions()
|
||||
|
||||
|
||||
class _DummyModels:
    """OpenAI-client ``.models`` namespace stub; reports no models."""

    @staticmethod
    def list():
        return []
|
||||
|
||||
|
||||
class _DummyClient:
    """Minimal OpenAI-client lookalike handed to UniversalGPT in tests."""

    def __init__(self):
        self.chat = _DummyChat()
        self.models = _DummyModels()
|
||||
|
||||
|
||||
class TestUniversalGPTCheckpoint(unittest.TestCase):
    """Checkpointing behaviour of UniversalGPT when the merge phase fails."""

    def test_merge_524_error_persists_checkpoint(self):
        # Force a single retry attempt so the 524 failure surfaces
        # immediately; restore the env var afterwards.
        original_attempts = os.environ.get("OPENAI_RETRY_ATTEMPTS")
        os.environ["OPENAI_RETRY_ATTEMPTS"] = "1"
        gpt = UniversalGPT(_DummyClient(), model="mock-model")
        try:
            with tempfile.TemporaryDirectory() as tmp_dir:
                # Redirect checkpoint writes into a throwaway directory.
                gpt.checkpoint_dir = Path(tmp_dir)

                # The dummy client's create() always raises a 524-style error,
                # so merging must fail...
                with self.assertRaises(Exception):
                    gpt._merge_partials(["part-a", "part-b"], "task-1", "sig-1")

                # ...but the partial results must have been checkpointed so a
                # retry does not start from scratch.
                checkpoint_path = gpt._checkpoint_path("task-1")
                self.assertTrue(checkpoint_path.exists())
                payload = json.loads(checkpoint_path.read_text(encoding="utf-8"))
                self.assertEqual(payload["phase"], "merge")
                self.assertEqual(payload["partials"], ["part-a", "part-b"])
        finally:
            # Restore the caller's environment exactly as it was.
            if original_attempts is None:
                os.environ.pop("OPENAI_RETRY_ATTEMPTS", None)
            else:
                os.environ["OPENAI_RETRY_ATTEMPTS"] = original_attempts


if __name__ == "__main__":  # pragma: no cover - direct CLI invocation
    unittest.main()
|
||||
142
backend/tests/test_video_reader_dedupe.py
Normal file
142
backend/tests/test_video_reader_dedupe.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import importlib.util
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def _install_stubs():
    """Register stand-ins for video_reader's imports in sys.modules.

    video_reader.py is loaded from its source file, so its imports (app.utils
    helpers, ffmpeg-python, and PIL) must resolve without those packages being
    installed. Package placeholders use setdefault so real packages survive;
    third-party leaf modules are assigned unconditionally.
    """
    app_mod = types.ModuleType("app")
    utils_pkg = types.ModuleType("app.utils")

    # app.utils.logger: get_logger() returns a logger whose methods do nothing.
    logger_mod = types.ModuleType("app.utils.logger")

    class _Logger:
        @staticmethod
        def info(*_args, **_kwargs):
            return None

        @staticmethod
        def warning(*_args, **_kwargs):
            return None

        @staticmethod
        def error(*_args, **_kwargs):
            return None

    def _get_logger(_name):
        return _Logger()

    logger_mod.get_logger = _get_logger

    path_helper_mod = types.ModuleType("app.utils.path_helper")
    ffmpeg_mod = types.ModuleType("ffmpeg")

    # PIL stubs: only the attribute names video_reader touches are provided;
    # every operation is a no-op returning None.
    pil_mod = types.ModuleType("PIL")
    pil_image_mod = types.ModuleType("PIL.Image")
    pil_draw_mod = types.ModuleType("PIL.ImageDraw")
    pil_font_mod = types.ModuleType("PIL.ImageFont")

    class _FakeImage:
        pass

    class _FakeImageDraw:
        @staticmethod
        def Draw(*_args, **_kwargs):
            return None

    class _FakeImageFont:
        @staticmethod
        def truetype(*_args, **_kwargs):
            return None

        @staticmethod
        def load_default():
            return None

    pil_image_mod.Image = _FakeImage
    pil_draw_mod.ImageDraw = _FakeImageDraw
    pil_font_mod.ImageFont = _FakeImageFont

    # app.utils.path_helper.get_app_dir: identity, keeps paths inside cwd.
    def _get_app_dir(name):
        return name

    path_helper_mod.get_app_dir = _get_app_dir
    # Default probe result; individual tests patch this with a real duration.
    ffmpeg_mod.probe = lambda *_args, **_kwargs: {"format": {"duration": "0"}}

    # Packages: keep any already-imported real package (setdefault).
    sys.modules.setdefault("app", app_mod)
    sys.modules.setdefault("app.utils", utils_pkg)
    # Leaf/third-party modules: always replaced so the stubs win.
    sys.modules["PIL"] = pil_mod
    sys.modules["PIL.Image"] = pil_image_mod
    sys.modules["PIL.ImageDraw"] = pil_draw_mod
    sys.modules["PIL.ImageFont"] = pil_font_mod
    sys.modules["ffmpeg"] = ffmpeg_mod
    sys.modules["app.utils.logger"] = logger_mod
    sys.modules["app.utils.path_helper"] = path_helper_mod
|
||||
|
||||
|
||||
def _load_video_reader_module():
    """Install import stubs, then load video_reader from its source file."""
    _install_stubs()
    backend_root = pathlib.Path(__file__).resolve().parents[1]
    source_file = backend_root / "app" / "utils" / "video_reader.py"
    module_spec = importlib.util.spec_from_file_location("video_reader", source_file)
    if module_spec is None or module_spec.loader is None:
        raise ImportError("video_reader module spec not found")
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module


video_reader_module = _load_video_reader_module()
VideoReader = video_reader_module.VideoReader
|
||||
|
||||
|
||||
def _make_fake_ffmpeg_runner(colors_by_second):
|
||||
def _runner(cmd, check=True):
|
||||
output_path = next((arg for arg in cmd if isinstance(arg, str) and arg.endswith(".jpg")), None)
|
||||
if output_path is None:
|
||||
raise AssertionError("Output path not found in ffmpeg cmd")
|
||||
match = re.search(r"frame_(\d{2})_(\d{2})\.jpg$", output_path)
|
||||
if match is None:
|
||||
raise AssertionError("Unexpected output path")
|
||||
sec = int(match.group(1)) * 60 + int(match.group(2))
|
||||
payload = colors_by_second[sec]
|
||||
with open(output_path, "wb") as f:
|
||||
f.write(payload)
|
||||
return 0
|
||||
|
||||
return _runner
|
||||
|
||||
|
||||
class TestVideoReaderDeduplicateFrames(unittest.TestCase):
    """Frame extraction should drop consecutive identical frames."""

    def test_extract_frames_skips_adjacent_duplicates_when_enabled(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            frame_dir = pathlib.Path(tmp_dir) / "frames"
            grid_dir = pathlib.Path(tmp_dir) / "grids"
            reader = VideoReader(
                video_path="dummy.mp4",
                frame_interval=1,
                frame_dir=str(frame_dir),
                grid_dir=str(grid_dir),
            )

            # Seconds 0/1 and 2/3 produce byte-identical "frames", so only
            # one frame per pair should survive deduplication.
            fake_colors = {
                0: b"frame-a",
                1: b"frame-a",
                2: b"frame-b",
                3: b"frame-b",
            }

            # Patch ffmpeg.probe to report a 4-second video and replace the
            # real subprocess.run with the fake writer defined above.
            with patch.object(video_reader_module.ffmpeg, "probe", return_value={"format": {"duration": "4"}}), \
                    patch.object(video_reader_module.subprocess, "run", side_effect=_make_fake_ffmpeg_runner(fake_colors)):
                paths = reader.extract_frames(max_frames=10)

            # Only the first frame of each identical run is kept.
            names = [pathlib.Path(p).name for p in paths]
            self.assertEqual(names, ["frame_00_00.jpg", "frame_00_02.jpg"])


if __name__ == "__main__":  # pragma: no cover - direct CLI invocation
    unittest.main()
|
||||
Reference in New Issue
Block a user