feat: implement cursor-based pagination across various components and APIs

This commit is contained in:
shiyu
2026-05-10 00:36:41 +08:00
parent 56b48b28a1
commit f89292e451
12 changed files with 275 additions and 148 deletions

View File

@@ -1,4 +1,4 @@
from typing import List, Dict, Protocol, runtime_checkable, Tuple, AsyncIterator
from typing import List, Dict, Protocol, runtime_checkable, Tuple, AsyncIterator, Any
from models import StorageAdapter
# 约定:任意新适配器模块需定义:
@@ -9,7 +9,7 @@ from models import StorageAdapter
@runtime_checkable
class BaseAdapter(Protocol):
record: StorageAdapter
async def list_dir(self, root: str, rel: str, page_num: int = 1, page_size: int = 50, sort_by: str = "name", sort_order: str = "asc") -> Tuple[List[Dict], int]: ...
async def list_dir(self, root: str, rel: str, page_num: int = 1, page_size: int = 50, sort_by: str = "name", sort_order: str = "asc", cursor: str | None = None) -> Tuple[List[Dict], int] | Dict[str, Any]: ...
async def read_file(self, root: str, rel: str) -> bytes: ...
async def write_file(self, root: str, rel: str, data: bytes): ...
async def write_file_stream(self, root: str, rel: str, data_iter: AsyncIterator[bytes]): ...

View File

@@ -4,6 +4,7 @@ import httpx
from fastapi.responses import StreamingResponse, Response
from fastapi import HTTPException
from models import StorageAdapter
from api.response import cursor_page
MS_GRAPH_URL = "https://graph.microsoft.com/v1.0"
MS_OAUTH_URL = "https://login.microsoftonline.com/common/oauth2/v2.0/token"
@@ -114,65 +115,51 @@ class OneDriveAdapter:
"type": "dir" if is_dir else "file",
}
async def list_dir(
    self,
    root: str,
    rel: str,
    page_num: int = 1,
    page_size: int = 50,
    sort_by: str = "name",
    sort_order: str = "asc",
    cursor: str | None = None,
):
    """
    List one page of a directory using Graph ``@odata.nextLink`` cursors.

    Graph does not report a total child count, so this method returns a
    cursor page instead of an offset page.

    :param root: Root path (not used here).
    :param rel: Path of the directory, relative to the adapter root.
    :param page_num: Accepted for interface compatibility; not used.
    :param page_size: Number of items requested for the first page (``$top``).
    :param sort_by: Accepted for interface compatibility; not used.
    :param sort_order: Accepted for interface compatibility; not used.
    :param cursor: ``@odata.nextLink`` returned by a previous call, or None
        to start from the first page.
    :return: Whatever ``cursor_page`` builds for the formatted items.
    :raises HTTPException: 400 when ``cursor`` does not point at the Graph API.
    :raises IOError: When the Graph response body is not valid JSON.
    """
    if cursor:
        # The cursor is client-supplied and is requested verbatim, so only
        # follow links that target the configured Graph endpoint. Without
        # this check any URL could be fetched through this adapter.
        from urllib.parse import urlsplit

        expected = urlsplit(MS_GRAPH_URL)
        target = urlsplit(cursor)
        same_origin = (
            target.scheme == expected.scheme
            and target.netloc.lower() == expected.netloc.lower()
        )
        if not same_origin:
            raise HTTPException(status_code=400, detail="Invalid cursor")
        resp = await self._request("GET", full_url=cursor)
    else:
        api_path = self._get_api_path(rel)
        children_path = f"{api_path}:/children" if api_path else "/children"
        resp = await self._request(
            "GET", api_path_segment=children_path, params={"$top": page_size}
        )

    # A missing directory is reported as an empty page rather than an error.
    if resp.status_code == 404:
        return cursor_page([], page_size, cursor=cursor)
    resp.raise_for_status()

    try:
        data = resp.json()
    except Exception as e:
        raise IOError(f"解析 Graph API 响应失败: {e}") from e

    formatted_items = [self._format_item(item) for item in data.get("value", [])]
    return cursor_page(
        formatted_items,
        page_size,
        cursor=cursor,
        next_cursor=data.get("@odata.nextLink"),
    )
async def read_file(self, root: str, rel: str) -> bytes:
"""

View File

@@ -6,6 +6,7 @@ import os
import struct
import time
from models import StorageAdapter
from api.response import cursor_page
from telethon import TelegramClient, errors, utils
from telethon.crypto import AuthKey
from telethon.sessions import StringSession
@@ -280,81 +281,79 @@ class TelegramAdapter:
# Always returns an empty string; `sub_path` is accepted but ignored.
def get_effective_root(self, sub_path: str | None) -> str:
return ""
async def list_dir(self, root: str, rel: str, page_num: int = 1, page_size: int = 50, sort_by: str = "name", sort_order: str = "asc") -> Tuple[List[Dict], int]:
async def list_dir(
self,
root: str,
rel: str,
page_num: int = 1,
page_size: int = 50,
sort_by: str = "name",
sort_order: str = "asc",
cursor: str | None = None,
):
if rel:
return [], 0
return cursor_page([], page_size, cursor=cursor)
client = self._get_client()
entries = []
next_cursor = None
try:
await client.connect()
messages = await client.get_messages(self.chat_id, limit=200)
for message in messages:
if not message:
continue
offset_id = int(cursor) if cursor else 0
batch_limit = min(max(page_size, 50), 200)
while len(entries) < page_size:
messages = await client.get_messages(self.chat_id, limit=batch_limit, offset_id=offset_id)
if not messages:
next_cursor = None
break
media = message.document or message.video or message.photo
if not media:
continue
offset_id = messages[-1].id
next_cursor = str(offset_id)
for message in messages:
if not message:
continue
file_meta = message.file
if not file_meta:
continue
media = message.document or message.video or message.photo
if not media:
continue
filename = file_meta.name
if not filename:
if message.text and '.' in message.text and len(message.text) < 256 and '\n' not in message.text:
filename = message.text
else:
filename = f"unknown_{message.id}"
file_meta = message.file
if not file_meta:
continue
size = file_meta.size
if size is None:
# 兼容缺失 size 的情况
if hasattr(media, "size") and media.size is not None:
size = media.size
elif message.photo and getattr(message.photo, "sizes", None):
photo_size = message.photo.sizes[-1]
size = getattr(photo_size, "size", 0) or 0
else:
size = 0
filename = file_meta.name
if not filename:
if message.text and '.' in message.text and len(message.text) < 256 and '\n' not in message.text:
filename = message.text
else:
filename = f"unknown_{message.id}"
entries.append({
"name": f"{message.id}_{filename}",
"is_dir": False,
"size": size,
"mtime": int(message.date.timestamp()),
"type": "file",
"has_thumbnail": False,
})
size = file_meta.size
if size is None:
# 兼容缺失 size 的情况
if hasattr(media, "size") and media.size is not None:
size = media.size
elif message.photo and getattr(message.photo, "sizes", None):
photo_size = message.photo.sizes[-1]
size = getattr(photo_size, "size", 0) or 0
else:
size = 0
entries.append({
"name": f"{message.id}_{filename}",
"is_dir": False,
"size": size,
"mtime": int(message.date.timestamp()),
"type": "file",
"has_thumbnail": False,
})
if len(entries) >= page_size:
break
finally:
if client.is_connected():
await client.disconnect()
# 排序
reverse = sort_order.lower() == "desc"
def get_sort_key(item):
key = (not item["is_dir"],)
sort_field = sort_by.lower()
if sort_field == "name":
key += (item["name"].lower(),)
elif sort_field == "size":
key += (item["size"],)
elif sort_field == "mtime":
key += (item["mtime"],)
else:
key += (item["name"].lower(),)
return key
entries.sort(key=get_sort_key, reverse=reverse)
total_count = len(entries)
# 分页
start_idx = (page_num - 1) * page_size
end_idx = start_idx + page_size
page_entries = entries[start_idx:end_idx]
return page_entries, total_count
return cursor_page(entries, page_size, cursor=cursor, next_cursor=next_cursor)
async def read_file(self, root: str, rel: str) -> bytes:
message_id = self._parse_message_id(rel)

View File

@@ -183,9 +183,10 @@ async def browse_fs(
page_size: int = Query(50, ge=1, le=500, description="每页条数"),
sort_by: str = Query("name", description="按字段排序: name, size, mtime"),
sort_order: str = Query("asc", description="排序顺序: asc, desc"),
cursor: str | None = Query(None, description="游标分页位置"),
):
data = await VirtualFSService.list_directory_with_permission(
full_path, current_user.id, page_num, page_size, sort_by, sort_order
full_path, current_user.id, page_num, page_size, sort_by, sort_order, cursor
)
return success(data)
@@ -211,9 +212,10 @@ async def root_listing(
page_size: int = Query(50, ge=1, le=500, description="每页条数"),
sort_by: str = Query("name", description="按字段排序: name, size, mtime"),
sort_order: str = Query("asc", description="排序顺序: asc, desc"),
cursor: str | None = Query(None, description="游标分页位置"),
):
# 根目录不需要权限检查,但需要过滤无权限的子目录
data = await VirtualFSService.list_directory_with_permission(
"/", current_user.id, page_num, page_size, sort_by, sort_order
"/", current_user.id, page_num, page_size, sort_by, sort_order, cursor
)
return success(data)

View File

@@ -57,6 +57,7 @@ class VirtualFSListingMixin(VirtualFSResolverMixin):
page_size: int = 50,
sort_by: str = "name",
sort_order: str = "asc",
cursor: str | None = None,
) -> Dict:
norm = cls._normalize_path(path).rstrip("/") or "/"
adapters = await StorageAdapter.filter(enabled=True)
@@ -119,12 +120,28 @@ class VirtualFSListingMixin(VirtualFSResolverMixin):
adapter_entries_for_merge: List[Dict] = []
adapter_entries_page: List[Dict] | None = None
adapter_total: int | None = None
adapter_listing: Dict[str, Any] | None = None
if adapter_model and adapter_instance:
list_dir = getattr(adapter_instance, "list_dir", None)
if callable(list_dir):
adapter_entries_page, adapter_total = await list_dir(
effective_root, rel, page_num, page_size, sort_by, sort_order
)
try:
parameters = inspect.signature(list_dir).parameters
except (TypeError, ValueError):
parameters = {}
if "cursor" in parameters:
raw_listing = await list_dir(
effective_root, rel, page_num, page_size, sort_by, sort_order, cursor=cursor
)
else:
raw_listing = await list_dir(
effective_root, rel, page_num, page_size, sort_by, sort_order
)
if isinstance(raw_listing, dict):
adapter_listing = raw_listing
adapter_entries_page = raw_listing.get("items", [])
adapter_total = raw_listing.get("total")
else:
adapter_entries_page, adapter_total = raw_listing
if rel:
parent_rel = cls._parent_rel(rel)
if rel:
@@ -189,6 +206,9 @@ class VirtualFSListingMixin(VirtualFSResolverMixin):
annotate_entry_list = adapter_entries_page or []
for ent in annotate_entry_list:
annotate_entry(ent)
if adapter_listing and adapter_listing.get("pagination_mode") == "cursor":
adapter_listing["items"] = annotate_entry_list
return adapter_listing
return page(adapter_entries_page, adapter_total, page_num, page_size)
@classmethod
@@ -296,13 +316,14 @@ class VirtualFSListingMixin(VirtualFSResolverMixin):
page_size: int = 50,
sort_by: str = "name",
sort_order: str = "asc",
cursor: str | None = None,
) -> Dict:
"""
带权限过滤的目录列表
过滤掉用户没有读取权限的条目
"""
result = await cls.list_virtual_dir(path, page_num, page_size, sort_by, sort_order)
result = await cls.list_virtual_dir(path, page_num, page_size, sort_by, sort_order, cursor)
items = result.get("items", [])
if not items:
return result

View File

@@ -275,15 +275,30 @@ class VirtualFSRouteMixin(VirtualFSTempLinkMixin):
async def list_directory(cls, full_path: str, page_num: int, page_size: int, sort_by: str, sort_order: str):
full_path = cls._normalize_path(full_path)
result = await cls.list_virtual_dir(full_path, page_num, page_size, sort_by, sort_order)
pagination = {
"mode": result.get("pagination_mode", "paged"),
"page_size": result.get("page_size", page_size),
}
if pagination["mode"] == "cursor":
pagination.update(
{
"cursor": result.get("cursor"),
"next_cursor": result.get("next_cursor"),
"has_next": bool(result.get("has_next")),
}
)
else:
pagination.update(
{
"total": result["total"],
"page": result["page"],
"pages": result["pages"],
}
)
return {
"path": full_path,
"entries": result["items"],
"pagination": {
"total": result["total"],
"page": result["page"],
"page_size": result["page_size"],
"pages": result["pages"],
},
"pagination": pagination,
}
@classmethod

View File

@@ -26,9 +26,10 @@ class VirtualFSService(
page_size: int = 50,
sort_by: str = "name",
sort_order: str = "asc",
cursor: str | None = None,
):
"""列出目录内容"""
return await cls.list_virtual_dir(path, page_num, page_size, sort_by, sort_order)
return await cls.list_virtual_dir(path, page_num, page_size, sort_by, sort_order, cursor)
@classmethod
async def list_directory_with_permission(
@@ -39,19 +40,35 @@ class VirtualFSService(
page_size: int = 50,
sort_by: str = "name",
sort_order: str = "asc",
cursor: str | None = None,
):
"""列出目录内容(带权限过滤)"""
full_path = cls._normalize_path(path).rstrip("/") or "/"
result = await cls.list_virtual_dir_with_permission(
full_path, user_id, page_num, page_size, sort_by, sort_order
full_path, user_id, page_num, page_size, sort_by, sort_order, cursor
)
pagination = {
"mode": result.get("pagination_mode", "paged") if isinstance(result, dict) else "paged",
"page_size": result.get("page_size", page_size) if isinstance(result, dict) else page_size,
}
if pagination["mode"] == "cursor":
pagination.update(
{
"cursor": result.get("cursor") if isinstance(result, dict) else cursor,
"next_cursor": result.get("next_cursor") if isinstance(result, dict) else None,
"has_next": bool(result.get("has_next")) if isinstance(result, dict) else False,
}
)
else:
pagination.update(
{
"total": result.get("total", 0) if isinstance(result, dict) else 0,
"page": result.get("page", page_num) if isinstance(result, dict) else page_num,
"pages": result.get("pages", 0) if isinstance(result, dict) else 0,
}
)
return {
"path": full_path,
"entries": result.get("items", []) if isinstance(result, dict) else [],
"pagination": {
"total": result.get("total", 0) if isinstance(result, dict) else 0,
"page": result.get("page", page_num) if isinstance(result, dict) else page_num,
"page_size": result.get("page_size", page_size) if isinstance(result, dict) else page_size,
"pages": result.get("pages", 0) if isinstance(result, dict) else 0,
},
"pagination": pagination,
}