From 32af3720e32eda7f427827ba55fee3f211003690 Mon Sep 17 00:00:00 2001 From: ShiYu Date: Sat, 31 May 2025 00:33:37 +0800 Subject: [PATCH] feat(vector-db): integrate vector database for image search - Replace pgvector with Microsoft Vector DB for image vector storage and search - Update Picture model to use float[] instead of Vector type - Modify PictureService to use VectorDbService for vector search - Remove vector-related code from MyDbContext - Add PictureVector model for Vector DB integration --- Extensions/ServiceCollectionExtensions.cs | 4 +- Foxel.csproj | 5 +- Models/DataBase/Picture.cs | 11 +- Models/Vector/PictureVector.cs | 12 ++ MyDbContext.cs | 11 -- Program.cs | 4 +- Services/Background/BackgroundTaskQueue.cs | 26 +++- Services/Media/PictureService.cs | 145 ++++++++------------- 8 files changed, 98 insertions(+), 120 deletions(-) create mode 100644 Models/Vector/PictureVector.cs diff --git a/Extensions/ServiceCollectionExtensions.cs b/Extensions/ServiceCollectionExtensions.cs index d7fc7fe..9d91190 100644 --- a/Extensions/ServiceCollectionExtensions.cs +++ b/Extensions/ServiceCollectionExtensions.cs @@ -4,8 +4,6 @@ using Microsoft.AspNetCore.Authentication.Cookies; using Microsoft.EntityFrameworkCore; using Microsoft.IdentityModel.Tokens; using System.Text; -using Foxel.Services.Attributes; -using System.Reflection; using Foxel.Services.AI; using Foxel.Services.Auth; using Foxel.Services.Background; @@ -48,7 +46,7 @@ public static class ServiceCollectionExtensions Console.WriteLine($"数据库连接: {connectionString}"); services.AddDbContextFactory(options => - options.UseNpgsql(connectionString, o => o.UseVector())); + options.UseNpgsql(connectionString)); } public static void AddApplicationOpenApi(this IServiceCollection services) diff --git a/Foxel.csproj b/Foxel.csproj index ca26b5c..654a441 100644 --- a/Foxel.csproj +++ b/Foxel.csproj @@ -16,10 +16,9 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive + - - - + diff --git a/Models/DataBase/Picture.cs b/Models/DataBase/Picture.cs index 6b09011..3f7eb18 100644 --- a/Models/DataBase/Picture.cs +++ b/Models/DataBase/Picture.cs @@ -2,7 +2,6 @@ using System.ComponentModel.DataAnnotations; using System.ComponentModel.DataAnnotations.Schema; using System.Text.Json; using Foxel.Services.Attributes; -using Vector = Pgvector.Vector; namespace Foxel.Models.DataBase; @@ -15,7 +14,7 @@ public class Picture : BaseModel [StringLength(1024)] public string? ThumbnailPath { get; set; } = string.Empty; [StringLength(2000)] public string Description { get; set; } = string.Empty; - [Column(TypeName = "vector(1024)")] public Vector? Embedding { get; set; } + public float[]? Embedding { get; set; } public DateTime? TakenAt { get; set; } @@ -57,8 +56,8 @@ public enum PermissionType public enum ProcessingStatus { - Pending, // 等待处理 - Processing, // 处理中 - Completed, // 处理完成 - Failed // 处理失败 + Pending, // 等待处理 + Processing, // 处理中 + Completed, // 处理完成 + Failed // 处理失败 } \ No newline at end of file diff --git a/Models/Vector/PictureVector.cs b/Models/Vector/PictureVector.cs new file mode 100644 index 0000000..61d4e7a --- /dev/null +++ b/Models/Vector/PictureVector.cs @@ -0,0 +1,12 @@ +namespace Foxel.Models.Vector; + +using Microsoft.Extensions.VectorData; + +public class PictureVector +{ + [VectorStoreKey] public int Id { get; set; } + [VectorStoreData] public string? Name { get; set; } + + [VectorStoreVector(Dimensions: 1024, DistanceFunction = DistanceFunction.CosineSimilarity)] + public ReadOnlyMemory? Embedding { get; set; } +} \ No newline at end of file diff --git a/MyDbContext.cs b/MyDbContext.cs index 14bec42..1c1f60d 100644 --- a/MyDbContext.cs +++ b/MyDbContext.cs @@ -5,17 +5,6 @@ namespace Foxel; public class MyDbContext(DbContextOptions options) : DbContext(options) { - protected override void OnModelCreating(ModelBuilder modelBuilder) - { - modelBuilder.HasPostgresExtension("vector"); - - modelBuilder.Entity() - .HasIndex(p => p.Embedding) - .HasMethod("ivfflat") - .HasOperators("vector_cosine_ops") - .HasStorageParameter("lists", 100); - } - public DbSet Pictures { get; set; } = null!; public DbSet Users { get; set; } = null!; public DbSet Tags { get; set; } = null!; diff --git a/Program.cs b/Program.cs index dccc7d4..1f078ca 100644 --- a/Program.cs +++ b/Program.cs @@ -1,8 +1,7 @@ -using Foxel; using Foxel.Extensions; using Foxel.Services.Initializer; +using Foxel.Services.VectorDB; using Microsoft.AspNetCore.HttpOverrides; -using Microsoft.EntityFrameworkCore; var builder = WebApplication.CreateBuilder(args); var environment = builder.Environment; @@ -17,6 +16,7 @@ builder.Services.AddHttpContextAccessor(); builder.Services.AddApplicationAuthentication(); builder.Services.AddApplicationAuthorization(); builder.Services.AddApplicationCors(); +builder.Services.AddSingleton(); builder.Services.Configure(options => { options.ForwardedHeaders = ForwardedHeaders.XForwardedFor | ForwardedHeaders.XForwardedProto; diff --git a/Services/Background/BackgroundTaskQueue.cs b/Services/Background/BackgroundTaskQueue.cs index 7bb3251..5037402 100644 --- a/Services/Background/BackgroundTaskQueue.cs +++ b/Services/Background/BackgroundTaskQueue.cs @@ -209,7 +209,7 @@ public sealed class BackgroundTaskQueue : IBackgroundTaskQueue, IDisposable // 1. 获取图片信息 await UpdatePictureStatus(task.PictureId, ProcessingStatus.Processing, 10); - + if (picture == null) { throw new Exception($"找不到ID为{task.PictureId}的图片"); @@ -225,7 +225,7 @@ public sealed class BackgroundTaskQueue : IBackgroundTaskQueue, IDisposable { // 非本地存储需要先下载文件 await UpdatePictureStatus(task.PictureId, ProcessingStatus.Processing, 15); - localFilePath = await storageService.ExecuteAsync(picture.StorageType, + localFilePath = await storageService.ExecuteAsync(picture.StorageType, provider => provider.DownloadFileAsync(picture.Path)); isTempFile = true; } @@ -245,11 +245,12 @@ public sealed class BackgroundTaskQueue : IBackgroundTaskQueue, IDisposable // 更新缩略图路径到数据库 await UpdatePictureStatus(task.PictureId, ProcessingStatus.Processing, 25); - + if (picture.StorageType == StorageType.Local) { // 本地存储缩略图 - var relativeThumbnailPath = $"/Uploads/{Path.GetRelativePath("Uploads", Path.GetDirectoryName(thumbnailPath)!)}/{Path.GetFileName(thumbnailPath)}"; + var relativeThumbnailPath = + $"/Uploads/{Path.GetRelativePath("Uploads", Path.GetDirectoryName(thumbnailPath)!)}/{Path.GetFileName(thumbnailPath)}"; picture.ThumbnailPath = relativeThumbnailPath.Replace('\\', '/'); } else @@ -275,7 +276,7 @@ public sealed class BackgroundTaskQueue : IBackgroundTaskQueue, IDisposable // 4. 从EXIF中提取拍摄时间并确保是UTC格式 picture.TakenAt = ImageHelper.ParseExifDateTime(exifInfo.DateTimeOriginal); - + // 保存缩略图和EXIF信息的更改,确保这些基本信息即使在后续步骤失败时也能保存 await dbContext.SaveChangesAsync(); @@ -300,7 +301,18 @@ public sealed class BackgroundTaskQueue : IBackgroundTaskQueue, IDisposable await UpdatePictureStatus(task.PictureId, ProcessingStatus.Processing, 60); var combinedText = $"{finalTitle}. {finalDescription}"; var embedding = await aiService.GetEmbeddingAsync(combinedText); - picture.Embedding = new Pgvector.Vector(embedding); + picture.Embedding = embedding; + if (picture.UserId.HasValue && embedding.Length > 0) + { + var vectorDbService = scope.ServiceProvider.GetRequiredService(); + var pictureVector = new Foxel.Models.Vector.PictureVector + { + Id = picture.Id, + Name = picture.Name, + Embedding = embedding + }; + await vectorDbService.AddPictureToUserCollectionAsync(picture.UserId.Value, pictureVector); + } // 8. 获取所有可用标签名称 await UpdatePictureStatus(task.PictureId, ProcessingStatus.Processing, 70); @@ -367,7 +379,7 @@ public sealed class BackgroundTaskQueue : IBackgroundTaskQueue, IDisposable { picture.ProcessingStatus = ProcessingStatus.Failed; picture.ProcessingError = ex.Message; - + try { await dbContext.SaveChangesAsync(); diff --git a/Services/Media/PictureService.cs b/Services/Media/PictureService.cs index c63cebf..07ed9cb 100644 --- a/Services/Media/PictureService.cs +++ b/Services/Media/PictureService.cs @@ -8,10 +8,9 @@ using Foxel.Services.Attributes; using Foxel.Services.Background; using Foxel.Services.Configuration; using Foxel.Services.Storage; +using Foxel.Services.VectorDB; using Foxel.Utils; using Microsoft.EntityFrameworkCore; -using Pgvector; -using Pgvector.EntityFrameworkCore; namespace Foxel.Services.Media; @@ -20,6 +19,7 @@ public class PictureService( IAiService embeddingService, IConfigService configuration, IBackgroundTaskQueue backgroundTaskQueue, + VectorDbService vectorDbService, IStorageService storageService) : IPictureService { @@ -61,7 +61,7 @@ public class PictureService( { // 如果向量搜索失败,记录错误并回退到标准搜索 Console.WriteLine($"向量搜索失败,回退到标准搜索: {ex.Message}"); - + // 如果是明确的配置错误,则向上抛出异常 if (ex.Message.Contains("请检查嵌入模型配置")) { @@ -69,7 +69,7 @@ public class PictureService( } } } - + // 执行标准搜索(作为默认方法或向量搜索的回退选项) return await PerformStandardSearchAsync( dbContext, page, pageSize, searchQuery, tags, @@ -95,78 +95,41 @@ public class PictureService( int? ownerId, bool includeAllPublic) { - try + var queryEmbedding = await embeddingService.GetEmbeddingAsync(searchQuery); + var res = await vectorDbService.SearchAsync(queryEmbedding, userId); + + var ids = res.Select(r => r.Id).ToList(); + var picturesData = await dbContext.Pictures + .Include(p => p.Tags) + .Include(p => p.User) + .Where(p => ids.Contains(p.Id)) + .ToListAsync(); + var picturesOrdered = ids + .Select(id => picturesData.FirstOrDefault(p => p.Id == id)) + .Where(p => p != null) + .ToList(); + var paginatedResults = picturesOrdered + .Skip((page - 1) * pageSize) + .Take(pageSize) + .Select(p => MapPictureToResponse(p!)) + .ToList(); + + var totalCount = picturesOrdered.Count; + + await PopulateFavoriteInfo(dbContext, paginatedResults, userId); + + if (userId.HasValue) { - float[]? queryEmbedding = null; - try - { - queryEmbedding = await embeddingService.GetEmbeddingAsync(searchQuery); - - // 检查嵌入向量是否有效 - if (queryEmbedding == null || queryEmbedding.Length == 0) - { - throw new InvalidOperationException("嵌入模型返回了空向量"); - } - } - catch (Exception ex) - { - throw new InvalidOperationException($"向量搜索失败,请检查嵌入模型配置: {ex.Message}", ex); - } - - var queryVector = new Vector(queryEmbedding); - - // 构建基础查询 - var query = dbContext.Pictures - .Include(p => p.Tags) - .Include(p => p.User) - .Where(p => p.Embedding != null); - - // 应用共通的查询条件 - query = ApplyCommonFilters(query, tags, startDate, endDate, userId, onlyWithGps, - excludeAlbumId, albumId, onlyFavorites, ownerId, includeAllPublic); - - // 执行向量搜索 - var allResults = await query - .Select(p => new - { - Picture = p, - Similarity = 1.0 - p.Embedding!.CosineDistance(queryVector) - }) - .Where(p => p.Similarity >= similarityThreshold) - .OrderByDescending(p => p.Similarity) - .ToListAsync(); - - // 计算总数并分页 - var totalCount = allResults.Count; - - var paginatedResults = allResults - .Skip((page - 1) * pageSize) - .Take(pageSize) - .Select(r => MapPictureToResponse(r.Picture)) - .ToList(); - - // 处理收藏信息 - await PopulateFavoriteInfo(dbContext, paginatedResults, userId); - - // 为当前用户的图片添加相册信息 - if (userId.HasValue) - { - await PopulateAlbumInfo(dbContext, paginatedResults, userId.Value); - } - - return new PaginatedResult - { - Data = paginatedResults, - Page = page, - PageSize = pageSize, - TotalCount = totalCount - }; + await PopulateAlbumInfo(dbContext, paginatedResults, userId.Value); } - catch (Exception ex) + + return new PaginatedResult { - Console.WriteLine($"向量搜索失败: {ex.Message}"); - throw new InvalidOperationException($"向量搜索失败: {ex.Message}", ex); - } + Data = paginatedResults, + Page = page, + PageSize = pageSize, + TotalCount = totalCount + }; } // 执行标准搜索 @@ -377,9 +340,9 @@ public class PictureService( { Id = picture.Id, Name = picture.Name, - Path = storageService.ExecuteAsync(picture.StorageType, provider => + Path = storageService.ExecuteAsync(picture.StorageType, provider => Task.FromResult(provider.GetUrl(picture.Path ?? string.Empty))).Result, - ThumbnailPath = storageService.ExecuteAsync(picture.StorageType, provider => + ThumbnailPath = storageService.ExecuteAsync(picture.StorageType, provider => Task.FromResult(provider.GetUrl(picture.ThumbnailPath ?? string.Empty))).Result, Description = picture.Description, CreatedAt = picture.CreatedAt, @@ -482,8 +445,8 @@ public class PictureService( string? configValue = configuration[configKey]; return !string.IsNullOrEmpty(configValue) && Enum.TryParse(configValue, out var configStorageType) - ? configStorageType - : StorageType.Local; + ? configStorageType + : StorageType.Local; } if (userId == null) @@ -494,6 +457,7 @@ public class PictureService( { storageType = GetConfigStorageType("Storage:DefaultStorage"); } + ImageFormat convertToFormat = ImageFormat.Original; string defaultFormatConfig = configuration["Upload:DefaultImageFormat"]; if (!string.IsNullOrEmpty(defaultFormatConfig)) @@ -503,12 +467,14 @@ public class PictureService( convertToFormat = parsedFormat; } } + int quality = 100; string defaultQualityConfig = configuration["Upload:DefaultImageQuality"]; if (!string.IsNullOrEmpty(defaultQualityConfig)) { quality = int.Parse(defaultQualityConfig); } + string originalFileName = fileName; string finalFileName = fileName; string finalContentType = contentType; @@ -553,7 +519,7 @@ public class PictureService( try { // 使用存储服务保存文件 - string relativePath = await storageService.ExecuteAsync(storageType.Value, + string relativePath = await storageService.ExecuteAsync(storageType.Value, provider => provider.SaveAsync(finalStream, finalFileName, finalContentType)); // 创建基本的Picture对象,使用文件名作为标题和描述 @@ -620,10 +586,12 @@ public class PictureService( { Id = picture.Id, Name = picture.Name, - Path = await storageService.ExecuteAsync(picture.StorageType, provider => + Path = await storageService.ExecuteAsync(picture.StorageType, provider => Task.FromResult(provider.GetUrl(relativePath))), - ThumbnailPath = isAnonymous ? await storageService.ExecuteAsync(picture.StorageType, provider => - Task.FromResult(provider.GetUrl(relativePath))) : null, + ThumbnailPath = isAnonymous + ? await storageService.ExecuteAsync(picture.StorageType, provider => + Task.FromResult(provider.GetUrl(relativePath))) + : null, Description = picture.Description, CreatedAt = picture.CreatedAt, Tags = new List(), @@ -700,7 +668,8 @@ public class PictureService( new List<(int PictureId, string Path, string ThumbnailPath, int? UserId, StorageType StorageType)>(); foreach (var picture in picturesToDelete) { - filesToDelete.Add((picture.Id, picture.Path, picture.ThumbnailPath ?? string.Empty, picture.User?.Id, picture.StorageType)); + filesToDelete.Add((picture.Id, picture.Path, picture.ThumbnailPath ?? string.Empty, picture.User?.Id, + picture.StorageType)); } if (picturesToDelete.Any()) @@ -718,13 +687,13 @@ public class PictureService( try { // 使用存储服务删除文件 - await storageService.ExecuteAsync(storageType, + await storageService.ExecuteAsync(storageType, provider => provider.DeleteAsync(path)); // 删除缩略图 if (!string.IsNullOrEmpty(thumbnailPath)) { - await storageService.ExecuteAsync(storageType, + await storageService.ExecuteAsync(storageType, provider => provider.DeleteAsync(thumbnailPath)); } } @@ -780,11 +749,11 @@ public class PictureService( { var combinedText = $"{picture.Name}. {picture.Description}"; var embedding = await embeddingService.GetEmbeddingAsync(combinedText); - + // 只有在成功获取到非空嵌入向量时才更新 if (embedding != null && embedding.Length > 0) { - picture.Embedding = new Vector(embedding); + picture.Embedding = embedding; } else { @@ -826,9 +795,9 @@ public class PictureService( { Id = picture.Id, Name = picture.Name, - Path = await storageService.ExecuteAsync(picture.StorageType, provider => + Path = await storageService.ExecuteAsync(picture.StorageType, provider => Task.FromResult(provider.GetUrl(picture.Path ?? string.Empty))), - ThumbnailPath = await storageService.ExecuteAsync(picture.StorageType, provider => + ThumbnailPath = await storageService.ExecuteAsync(picture.StorageType, provider => Task.FromResult(provider.GetUrl(picture.ThumbnailPath ?? string.Empty))), Description = picture.Description, CreatedAt = picture.CreatedAt,