Skip to content

Commit d29473e

Browse files
committed
update dictionary
1 parent 7618fd6 commit d29473e

File tree

3 files changed

+62
-112
lines changed

3 files changed

+62
-112
lines changed

internal/logic/update_embedding.go

Lines changed: 11 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,11 @@ package logic
22

33
import (
44
"context"
5-
"errors"
65
"fmt"
7-
"strings"
86

97
"github.com/zeromicro/go-zero/core/logx"
10-
"github.com/zgsm-ai/codebase-indexer/internal/dao/model"
11-
"github.com/zgsm-ai/codebase-indexer/internal/errs"
12-
"github.com/zgsm-ai/codebase-indexer/internal/store/vector"
138
"github.com/zgsm-ai/codebase-indexer/internal/svc"
149
"github.com/zgsm-ai/codebase-indexer/internal/types"
15-
"gorm.io/gorm"
1610
)
1711

1812
type UpdateEmbeddingLogic struct {
@@ -30,125 +24,30 @@ func NewUpdateEmbeddingLogic(ctx context.Context, svcCtx *svc.ServiceContext) *U
3024
}
3125

3226
func (l *UpdateEmbeddingLogic) UpdateEmbeddingPath(req *types.UpdateEmbeddingPathRequest) (resp *types.UpdateEmbeddingPathResponseData, err error) {
33-
clientId := req.ClientId
3427
codebasePath := req.CodebasePath
3528
oldPath := req.OldPath
3629
newPath := req.NewPath
3730

38-
// 查找代码库记录
39-
codebase, err := l.svcCtx.Querier.Codebase.FindByClientIdAndPath(l.ctx, clientId, codebasePath)
40-
if errors.Is(err, gorm.ErrRecordNotFound) {
41-
return nil, errs.NewRecordNotFoundErr(types.NameCodeBase, fmt.Sprintf("client_id: %s, codebasePath: %s", clientId, codebasePath))
42-
}
43-
if err != nil {
44-
return nil, err
45-
}
46-
47-
// 检查是否是目录
48-
// fullOldPath := filepath.Join(codebasePath, oldPath)
49-
// info, err := os.Stat(fullOldPath)
50-
// if err != nil {
51-
// return nil, fmt.Errorf("failed to stat path %s: %w", fullOldPath, err)
52-
// }
53-
5431
var modifiedFiles []string
5532

56-
// if info.IsDir() {
57-
// // 处理目录情况
58-
// modifiedFiles, err = l.updateDirectoryPaths(codebase, oldPath, newPath)
59-
// if err != nil {
60-
// return nil, fmt.Errorf("failed to update directory paths: %w", err)
61-
// }
62-
// } else {
63-
// // 处理文件情况
64-
modifiedFiles, err = l.updateFilePath(codebase, oldPath, newPath)
33+
// 处理目录情况,使用 UpdateCodeChunksDictionary 接口
34+
err = l.svcCtx.VectorStore.UpdateCodeChunksDictionary(l.ctx, codebasePath, oldPath, newPath)
6535
if err != nil {
66-
return nil, fmt.Errorf("failed to update file path: %w", err)
67-
}
68-
// }
69-
70-
return &types.UpdateEmbeddingPathResponseData{
71-
ModifiedFiles: modifiedFiles,
72-
TotalFiles: len(modifiedFiles),
73-
}, nil
74-
}
75-
76-
func (l *UpdateEmbeddingLogic) updateDirectoryPaths(codebase *model.Codebase, oldDirPath, newDirPath string) ([]string, error) {
77-
// 获取该目录下所有的文件路径
78-
records, err := l.svcCtx.VectorStore.GetCodebaseRecords(l.ctx, codebase.ID, codebase.Path)
79-
if err != nil {
80-
return nil, fmt.Errorf("failed to get codebase records: %w", err)
81-
}
82-
83-
var modifiedFiles []string
84-
var pathUpdates []*types.CodeChunkPathUpdate
85-
86-
for _, record := range records {
87-
// 检查文件路径是否以旧目录路径开头
88-
if strings.HasPrefix(record.FilePath, oldDirPath) {
89-
// 构建新的文件路径
90-
newFilePath := strings.Replace(record.FilePath, oldDirPath, newDirPath, 1)
91-
92-
// 创建路径更新请求
93-
pathUpdate := &types.CodeChunkPathUpdate{
94-
CodebaseId: codebase.ID,
95-
OldFilePath: record.FilePath,
96-
NewFilePath: newFilePath,
97-
}
98-
99-
pathUpdates = append(pathUpdates, pathUpdate)
100-
modifiedFiles = append(modifiedFiles, newFilePath)
101-
}
36+
return nil, fmt.Errorf("failed to update directory paths: %w", err)
10237
}
10338

104-
if len(pathUpdates) > 0 {
105-
// 使用新的直接更新路径的方法,而不是删除再插入
106-
err = l.svcCtx.VectorStore.UpdateCodeChunksPaths(l.ctx, pathUpdates, vector.Options{
107-
CodebaseId: codebase.ID,
108-
CodebasePath: codebase.Path,
109-
})
110-
if err != nil {
111-
return nil, fmt.Errorf("failed to update chunk paths: %w", err)
112-
}
113-
}
114-
115-
return modifiedFiles, nil
116-
}
117-
118-
func (l *UpdateEmbeddingLogic) updateFilePath(codebase *model.Codebase, oldFilePath, newFilePath string) ([]string, error) {
119-
// 获取该文件的记录
120-
records, err := l.svcCtx.VectorStore.GetCodebaseRecords(l.ctx, codebase.ID, codebase.Path)
39+
// 获取更新后的记录以返回修改的文件列表
40+
records, err := l.svcCtx.VectorStore.GetDictionaryRecords(l.ctx, codebasePath, newPath)
12141
if err != nil {
122-
return nil, fmt.Errorf("failed to get codebase records: %w", err)
42+
return nil, fmt.Errorf("failed to get updated dictionary records: %w", err)
12343
}
12444

125-
var modifiedFiles []string
126-
127-
// 检查是否有需要更新的记录
12845
for _, record := range records {
129-
if record.FilePath == oldFilePath {
130-
modifiedFiles = append(modifiedFiles, newFilePath)
131-
}
132-
}
133-
134-
if len(modifiedFiles) > 0 {
135-
// 使用直接更新路径的方法
136-
pathUpdates := []*types.CodeChunkPathUpdate{
137-
{
138-
CodebaseId: codebase.ID,
139-
OldFilePath: oldFilePath,
140-
NewFilePath: newFilePath,
141-
},
142-
}
143-
144-
err = l.svcCtx.VectorStore.UpdateCodeChunksPaths(l.ctx, pathUpdates, vector.Options{
145-
CodebaseId: codebase.ID,
146-
CodebasePath: codebase.Path,
147-
})
148-
if err != nil {
149-
return nil, fmt.Errorf("failed to update chunk paths: %w", err)
150-
}
46+
modifiedFiles = append(modifiedFiles, record.FilePath)
15147
}
15248

153-
return modifiedFiles, nil
49+
return &types.UpdateEmbeddingPathResponseData{
50+
ModifiedFiles: modifiedFiles,
51+
TotalFiles: len(modifiedFiles),
52+
}, nil
15453
}

internal/store/vector/vector_store.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ type Store interface {
2121
UpsertCodeChunks(ctx context.Context, chunks []*types.CodeChunk, options Options) error
2222
DeleteCodeChunks(ctx context.Context, chunks []*types.CodeChunk, options Options) error
2323
UpdateCodeChunksPaths(ctx context.Context, updates []*types.CodeChunkPathUpdate, options Options) error
24+
UpdateCodeChunksDictionary(ctx context.Context, codebasePath string, dictionary string, newDictionary string) error
2425
Query(ctx context.Context, query string, topK int, options Options) ([]*types.SemanticFileItem, error)
2526
Close()
2627
}

internal/store/vector/weaviate_wrapper.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,3 +1422,53 @@ func (r *weaviateWrapper) getRecordsByPathPrefix(ctx context.Context, pathPrefix
14221422
// 解析响应获取记录
14231423
return r.unmarshalRecordsResponse(res)
14241424
}
1425+
1426+
// UpdateCodeChunksDictionary 更新代码块的目录路径,通过匹配filePath的前缀
1427+
func (r *weaviateWrapper) UpdateCodeChunksDictionary(ctx context.Context, codebasePath string, dictionary string, newDictionary string) error {
1428+
// 生成租户名称
1429+
tenantName, err := r.generateTenantName(codebasePath)
1430+
if err != nil {
1431+
return fmt.Errorf("failed to generate tenant name: %w", err)
1432+
}
1433+
1434+
// 确保路径前缀以/结尾,以便正确匹配子目录和文件
1435+
if dictionary != "" && !strings.HasSuffix(dictionary, "/") {
1436+
dictionary += "/"
1437+
}
1438+
if newDictionary != "" && !strings.HasSuffix(newDictionary, "/") {
1439+
newDictionary += "/"
1440+
}
1441+
1442+
// 获取所有匹配原目录前缀的记录
1443+
records, err := r.getRecordsByPathPrefix(ctx, dictionary, tenantName)
1444+
if err != nil {
1445+
return fmt.Errorf("failed to get records by path prefix: %w", err)
1446+
}
1447+
1448+
if len(records) == 0 {
1449+
// 没有找到需要更新的记录
1450+
return nil
1451+
}
1452+
1453+
// 批量更新记录路径
1454+
for _, record := range records {
1455+
// 构建新的文件路径:将原目录前缀替换为新目录前缀
1456+
newFilePath := strings.Replace(record.FilePath, dictionary, newDictionary, 1)
1457+
1458+
// 获取对象的ID
1459+
objectIds, err := r.getObjectIdsByPath(ctx, record.CodebaseId, record.FilePath, tenantName)
1460+
if err != nil {
1461+
return fmt.Errorf("failed to get object ids by path: %w", err)
1462+
}
1463+
1464+
// 更新每个对象的路径
1465+
for _, objectId := range objectIds {
1466+
err = r.updateObjectPath(ctx, objectId, newFilePath, tenantName, record)
1467+
if err != nil {
1468+
return fmt.Errorf("failed to update object path: %w", err)
1469+
}
1470+
}
1471+
}
1472+
1473+
return nil
1474+
}

0 commit comments

Comments
 (0)