@@ -1,11 +1,13 @@
 # -*- coding: utf-8 -*-
 import logging
 import time
+
 from modelcache import cache
 from modelcache.utils.error import NotInitError
 from modelcache.utils.time import time_cal
 from modelcache.processor.pre import multi_analysis
 from FlagEmbedding import FlagReranker
+from modelcache.manager.vector_data import manager
 
 USE_RERANKER = False  # If True, enable the reranker; otherwise use the original logic
 
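For reference, FlagReranker scores (query, candidate) pairs with a cross-encoder instead of comparing stored embeddings. A minimal standalone sketch of how the reranker instantiated later in this diff is typically called; the normalize flag, which squashes raw logits into (0, 1) with a sigmoid, is our assumption about the installed FlagEmbedding version, and the example strings are made up:

    from FlagEmbedding import FlagReranker

    # Same model name and use_fp16 setting as in the diff below.
    reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=False)

    # compute_score takes a list of [query, candidate] pairs and returns one
    # relevance score per pair; higher means more relevant.
    scores = reranker.compute_score(
        [['what is a semantic cache?',
          'A semantic cache stores LLM answers keyed by meaning.']],
        normalize=True,  # assumption: sigmoid-normalize logits into (0, 1)
    )
    print(scores)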
|
@@ -44,39 +46,47 @@ def adapt_query(cache_data_convert, *args, **kwargs):
     cache_answers = []
     cache_questions = []
     cache_ids = []
-    similarity_threshold = chat_cache.config.similarity_threshold
-    similarity_threshold_long = chat_cache.config.similarity_threshold_long
+    cosine_similarity = cache_data_list[0][0] if cache_data_list else 0.0
 
-    min_rank, max_rank = chat_cache.similarity_evaluation.range()
-    rank_threshold = (max_rank - min_rank) * similarity_threshold * cache_factor
-    rank_threshold_long = (max_rank - min_rank) * similarity_threshold_long * cache_factor
-    rank_threshold = (
-        max_rank
-        if rank_threshold > max_rank
-        else min_rank
-        if rank_threshold < min_rank
-        else rank_threshold
-    )
-    rank_threshold_long = (
-        max_rank
-        if rank_threshold_long > max_rank
-        else min_rank
-        if rank_threshold_long < min_rank
-        else rank_threshold_long
-    )
-    if cache_data_list is None or len(cache_data_list) == 0:
-        rank_pre = -1.0
+    if manager.MPNet_base:
+        # Rely on the cosine similarity evaluation built into Milvus
+        if cosine_similarity < 0.9:
+            return None
     else:
-        cache_data_dict = {'search_result': cache_data_list[0]}
-        rank_pre = chat_cache.similarity_evaluation.evaluation(
-            None,
-            cache_data_dict,
-            extra_param=context.get("evaluation_func", None),
+        # Original path: similarity evaluation based on L2 distance
+        similarity_threshold = chat_cache.config.similarity_threshold
+        similarity_threshold_long = chat_cache.config.similarity_threshold_long
+
+        min_rank, max_rank = chat_cache.similarity_evaluation.range()
+        rank_threshold = (max_rank - min_rank) * similarity_threshold * cache_factor
+        rank_threshold_long = (max_rank - min_rank) * similarity_threshold_long * cache_factor
+        rank_threshold = (
+            max_rank
+            if rank_threshold > max_rank
+            else min_rank
+            if rank_threshold < min_rank
+            else rank_threshold
+        )
+        rank_threshold_long = (
+            max_rank
+            if rank_threshold_long > max_rank
+            else min_rank
+            if rank_threshold_long < min_rank
+            else rank_threshold_long
         )
-    if rank_pre < rank_threshold:
-        return None
+        if cache_data_list is None or len(cache_data_list) == 0:
+            rank_pre = -1.0
+        else:
+            cache_data_dict = {'search_result': cache_data_list[0]}
+            rank_pre = chat_cache.similarity_evaluation.evaluation(
+                None,
+                cache_data_dict,
+                extra_param=context.get("evaluation_func", None),
+            )
+        if rank_pre < rank_threshold:
+            return None
 
-    if USE_RERANKER:
+    if USE_RERANKER and not manager.MPNet_base:
         reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=False)
         for cache_data in cache_data_list:
             primary_id = cache_data[1]
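Two details in the hunk above are worth spelling out. First, the hard-coded 0.9 gate assumes the Milvus collection uses a cosine (or normalized inner-product) metric, so that search scores are cosine similarities where higher is better. Second, the nested conditional expressions only clamp the computed thresholds into the evaluator's [min_rank, max_rank] range. A standalone sketch of an equivalent, more idiomatic form; the clamp helper and the sample numbers are ours, while in the diff the inputs come from chat_cache.config and chat_cache.similarity_evaluation.range():

    def clamp(value: float, lo: float, hi: float) -> float:
        # Behaves exactly like the nested conditional expressions above.
        return max(lo, min(value, hi))

    min_rank, max_rank = 0.0, 1.0
    similarity_threshold, cache_factor = 0.95, 1.0
    rank_threshold = clamp((max_rank - min_rank) * similarity_threshold * cache_factor,
                           min_rank, max_rank)
    print(rank_threshold)  # 0.95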
@@ -132,45 +142,50 @@ def adapt_query(cache_data_convert, *args, **kwargs):
         if ret is None:
             continue
 
-        if "deps" in context and hasattr(ret.question, "deps"):
-            eval_query_data = {
-                "question": context["deps"][0]["data"],
-                "embedding": None
-            }
-            eval_cache_data = {
-                "question": ret.question.deps[0].data,
-                "answer": ret.answers[0].answer,
-                "search_result": cache_data,
-                "embedding": None,
-            }
+        if manager.MPNet_base:
+            cache_answers.append((cosine_similarity, ret[1]))
+            cache_questions.append((cosine_similarity, ret[0]))
+            cache_ids.append((cosine_similarity, primary_id))
         else:
-            eval_query_data = {
-                "question": pre_embedding_data,
-                "embedding": embedding_data,
-            }
+            if "deps" in context and hasattr(ret.question, "deps"):
+                eval_query_data = {
+                    "question": context["deps"][0]["data"],
+                    "embedding": None
+                }
+                eval_cache_data = {
+                    "question": ret.question.deps[0].data,
+                    "answer": ret.answers[0].answer,
+                    "search_result": cache_data,
+                    "embedding": None,
+                }
+            else:
+                eval_query_data = {
+                    "question": pre_embedding_data,
+                    "embedding": embedding_data,
+                }
 
-            eval_cache_data = {
-                "question": ret[0],
-                "answer": ret[1],
-                "search_result": cache_data,
-                "embedding": None
-            }
-        rank = chat_cache.similarity_evaluation.evaluation(
-            eval_query_data,
-            eval_cache_data,
-            extra_param=context.get("evaluation_func", None),
-        )
+                eval_cache_data = {
+                    "question": ret[0],
+                    "answer": ret[1],
+                    "search_result": cache_data,
+                    "embedding": None
+                }
+            rank = chat_cache.similarity_evaluation.evaluation(
+                eval_query_data,
+                eval_cache_data,
+                extra_param=context.get("evaluation_func", None),
+            )
 
-        if len(pre_embedding_data) <= 256:
-            if rank_threshold <= rank:
-                cache_answers.append((rank, ret[1]))
-                cache_questions.append((rank, ret[0]))
-                cache_ids.append((rank, primary_id))
-        else:
-            if rank_threshold_long <= rank:
-                cache_answers.append((rank, ret[1]))
-                cache_questions.append((rank, ret[0]))
-                cache_ids.append((rank, primary_id))
+            if len(pre_embedding_data) <= 256:
+                if rank_threshold <= rank:
+                    cache_answers.append((rank, ret[1]))
+                    cache_questions.append((rank, ret[0]))
+                    cache_ids.append((rank, primary_id))
+            else:
+                if rank_threshold_long <= rank:
+                    cache_answers.append((rank, ret[1]))
+                    cache_questions.append((rank, ret[0]))
+                    cache_ids.append((rank, primary_id))
 
     cache_answers = sorted(cache_answers, key=lambda x: x[0], reverse=True)
     cache_questions = sorted(cache_questions, key=lambda x: x[0], reverse=True)
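Note that in the MPNet_base path every candidate reaching this loop is recorded with cosine_similarity taken from the top hit (cache_data_list[0][0]), since the 0.9 gate already ran before the loop; only the L2 path scores each candidate individually against a length-dependent threshold. A simplified sketch of the per-candidate decision after this change; keep_candidate is a hypothetical helper, not code from the PR:

    def keep_candidate(mpnet_base: bool, rank: float, query_len: int,
                       rank_threshold: float, rank_threshold_long: float) -> bool:
        # MPNet path: candidates are kept unconditionally here, because the
        # cosine-similarity gate (>= 0.9) was applied before the loop.
        if mpnet_base:
            return True
        # L2 path: queries up to 256 characters use rank_threshold,
        # longer queries use rank_threshold_long.
        threshold = rank_threshold if query_len <= 256 else rank_threshold_long
        return threshold <= rank

    print(keep_candidate(False, rank=0.97, query_len=42,
                         rank_threshold=0.95, rank_threshold_long=0.97))  # True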