1111
1212from graphrag .index .cache .pipeline_cache import PipelineCache
1313from graphrag .index .config .pipeline import PipelineConfig
14- from graphrag .index .operations .embed_text import embed_text
1514from graphrag .index .operations .summarize_descriptions .strategies import (
1615 run_graph_intelligence as run_entity_summarization ,
1716)
@@ -67,8 +66,6 @@ def _group_and_resolve_entities(
6766 "description" : lambda x : list (x .astype (str )), # Ensure str
6867 # Concatenate nd.array into a single list
6968 "text_unit_ids" : lambda x : "," .join (str (i ) for j in x .tolist () for i in j ),
70- # Keep only descriptions where the original value wasn't modified
71- "description_embedding" : lambda x : x .iloc [0 ] if len (x ) == 1 else np .nan ,
7269 })
7370 .reset_index ()
7471 )
@@ -87,7 +84,6 @@ def _group_and_resolve_entities(
8784 "human_readable_id" ,
8885 "graph_embedding" ,
8986 "text_unit_ids" ,
90- "description_embedding" ,
9187 ],
9288 ]
9389
@@ -141,48 +137,3 @@ async def process_row(row):
141137 entities_df ["description" ] = results
142138
143139 return entities_df
144-
145-
146- async def _run_entity_description_embedding (
147- entities_df : pd .DataFrame ,
148- config : PipelineConfig ,
149- cache : PipelineCache ,
150- callbacks : VerbCallbacks ,
151- ) -> pd .DataFrame :
152- """Run entity description embedding.
153-
154- Parameters
155- ----------
156- entities_df : pd.DataFrame
157- The entities dataframe.
158- config : PipelineConfig
159- The pipeline configuration.
160- cache : PipelineCache
161- Pipeline cache used during the embedding process.
162- callbacks : WorkflowCallbacks
163- The workflow callbacks.
164-
165- Returns
166- -------
167- pd.DataFrame
168- The updated entities dataframe with description embeddings.
169- """
170- embed_config = _find_workflow_config (
171- config , "create_final_entities" , "entity_name_description_embed"
172- )
173-
174- # Concatenate name and description for embedding
175- entities_df ["name_description" ] = (
176- entities_df ["name" ] + ":" + entities_df ["description" ]
177- )
178-
179- # Run embedding
180- entities_df ["description_embedding" ] = await embed_text (
181- entities_df ,
182- callbacks ,
183- cache ,
184- embed_column = "name_description" ,
185- embedding_name = "entity.description" ,
186- strategy = embed_config .get ("strategy" , {}),
187- )
188- return entities_df .drop (columns = ["name_description" ])
0 commit comments