1818)
1919from graphrag .index .update .entities import (
2020 _group_and_resolve_entities ,
21- _run_entity_summarization ,
2221)
2322from graphrag .index .update .relationships import _update_and_merge_relationships
23+ from graphrag .index .workflows .extract_graph import get_summarized_entities_relationships
2424from graphrag .index .workflows .generate_text_embeddings import generate_text_embeddings
2525from graphrag .logger .print_progress import ProgressLogger
2626from graphrag .storage .pipeline_storage import PipelineStorage
@@ -104,18 +104,16 @@ async def update_dataframe_outputs(
104104 "documents" , previous_storage , delta_storage , output_storage
105105 )
106106
107- # Update entities and merge them
108- progress_logger .info ("Updating Entities" )
109- merged_entities_df , entity_id_mapping = await _update_entities (
107+ # Update entities, relationships and merge them
108+ progress_logger .info ("Updating Entities and Relationships" )
109+ (
110+ merged_entities_df ,
111+ merged_relationships_df ,
112+ entity_id_mapping ,
113+ ) = await _update_entities_and_relationships (
110114 previous_storage , delta_storage , output_storage , config , cache , callbacks
111115 )
112116
113- # Update relationships with the entities id mapping
114- progress_logger .info ("Updating Relationships" )
115- merged_relationships_df = await _update_relationships (
116- previous_storage , delta_storage , output_storage
117- )
118-
119117 # Update and merge final text units
120118 progress_logger .info ("Updating Text Units" )
121119 merged_text_units = await _update_text_units (
@@ -166,8 +164,11 @@ async def update_dataframe_outputs(
166164
167165
168166async def _update_community_reports (
169- previous_storage , delta_storage , output_storage , community_id_mapping
170- ):
167+ previous_storage : PipelineStorage ,
168+ delta_storage : PipelineStorage ,
169+ output_storage : PipelineStorage ,
170+ community_id_mapping : dict ,
171+ ) -> pd .DataFrame :
171172 """Update the community reports output."""
172173 old_community_reports = await load_table_from_storage (
173174 "community_reports" , previous_storage
@@ -186,7 +187,11 @@ async def _update_community_reports(
186187 return merged_community_reports
187188
188189
189- async def _update_communities (previous_storage , delta_storage , output_storage ):
190+ async def _update_communities (
191+ previous_storage : PipelineStorage ,
192+ delta_storage : PipelineStorage ,
193+ output_storage : PipelineStorage ,
194+ ) -> dict :
190195 """Update the communities output."""
191196 old_communities = await load_table_from_storage ("communities" , previous_storage )
192197 delta_communities = await load_table_from_storage ("communities" , delta_storage )
@@ -199,7 +204,11 @@ async def _update_communities(previous_storage, delta_storage, output_storage):
199204 return community_id_mapping
200205
201206
202- async def _update_covariates (previous_storage , delta_storage , output_storage ):
207+ async def _update_covariates (
208+ previous_storage : PipelineStorage ,
209+ delta_storage : PipelineStorage ,
210+ output_storage : PipelineStorage ,
211+ ) -> None :
203212 """Update the covariates output."""
204213 old_covariates = await load_table_from_storage ("covariates" , previous_storage )
205214 delta_covariates = await load_table_from_storage ("covariates" , delta_storage )
@@ -209,8 +218,11 @@ async def _update_covariates(previous_storage, delta_storage, output_storage):
209218
210219
211220async def _update_text_units (
212- previous_storage , delta_storage , output_storage , entity_id_mapping
213- ):
221+ previous_storage : PipelineStorage ,
222+ delta_storage : PipelineStorage ,
223+ output_storage : PipelineStorage ,
224+ entity_id_mapping : dict ,
225+ ) -> pd .DataFrame :
214226 """Update the text units output."""
215227 old_text_units = await load_table_from_storage ("text_units" , previous_storage )
216228 delta_text_units = await load_table_from_storage ("text_units" , delta_storage )
@@ -223,48 +235,65 @@ async def _update_text_units(
223235 return merged_text_units
224236
225237
226- async def _update_relationships (previous_storage , delta_storage , output_storage ):
227- """Update the relationships output."""
238+ async def _update_entities_and_relationships (
239+ previous_storage : PipelineStorage ,
240+ delta_storage : PipelineStorage ,
241+ output_storage : PipelineStorage ,
242+ config : GraphRagConfig ,
243+ cache : PipelineCache ,
244+ callbacks : WorkflowCallbacks ,
245+ ) -> tuple [pd .DataFrame , pd .DataFrame , dict ]:
246+ """Merge previous and delta entities and relationships, summarize them, write both tables to output storage, and return them with the entity id mapping."""
247+ old_entities = await load_table_from_storage ("entities" , previous_storage )
248+ delta_entities = await load_table_from_storage ("entities" , delta_storage )
249+
250+ merged_entities_df , entity_id_mapping = _group_and_resolve_entities (
251+ old_entities , delta_entities
252+ )
253+
254+ # Load the previous and delta relationships and merge them
228255 old_relationships = await load_table_from_storage ("relationships" , previous_storage )
229256 delta_relationships = await load_table_from_storage ("relationships" , delta_storage )
230257 merged_relationships_df = _update_and_merge_relationships (
231258 old_relationships ,
232259 delta_relationships ,
233260 )
234261
235- await write_table_to_storage (
236- merged_relationships_df , "relationships" , output_storage
262+ summarization_llm_settings = config . get_language_model_config (
263+ config . summarize_descriptions . model_id
237264 )
238-
239- return merged_relationships_df
240-
241-
242- async def _update_entities (
243- previous_storage , delta_storage , output_storage , config , cache , callbacks
244- ):
245- """Update Final Entities output."""
246- old_entities = await load_table_from_storage ("entities" , previous_storage )
247- delta_entities = await load_table_from_storage ("entities" , delta_storage )
248-
249- merged_entities_df , entity_id_mapping = _group_and_resolve_entities (
250- old_entities , delta_entities
265+ summarization_strategy = config .summarize_descriptions .resolved_strategy (
266+ config .root_dir , summarization_llm_settings
251267 )
252268
253- # Re-run description summarization
254- merged_entities_df = await _run_entity_summarization (
269+ (
255270 merged_entities_df ,
256- config ,
257- cache ,
258- callbacks ,
271+ merged_relationships_df ,
272+ ) = await get_summarized_entities_relationships (
273+ extracted_entities = merged_entities_df ,
274+ extracted_relationships = merged_relationships_df ,
275+ callbacks = callbacks ,
276+ cache = cache ,
277+ summarization_strategy = summarization_strategy ,
278+ summarization_num_threads = summarization_llm_settings .concurrent_requests ,
259279 )
260280
261282 # Save the updated entities back to storage
262283 await write_table_to_storage (merged_entities_df , "entities" , output_storage )
263283
264- return merged_entities_df , entity_id_mapping
284+ await write_table_to_storage (
285+ merged_relationships_df , "relationships" , output_storage
286+ )
287+
288+ return merged_entities_df , merged_relationships_df , entity_id_mapping
265289
266290
267- async def _concat_dataframes (name , previous_storage , delta_storage , output_storage ):
291+ async def _concat_dataframes (
292+ name : str ,
293+ previous_storage : PipelineStorage ,
294+ delta_storage : PipelineStorage ,
295+ output_storage : PipelineStorage ,
296+ ) -> pd .DataFrame :
268297 """Concatenate dataframes."""
269298 old_df = await load_table_from_storage (name , previous_storage )
270299 delta_df = await load_table_from_storage (name , delta_storage )
0 commit comments