11import logging
22import pathlib
33import tempfile
4- from collections import OrderedDict
4+ from collections import OrderedDict , defaultdict
55from dataclasses import dataclass
66from datetime import datetime , timezone
77from typing import Dict , Iterable , List , Optional , Set , Tuple , Union
@@ -709,10 +709,14 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
709709 # Value: Tuple(model file name, connection name)
710710 view_connection_map : Dict [str , Tuple [str , str ]] = {}
711711
712- # Map of view name to explore name for API-based view lineage
713- # A view can be referenced by multiple explores, we only need one of the explores to use Looker Query API
714- # Key: view_name, Value: explore_name
715- view_to_explore_map : Dict [str , str ] = {}
712+ # Map of view name to all possible explores for API-based view lineage
713+ # A view can be referenced by multiple explores, we'll optimize the assignment
714+ # Key: view_name, Value: set of explore_names
715+ view_to_explores : Dict [str , Set [str ]] = defaultdict (set )
716+
717+ # Temporary map to keep track of the views in an explore
718+ # Key: explore_name, Value: set of view_names
719+ explore_to_views : Dict [str , Set [str ]] = defaultdict (set )
716720
717721 # The ** means "this directory and all subdirectories", and hence should
718722 # include all the files we want.
@@ -789,8 +793,9 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
789793 for view_name in explore .upstream_views :
790794 if self .source_config .emit_reachable_views_only :
791795 explore_reachable_views .add (view_name .include )
792- # Build view to explore mapping for API-based view lineage
793- view_to_explore_map [view_name .include ] = explore .name
796+
797+ view_to_explores [view_name .include ].add (explore .name )
798+ explore_to_views [explore .name ].add (view_name .include )
794799 except Exception as e :
795800 self .reporter .report_warning (
796801 title = "Failed to process explores" ,
@@ -804,6 +809,16 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
804809 model .connection , set ()
805810 )
806811
812+ view_to_explore_map = {}
813+ if view_to_explores and explore_to_views :
814+ view_to_explore_map = self ._optimize_views_by_common_explore (
815+ view_to_explores , explore_to_views
816+ )
817+ else :
818+ logger .warning (
819+ f"Either view_to_explores: { view_to_explores } or explore_to_views: { explore_to_views } is empty"
820+ )
821+
807822 project_name = self .get_project_name (model_name )
808823
809824 looker_view_id_cache : LookerViewIdCache = LookerViewIdCache (
@@ -888,9 +903,7 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
888903 config = self .source_config ,
889904 ctx = self .ctx ,
890905 looker_client = self .looker_client ,
891- view_to_explore_map = view_to_explore_map
892- if view_to_explore_map
893- else None ,
906+ view_to_explore_map = view_to_explore_map ,
894907 )
895908 except Exception as e :
896909 self .reporter .report_warning (
@@ -1040,5 +1053,61 @@ def report_skipped_unreachable_views(
10401053 context = (f"Project: { project } , View File Path: { path } " ),
10411054 )
10421055
def _optimize_views_by_common_explore(
    self,
    view_to_explores: Dict[str, Set[str]],
    explore_to_views: Dict[str, Set[str]],
) -> Dict[str, str]:
    """
    Pick a single explore for every view, favouring widely shared explores.

    This is a single greedy pass: each view is assigned to the candidate
    explore that contains the most views, so that many views end up sharing
    the same explore and fewer distinct explores are needed overall.

    Ties between equally sized explores are broken by explore name
    (lexicographically smallest wins). Candidates are held in sets of
    strings, whose iteration order is not stable between interpreter runs,
    so without this tie-break the result could differ from run to run.

    Args:
        view_to_explores: Dict mapping view_name -> set of explore_names
        explore_to_views: Dict mapping explore_name -> set of view_names

    Returns:
        Dict mapping view_name -> explore_name. Views whose candidate set is
        empty are omitted from the result.
    """

    # Pre-compute explore sizes once instead of calling len() per comparison.
    explore_sizes = {
        explore: len(views) for explore, views in explore_to_views.items()
    }

    def _preference(explore: str) -> Tuple[int, str]:
        # Larger explores sort first (negated size); the name makes the
        # ordering total and therefore reproducible. An explore missing from
        # explore_to_views is treated as size 0 rather than raising KeyError.
        return (-explore_sizes.get(explore, 0), explore)

    # Greedy assignment: every view independently takes its best candidate.
    view_to_explore: Dict[str, str] = {}
    for view_name, candidate_explores in view_to_explores.items():
        if not candidate_explores:
            continue
        view_to_explore[view_name] = min(candidate_explores, key=_preference)

    # Log optimization results
    unique_explores_used = len(set(view_to_explore.values()))
    total_views = len(view_to_explore)
    total_explores = len(explore_to_views)

    if total_explores > 0:
        # Share of known explores that ended up unused by the assignment.
        efficiency = (1 - unique_explores_used / total_explores) * 100
        logger.info(
            f"View-explore optimization: Using {unique_explores_used}/{total_explores} "
            f"explores for {total_views} views (efficiency: {efficiency:.1f}% savings)"
        )
    else:
        logger.info(
            f"View-explore optimization: No explores to optimize for {total_views} views"
        )

    logger.debug(f"Final View-to-explore mapping: {view_to_explore}")
    return view_to_explore
1111+
def get_report(self):
    """Return the object stored on ``self.reporter``."""
    report = self.reporter
    return report
0 commit comments