From 6ec80240f34fe93995ecfe43860e3a09563cbee6 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 6 Oct 2025 10:34:25 -0400 Subject: [PATCH 1/8] WIP with comments on fetching only aspects you need in graphql --- .../graphql/AspectMappingRegistry.java | 79 ++++++++++ .../datahub/graphql/GmsGraphQLEngine.java | 1 + .../datahub/graphql/QueryContext.java | 19 +++ .../resolvers/load/LoadableTypeResolver.java | 7 + .../graphql/types/dataset/DatasetType.java | 139 ++++++++++++++++++ .../src/main/resources/entity.graphql | 109 +++++++------- .../datahub/graphql/SpringQueryContext.java | 15 ++ 7 files changed, 318 insertions(+), 51 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java new file mode 100644 index 0000000000000..cd0664fb53446 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java @@ -0,0 +1,79 @@ +package com.linkedin.datahub.graphql; + +//import graphql.schema.GraphQLArgument; +//import graphql.schema.GraphQLDirective; +//import graphql.schema.GraphQLObjectType; +//import graphql.schema.GraphQLSchema; +//import graphql.language.ArrayValue; +//import graphql.language.StringValue; +import lombok.extern.slf4j.Slf4j; + +import java.util.*; +import java.util.stream.Collectors; + +@Slf4j +public class AspectMappingRegistry { + private final Map> fieldToAspects = new HashMap<>(); + + public AspectMappingRegistry() { +// buildMappingFromSchema(schema); + } + +// private void buildMappingFromSchema(GraphQLSchema schema) { +// schema.getTypeMap().values().forEach(type -> { +// if (type instanceof GraphQLObjectType) { +// GraphQLObjectType objectType = (GraphQLObjectType) type; +// String typeName = objectType.getName(); +// +// 
objectType.getFieldDefinitions().forEach(field -> { +// String fieldName = field.getName(); +// GraphQLDirective directive = field.getDirective("aspectMapping"); +// +// if (directive != null) { +// GraphQLArgument aspectsArg = directive.getArgument("aspects"); +// if (aspectsArg != null && aspectsArg.getArgumentValue().getValue() instanceof ArrayValue) { +// ArrayValue aspectsArray = (ArrayValue) aspectsArg.getArgumentValue().getValue(); +// Set aspects = aspectsArray.getValues().stream() +// .map(value -> ((StringValue) value).getValue()) +// .collect(Collectors.toSet()); +// +// String key = typeName + "." + fieldName; +// fieldToAspects.put(key, aspects); +// log.debug("Mapped {}.{} to aspects: {}", typeName, fieldName, aspects); +// } +// } +// }); +// } +// }); +// +// log.info("Built aspect mapping registry with {} field mappings", fieldToAspects.size()); +// } + + /** + * Get required aspects for the given fields on a type. + * Returns null if any field is unmapped (fallback to all aspects). + */ + public Set getRequiredAspects(String typeName, Set requestedFields) { + Set aspects = new HashSet<>(); + + for (String field : requestedFields) { + // Skip introspection and special fields + if (field.startsWith("__") || field.equals("urn") || field.equals("type")) { + continue; + } + + String key = typeName + "." + field; + Set fieldAspects = fieldToAspects.get(key); + + if (fieldAspects != null) { + aspects.addAll(fieldAspects); + } else { + // Unmapped field - fallback to all aspects + log.debug("Field {} has no @aspectMapping directive, will fetch all aspects", key); + return null; + } + } + + return aspects.isEmpty() ? 
Collections.emptySet() : aspects; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 49c28e3335e7d..420e48ba1a1e8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1197,6 +1197,7 @@ private static DataFetcher getResolver( } private static String getUrnField(DataFetchingEnvironment env) { +// env.getSelectionSet().getFields() return env.getArgument(URN_FIELD_NAME); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java index 5ad82b5d70375..91e25ad079505 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java @@ -4,7 +4,9 @@ import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.metadata.config.DataHubAppConfiguration; +import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nullable; /** Provided as input to GraphQL resolvers; used to carry information about GQL request context. */ public interface QueryContext { @@ -34,4 +36,21 @@ default String getActorUrn() { OperationContext getOperationContext(); DataHubAppConfiguration getDataHubAppConfig(); + + /** + * Returns the {@link DataFetchingEnvironment} associated with the current GraphQL request. + * This provides access to the GraphQL query structure, requested fields, and other execution context. 
+ * + * @return the DataFetchingEnvironment, or null if not available + */ + @Nullable + DataFetchingEnvironment getDataFetchingEnvironment(); + + /** + * Sets the {@link DataFetchingEnvironment} for the current GraphQL request. + * This is typically called by GraphQL resolvers to provide access to the execution context. + * + * @param environment the DataFetchingEnvironment to associate with this context + */ + void setDataFetchingEnvironment(@Nullable DataFetchingEnvironment environment); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java index 3868b1a35b64f..e1aa24a12304f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.load; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.types.LoadableType; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -32,6 +33,12 @@ public LoadableTypeResolver( @Override public CompletableFuture get(DataFetchingEnvironment environment) { + // Set the DataFetchingEnvironment in the QueryContext for access in batchLoad methods + QueryContext context = environment.getContext(); + if (context != null) { + context.setDataFetchingEnvironment(environment); + } + final K key = _keyProvider.apply(environment); if (key == null) { return null; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index a0579d4f2b75e..47449cf025310 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -11,6 +11,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.AspectMappingRegistry; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; @@ -134,6 +135,23 @@ public List> batchLoad( @Nonnull final List urnStrs, @Nonnull final QueryContext context) { try { final List urns = urnStrs.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + // Access DataFetchingEnvironment from QueryContext + if (context.getDataFetchingEnvironment() != null) { + System.out.println("~~~~~~~~~~~~~~~~~~~~~~DataFetchingEnvironment Available~~~~~~~~~~~~~~~"); + System.out.println("Requested fields: " + context.getDataFetchingEnvironment().getSelectionSet().getFields().keySet()); + System.out.println("Field name: " + context.getDataFetchingEnvironment().getField().getName()); + System.out.println("Arguments: " + context.getDataFetchingEnvironment().getArguments()); + System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); + + // You can now optimize which aspects to fetch based on requested fields + Set aspectsToResolve = determineAspectsFromRequestedFields( + context.getDataFetchingEnvironment().getSelectionSet().getFields().keySet() + ); + System.out.println("Optimized aspects to resolve: " + aspectsToResolve); + } else { + System.out.println("DataFetchingEnvironment not available, using default aspects"); + } final Map datasetMap = entityClient.batchGetV2( @@ -325,4 +343,125 @@ private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final DatasetUpdateInp return new DisjunctivePrivilegeGroup( 
ImmutableList.of(allPrivilegesGroup, specificPrivilegeGroup)); } + + /** + * Maps GraphQL requested fields to the corresponding aspects that need to be fetched. + * This enables performance optimization by only fetching the aspects needed for the requested fields. + * + * @param requestedFields Set of field names requested in the GraphQL query + * @return Set of aspect names that should be fetched + */ + private Set determineAspectsFromRequestedFields(Set requestedFields) { + Set aspectsToResolve = new HashSet<>(); + + // Always include basic aspects + aspectsToResolve.add(DATASET_KEY_ASPECT_NAME); + aspectsToResolve.add(STATUS_ASPECT_NAME); + + // Map specific fields to their corresponding aspects + for (String field : requestedFields) { + switch (field) { + case "properties": + aspectsToResolve.add(DATASET_PROPERTIES_ASPECT_NAME); + break; + case "editableProperties": + aspectsToResolve.add(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME); + break; + case "deprecation": + aspectsToResolve.add(DATASET_DEPRECATION_ASPECT_NAME); + aspectsToResolve.add(DEPRECATION_ASPECT_NAME); + break; + case "upstream": + case "lineage": + aspectsToResolve.add(DATASET_UPSTREAM_LINEAGE_ASPECT_NAME); + aspectsToResolve.add(UPSTREAM_LINEAGE_ASPECT_NAME); + break; + case "schemaMetadata": + aspectsToResolve.add(SCHEMA_METADATA_ASPECT_NAME); + break; + case "editableSchemaMetadata": + aspectsToResolve.add(EDITABLE_SCHEMA_METADATA_ASPECT_NAME); + break; + case "viewProperties": + aspectsToResolve.add(VIEW_PROPERTIES_ASPECT_NAME); + break; + case "ownership": + aspectsToResolve.add(OWNERSHIP_ASPECT_NAME); + break; + case "institutionalMemory": + aspectsToResolve.add(INSTITUTIONAL_MEMORY_ASPECT_NAME); + break; + case "tags": + aspectsToResolve.add(GLOBAL_TAGS_ASPECT_NAME); + break; + case "glossaryTerms": + aspectsToResolve.add(GLOSSARY_TERMS_ASPECT_NAME); + break; + case "container": + aspectsToResolve.add(CONTAINER_ASPECT_NAME); + break; + case "domain": + 
aspectsToResolve.add(DOMAINS_ASPECT_NAME); + break; + case "dataPlatformInstance": + aspectsToResolve.add(DATA_PLATFORM_INSTANCE_ASPECT_NAME); + break; + case "siblings": + aspectsToResolve.add(SIBLINGS_ASPECT_NAME); + break; + case "embed": + aspectsToResolve.add(EMBED_ASPECT_NAME); + break; + case "dataProducts": + aspectsToResolve.add(DATA_PRODUCTS_ASPECT_NAME); + break; + case "browsePaths": + aspectsToResolve.add(BROWSE_PATHS_V2_ASPECT_NAME); + break; + case "access": + aspectsToResolve.add(ACCESS_ASPECT_NAME); + break; + case "structuredProperties": + aspectsToResolve.add(STRUCTURED_PROPERTIES_ASPECT_NAME); + break; + case "forms": + aspectsToResolve.add(FORMS_ASPECT_NAME); + break; + case "subTypes": + aspectsToResolve.add(SUB_TYPES_ASPECT_NAME); + break; + case "application": + aspectsToResolve.add(APPLICATION_MEMBERSHIP_ASPECT_NAME); + break; + case "versionProperties": + aspectsToResolve.add(VERSION_PROPERTIES_ASPECT_NAME); + break; + case "logicalParent": + aspectsToResolve.add(LOGICAL_PARENT_ASPECT_NAME); + break; + case "share": + aspectsToResolve.add(SHARE_ASPECT_NAME); + break; + case "origin": + aspectsToResolve.add(ORIGIN_ASPECT_NAME); + break; + case "documentation": + aspectsToResolve.add(DOCUMENTATION_ASPECT_NAME); + break; + case "lineageFeatures": + aspectsToResolve.add(LINEAGE_FEATURES_ASPECT_NAME); + break; + default: + // For unknown fields, don't add any specific aspects + break; + } + } + + // If no specific aspects were determined, fall back to all aspects + if (aspectsToResolve.size() <= 2) { // Only basic aspects were added + return ASPECTS_TO_RESOLVE; + } + + return aspectsToResolve; + } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 47384cece3406..a792ea536addf 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1,6 +1,12 @@ # Extending the GQL type system to include 
Long type used for dates scalar Long +# Used for performance reasons to map fields to aspects so we only fetch aspects that we need +directive @aspectMapping(aspects: [String!]!) on FIELD_DEFINITION + +# Optional: directive to explicitly mark fields that don't need aspects +directive @noAspect on FIELD_DEFINITION + """ Root GraphQL API Schema """ @@ -1655,118 +1661,119 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ The primary key of the Dataset """ - urn: String! + urn: String! @noAspect """ The standard Entity Type """ - type: EntityType! + type: EntityType! @noAspect """ The timestamp for the last time this entity was ingested + In order to fetch this we need to fetch other aspects """ - lastIngested: Long + lastIngested: Long @noAspect """ Standardized platform urn where the dataset is defined """ - platform: DataPlatform! + platform: DataPlatform! @aspectMapping(aspects: ["datasetKey"]) # we also have custom graphql resolver for this """ The parent container in which the entity resides """ - container: Container + container: Container @aspectMapping(aspects: ["container"]) """ Recursively get the lineage of containers for this entity """ - parentContainers: ParentContainersResult + parentContainers: ParentContainersResult @noAspect # parentContainers has own resolver """ Unique guid for dataset No longer to be used as the Dataset display name. Use properties.name instead """ - name: String! + name: String! 
@aspectMapping(aspects: ["datasetProperties", "editableDatasetProperties", "datasetKey"]) """ An additional set of read only properties """ - properties: DatasetProperties + properties: DatasetProperties @aspectMapping(aspects: ["datasetProperties"]) """ An additional set of of read write properties """ - editableProperties: DatasetEditableProperties + editableProperties: DatasetEditableProperties @aspectMapping(aspects: ["editableDatasetProperties"]) """ Ownership metadata of the dataset """ - ownership: Ownership + ownership: Ownership @aspectMapping(aspects: ["ownership"]) """ The deprecation status of the dataset """ - deprecation: Deprecation + deprecation: Deprecation @aspectMapping(aspects: ["datasetDeprecation", "deprecation"]) """ References to internal resources related to the dataset """ - institutionalMemory: InstitutionalMemory + institutionalMemory: InstitutionalMemory @aspectMapping(aspects: ["institutionalMemory"]) """ Schema metadata of the dataset, available by version number """ - schemaMetadata(version: Long): SchemaMetadata + schemaMetadata(version: Long): SchemaMetadata @noAspect # schemaMetadata uses separate AspectResolver """ Editable schema metadata of the dataset """ - editableSchemaMetadata: EditableSchemaMetadata + editableSchemaMetadata: EditableSchemaMetadata @aspectMapping(aspects: ["editableSchemaMetadata"]) """ Status of the Dataset """ - status: Status + status: Status @aspectMapping(aspects: ["status"]) """ Embed information about the Dataset """ - embed: Embed + embed: Embed @aspectMapping(aspects: ["embed"]) """ Tags used for searching dataset """ - tags: GlobalTags + tags: GlobalTags @aspectMapping(aspects: ["globalTags"]) """ The structured glossary terms associated with the dataset """ - glossaryTerms: GlossaryTerms + glossaryTerms: GlossaryTerms @aspectMapping(aspects: ["glossaryTerms"]) """ The specific instance of the data platform that this entity belongs to """ - dataPlatformInstance: DataPlatformInstance + 
dataPlatformInstance: DataPlatformInstance @aspectMapping(aspects: ["dataPlatformInstance"]) """ The Domain associated with the Dataset """ - domain: DomainAssociation + domain: DomainAssociation @aspectMapping(aspects: ["domains"]) """ The application associated with the dataset """ - application: ApplicationAssociation + application: ApplicationAssociation @aspectMapping(aspects: ["applications"]) """ The forms associated with the Dataset """ - forms: Forms + forms: Forms @aspectMapping(aspects: ["forms"]) """ The Roles and the properties to access the dataset """ - access: Access + access: Access @aspectMapping(aspects: ["access"]) """ Statistics about how this Dataset is used @@ -1778,12 +1785,12 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { range: TimeRange startTimeMillis: Long timeZone: String - ): UsageQueryResult + ): UsageQueryResult @noAspect # has own custom resolver """ Experimental - Summary operational & usage statistics about a Dataset """ - statsSummary: DatasetStatsSummary + statsSummary: DatasetStatsSummary @noAspect # has own custom resolver """ Profile Stats resource that retrieves the events in a previous unit of time in descending order @@ -1794,7 +1801,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { endTimeMillis: Long filter: FilterInput limit: Int - ): [DatasetProfile!] + ): [DatasetProfile!] @noAspect # has own custom resolver """ Operational events for an entity. @@ -1804,7 +1811,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { endTimeMillis: Long filter: FilterInput limit: Int - ): [Operation!] + ): [Operation!] 
@noAspect # has own custom resolver """ Assertions associated with the Dataset @@ -1813,91 +1820,91 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { start: Int count: Int includeSoftDeleted: Boolean - ): EntityAssertionsResult + ): EntityAssertionsResult @noAspect # has own custom resolver """ Edges extending from this entity """ - relationships(input: RelationshipsInput!): EntityRelationshipsResult + relationships(input: RelationshipsInput!): EntityRelationshipsResult @noAspect # has own custom resolver """ Edges extending from this entity grouped by direction in the lineage graph """ - lineage(input: LineageInput!): EntityLineageResult + lineage(input: LineageInput!): EntityLineageResult @noAspect # has own custom resolver """ The browse paths corresponding to the dataset. If no Browse Paths have been generated before, this will be null. """ - browsePaths: [BrowsePath!] + browsePaths: [BrowsePath!] @noAspect # has own custom resolver """ The browse path V2 corresponding to an entity. If no Browse Paths V2 have been generated before, this will be null. """ - browsePathV2: BrowsePathV2 + browsePathV2: BrowsePathV2 @aspectMapping(aspects: ["browsePathsV2"]) """ Experimental! The resolved health statuses of the Dataset """ - health: [Health!] + health: [Health!] 
@noAspect # has own custom resolver """ Schema metadata of the dataset """ - schema: Schema @deprecated(reason: "Use `schemaMetadata`") + schema: Schema @deprecated(reason: "Use `schemaMetadata`") @aspectMapping(aspects: ["schemaMetadata"]) """ Deprecated, use properties field instead External URL associated with the Dataset """ - externalUrl: String @deprecated + externalUrl: String @deprecated @aspectMapping(aspects: ["datasetProperties"]) """ Deprecated, see the properties field instead Environment in which the dataset belongs to or where it was generated Note that this field will soon be deprecated in favor of a more standardized concept of Environment """ - origin: FabricType! @deprecated + origin: FabricType! @deprecated @aspectMapping(aspects: ["origin", "datasetKey"]) """ Deprecated, use the properties field instead Read only technical description for dataset """ - description: String @deprecated + description: String @deprecated @aspectMapping(aspects: ["datasetProperties"]) """ Deprecated, do not use this field The logical type of the dataset ie table, stream, etc """ - platformNativeType: PlatformNativeType @deprecated + platformNativeType: PlatformNativeType @deprecated @noAspect """ Deprecated, use properties instead Native Dataset Uri Uri should not include any environment specific properties """ - uri: String @deprecated + uri: String @deprecated @aspectMapping(aspects: ["datasetProperties"]) """ Deprecated, use tags field instead The structured tags associated with the dataset """ - globalTags: GlobalTags @deprecated + globalTags: GlobalTags @deprecated @aspectMapping(aspects: ["globalTags"]) """ Sub Types that this entity implements """ - subTypes: SubTypes + subTypes: SubTypes @aspectMapping(aspects: ["subTypes"]) """ View related properties. Only relevant if subtypes field contains view. """ - viewProperties: ViewProperties + viewProperties: ViewProperties @aspectMapping(aspects: ["viewProperties"]) """ Experimental API. 
For fetching extra entities that do not have custom UI code yet """ - aspects(input: AspectParams): [RawAspect!] + aspects(input: AspectParams): [RawAspect!] @noAspect # uses custom resolver """ History of datajob runs that either produced or consumed this dataset @@ -1906,44 +1913,44 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { start: Int count: Int direction: RelationshipDirection! - ): DataProcessInstanceResult + ): DataProcessInstanceResult @noAspect # uses custom resolver """ Metadata about the datasets siblings """ - siblings: SiblingProperties + siblings: SiblingProperties @aspectMapping(aspects: ["siblings"]) """ Executes a search on only the siblings of an entity """ - siblingsSearch(input: ScrollAcrossEntitiesInput!): ScrollResults + siblingsSearch(input: ScrollAcrossEntitiesInput!): ScrollResults @noAspect # uses custom resolver """ Lineage information for the column-level. Includes a list of objects detailing which columns are upstream and which are downstream of each other. The upstream and downstream columns are from datasets. """ - fineGrainedLineages: [FineGrainedLineage!] + fineGrainedLineages: [FineGrainedLineage!] 
@aspectMapping(aspects: ["upstreamLineage"]) """ Privileges given to a user relevant to this entity """ - privileges: EntityPrivileges + privileges: EntityPrivileges @noAspect # uses custom resolver """ Whether or not this entity exists on DataHub """ - exists: Boolean + exists: Boolean @noAspect # uses custom resolver """ Structured properties about this Dataset """ - structuredProperties: StructuredProperties + structuredProperties: StructuredProperties @aspectMapping(aspects: ["structuredProperties"]) """ Statistics about how this Dataset has been operated on """ - operationsStats(input: OperationsStatsInput): OperationsQueryResult + operationsStats(input: OperationsStatsInput): OperationsQueryResult @noAspect # uses custom resolver } type RoleAssociation { diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java index cd8ce56bf36f9..c3ffcb5834946 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java @@ -6,6 +6,7 @@ import com.linkedin.metadata.config.DataHubAppConfiguration; import graphql.language.OperationDefinition; import graphql.parser.Parser; +import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; import jakarta.servlet.http.HttpServletRequest; @@ -23,6 +24,9 @@ public class SpringQueryContext implements QueryContext { @Getter private final String queryName; @Nonnull private final OperationContext operationContext; @Nonnull private final DataHubAppConfiguration dataHubAppConfig; + + // Mutable field for DataFetchingEnvironment + @Nullable private DataFetchingEnvironment dataFetchingEnvironment; public SpringQueryContext( final boolean 
isAuthenticated, @@ -63,4 +67,15 @@ public SpringQueryContext( this.dataHubAppConfig = dataHubAppConfig; } + + @Override + @Nullable + public DataFetchingEnvironment getDataFetchingEnvironment() { + return dataFetchingEnvironment; + } + + @Override + public void setDataFetchingEnvironment(@Nullable DataFetchingEnvironment environment) { + this.dataFetchingEnvironment = environment; + } } From 1f7dcd4af06c42e51ac41c23d86496b6fe7f5cae Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 6 Oct 2025 16:40:42 -0400 Subject: [PATCH 2/8] finish full working POC --- .../graphql/AspectMappingRegistry.java | 150 ++++++++++------- .../datahub/graphql/GmsGraphQLEngine.java | 1 - .../datahub/graphql/QueryContext.java | 12 +- .../resolvers/load/LoadableTypeResolver.java | 2 +- .../graphql/types/dataset/DatasetType.java | 154 ++---------------- .../src/main/resources/entity.graphql | 96 ++++++----- .../factory/graphql/GraphQLEngineFactory.java | 3 + 7 files changed, 160 insertions(+), 258 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java index cd0664fb53446..0441a15b41739 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java @@ -1,79 +1,103 @@ package com.linkedin.datahub.graphql; -//import graphql.schema.GraphQLArgument; -//import graphql.schema.GraphQLDirective; -//import graphql.schema.GraphQLObjectType; -//import graphql.schema.GraphQLSchema; -//import graphql.language.ArrayValue; -//import graphql.language.StringValue; -import lombok.extern.slf4j.Slf4j; - +import graphql.language.ArrayValue; +import graphql.language.StringValue; +import graphql.schema.GraphQLArgument; +import graphql.schema.GraphQLDirective; +import graphql.schema.GraphQLObjectType; +import 
graphql.schema.GraphQLSchema; import java.util.*; import java.util.stream.Collectors; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; @Slf4j public class AspectMappingRegistry { - private final Map> fieldToAspects = new HashMap<>(); + private final Map> fieldToAspects = new HashMap<>(); - public AspectMappingRegistry() { -// buildMappingFromSchema(schema); - } + public AspectMappingRegistry(GraphQLSchema schema) { + buildMappingFromSchema(schema); + } -// private void buildMappingFromSchema(GraphQLSchema schema) { -// schema.getTypeMap().values().forEach(type -> { -// if (type instanceof GraphQLObjectType) { -// GraphQLObjectType objectType = (GraphQLObjectType) type; -// String typeName = objectType.getName(); -// -// objectType.getFieldDefinitions().forEach(field -> { -// String fieldName = field.getName(); -// GraphQLDirective directive = field.getDirective("aspectMapping"); -// -// if (directive != null) { -// GraphQLArgument aspectsArg = directive.getArgument("aspects"); -// if (aspectsArg != null && aspectsArg.getArgumentValue().getValue() instanceof ArrayValue) { -// ArrayValue aspectsArray = (ArrayValue) aspectsArg.getArgumentValue().getValue(); -// Set aspects = aspectsArray.getValues().stream() -// .map(value -> ((StringValue) value).getValue()) -// .collect(Collectors.toSet()); -// -// String key = typeName + "." + fieldName; -// fieldToAspects.put(key, aspects); -// log.debug("Mapped {}.{} to aspects: {}", typeName, fieldName, aspects); -// } -// } -// }); -// } -// }); -// -// log.info("Built aspect mapping registry with {} field mappings", fieldToAspects.size()); -// } + private void buildMappingFromSchema(GraphQLSchema schema) { + schema + .getTypeMap() + .values() + .forEach( + type -> { + if (type instanceof GraphQLObjectType) { + GraphQLObjectType objectType = (GraphQLObjectType) type; + String typeName = objectType.getName(); - /** - * Get required aspects for the given fields on a type. 
- * Returns null if any field is unmapped (fallback to all aspects). - */ - public Set getRequiredAspects(String typeName, Set requestedFields) { - Set aspects = new HashSet<>(); + objectType + .getFieldDefinitions() + .forEach( + field -> { + String fieldName = field.getName(); + GraphQLDirective aspectsDirective = field.getDirective("aspectMapping"); + GraphQLDirective noAspectsDirective = field.getDirective("noAspects"); - for (String field : requestedFields) { - // Skip introspection and special fields - if (field.startsWith("__") || field.equals("urn") || field.equals("type")) { - continue; - } + if (aspectsDirective != null) { + GraphQLArgument aspectsArg = aspectsDirective.getArgument("aspects"); + if (aspectsArg != null + && aspectsArg.getArgumentValue().getValue() instanceof ArrayValue) { + ArrayValue aspectsArray = + (ArrayValue) aspectsArg.getArgumentValue().getValue(); + Set aspects = + aspectsArray.getValues().stream() + .map(value -> ((StringValue) value).getValue()) + .collect(Collectors.toSet()); - String key = typeName + "." + field; - Set fieldAspects = fieldToAspects.get(key); + String key = typeName + "." + fieldName; + fieldToAspects.put(key, aspects); + log.debug( + "Mapped {}.{} to aspects: {}", typeName, fieldName, aspects); + } + } else if (noAspectsDirective != null) { + String key = typeName + "." + fieldName; + fieldToAspects.put(key, new HashSet<>()); + log.debug( + "Mapped {}.{} to to request no specific aspects.", + typeName, + fieldName); + } + }); + } + }); - if (fieldAspects != null) { - aspects.addAll(fieldAspects); - } else { - // Unmapped field - fallback to all aspects - log.debug("Field {} has no @aspectMapping directive, will fetch all aspects", key); - return null; - } - } + log.info("Built aspect mapping registry with {} field mappings", fieldToAspects.size()); + } - return aspects.isEmpty() ? Collections.emptySet() : aspects; + /** + * Get required aspects for the given fields on a type. 
Returns null if any field is unmapped + * (fallback to all aspects). + */ + @Nullable + public Set getRequiredAspects( + String typeName, List requestedFields) { + Set aspects = new HashSet<>(); + + for (graphql.schema.SelectedField field : requestedFields) { + // Skip introspection and nested fields (with level > 2 since top level fields are level 2 ie. + // Dataset.urn) + String fieldName = field.getName(); + if (fieldName.startsWith("__") || field.getLevel() > 2) { + continue; + } + + String key = typeName + "." + fieldName; + Set fieldAspects = fieldToAspects.get(key); + + if (fieldAspects != null) { + aspects.addAll(fieldAspects); + } else { + // Unmapped field - fallback to all aspects + log.debug( + "Field {} has no @aspectMapping or @noAspects directives, will fetch all aspects", key); + return null; + } } + + return aspects.isEmpty() ? Collections.emptySet() : aspects; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 420e48ba1a1e8..49c28e3335e7d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1197,7 +1197,6 @@ private static DataFetcher getResolver( } private static String getUrnField(DataFetchingEnvironment env) { -// env.getSelectionSet().getFields() return env.getArgument(URN_FIELD_NAME); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java index 91e25ad079505..c24f8ba4e03e6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java @@ -38,18 +38,18 @@ default String getActorUrn() { DataHubAppConfiguration 
getDataHubAppConfig(); /** - * Returns the {@link DataFetchingEnvironment} associated with the current GraphQL request. - * This provides access to the GraphQL query structure, requested fields, and other execution context. - * + * Returns the {@link DataFetchingEnvironment} associated with the current GraphQL request. This + * provides access to the GraphQL query structure, requested fields, and other execution context. + * * @return the DataFetchingEnvironment, or null if not available */ @Nullable DataFetchingEnvironment getDataFetchingEnvironment(); /** - * Sets the {@link DataFetchingEnvironment} for the current GraphQL request. - * This is typically called by GraphQL resolvers to provide access to the execution context. - * + * Sets the {@link DataFetchingEnvironment} for the current GraphQL request. This is typically + * called by GraphQL resolvers to provide access to the execution context. + * * @param environment the DataFetchingEnvironment to associate with this context */ void setDataFetchingEnvironment(@Nullable DataFetchingEnvironment environment); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java index e1aa24a12304f..1b0f41ce1ef11 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/LoadableTypeResolver.java @@ -38,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) { if (context != null) { context.setDataFetchingEnvironment(environment); } - + final K key = _keyProvider.apply(environment); if (key == null) { return null; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 47449cf025310..b3e99934d5007 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -49,7 +49,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -98,6 +97,7 @@ public class DatasetType private static final Set FACET_FIELDS = ImmutableSet.of("origin", "platform"); private static final String ENTITY_NAME = "dataset"; + private static final String KEY_ASPECT = "datasetKey"; private final EntityClient entityClient; @@ -135,22 +135,21 @@ public List> batchLoad( @Nonnull final List urnStrs, @Nonnull final QueryContext context) { try { final List urns = urnStrs.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); - - // Access DataFetchingEnvironment from QueryContext + Set aspectsToResolve = ASPECTS_TO_RESOLVE; + if (context.getDataFetchingEnvironment() != null) { - System.out.println("~~~~~~~~~~~~~~~~~~~~~~DataFetchingEnvironment Available~~~~~~~~~~~~~~~"); - System.out.println("Requested fields: " + context.getDataFetchingEnvironment().getSelectionSet().getFields().keySet()); - System.out.println("Field name: " + context.getDataFetchingEnvironment().getField().getName()); - System.out.println("Arguments: " + context.getDataFetchingEnvironment().getArguments()); - System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); - - // You can now optimize which aspects to fetch based on requested fields - Set aspectsToResolve = determineAspectsFromRequestedFields( - context.getDataFetchingEnvironment().getSelectionSet().getFields().keySet() - ); - System.out.println("Optimized aspects to resolve: " + aspectsToResolve); - } else { - 
System.out.println("DataFetchingEnvironment not available, using default aspects"); + // I can move this out of DatasetType at least into GmsGraphQLEngine and populate it the + // first time we get a query instead of on every query + AspectMappingRegistry aspectMappingRegistry = + new AspectMappingRegistry(context.getDataFetchingEnvironment().getGraphQLSchema()); + aspectsToResolve = + aspectMappingRegistry.getRequiredAspects( + "Dataset", context.getDataFetchingEnvironment().getSelectionSet().getFields()); + } + + if (aspectsToResolve != null) { + // always include the key aspect if we are selecting specific assets + aspectsToResolve.add(KEY_ASPECT); } final Map datasetMap = @@ -158,7 +157,7 @@ public List> batchLoad( context.getOperationContext(), Constants.DATASET_ENTITY_NAME, new HashSet<>(urns), - ASPECTS_TO_RESOLVE); + aspectsToResolve); final List gmsResults = new ArrayList<>(urnStrs.size()); for (Urn urn : urns) { @@ -343,125 +342,4 @@ private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final DatasetUpdateInp return new DisjunctivePrivilegeGroup( ImmutableList.of(allPrivilegesGroup, specificPrivilegeGroup)); } - - /** - * Maps GraphQL requested fields to the corresponding aspects that need to be fetched. - * This enables performance optimization by only fetching the aspects needed for the requested fields. 
- * - * @param requestedFields Set of field names requested in the GraphQL query - * @return Set of aspect names that should be fetched - */ - private Set determineAspectsFromRequestedFields(Set requestedFields) { - Set aspectsToResolve = new HashSet<>(); - - // Always include basic aspects - aspectsToResolve.add(DATASET_KEY_ASPECT_NAME); - aspectsToResolve.add(STATUS_ASPECT_NAME); - - // Map specific fields to their corresponding aspects - for (String field : requestedFields) { - switch (field) { - case "properties": - aspectsToResolve.add(DATASET_PROPERTIES_ASPECT_NAME); - break; - case "editableProperties": - aspectsToResolve.add(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME); - break; - case "deprecation": - aspectsToResolve.add(DATASET_DEPRECATION_ASPECT_NAME); - aspectsToResolve.add(DEPRECATION_ASPECT_NAME); - break; - case "upstream": - case "lineage": - aspectsToResolve.add(DATASET_UPSTREAM_LINEAGE_ASPECT_NAME); - aspectsToResolve.add(UPSTREAM_LINEAGE_ASPECT_NAME); - break; - case "schemaMetadata": - aspectsToResolve.add(SCHEMA_METADATA_ASPECT_NAME); - break; - case "editableSchemaMetadata": - aspectsToResolve.add(EDITABLE_SCHEMA_METADATA_ASPECT_NAME); - break; - case "viewProperties": - aspectsToResolve.add(VIEW_PROPERTIES_ASPECT_NAME); - break; - case "ownership": - aspectsToResolve.add(OWNERSHIP_ASPECT_NAME); - break; - case "institutionalMemory": - aspectsToResolve.add(INSTITUTIONAL_MEMORY_ASPECT_NAME); - break; - case "tags": - aspectsToResolve.add(GLOBAL_TAGS_ASPECT_NAME); - break; - case "glossaryTerms": - aspectsToResolve.add(GLOSSARY_TERMS_ASPECT_NAME); - break; - case "container": - aspectsToResolve.add(CONTAINER_ASPECT_NAME); - break; - case "domain": - aspectsToResolve.add(DOMAINS_ASPECT_NAME); - break; - case "dataPlatformInstance": - aspectsToResolve.add(DATA_PLATFORM_INSTANCE_ASPECT_NAME); - break; - case "siblings": - aspectsToResolve.add(SIBLINGS_ASPECT_NAME); - break; - case "embed": - aspectsToResolve.add(EMBED_ASPECT_NAME); - break; - case 
"dataProducts": - aspectsToResolve.add(DATA_PRODUCTS_ASPECT_NAME); - break; - case "browsePaths": - aspectsToResolve.add(BROWSE_PATHS_V2_ASPECT_NAME); - break; - case "access": - aspectsToResolve.add(ACCESS_ASPECT_NAME); - break; - case "structuredProperties": - aspectsToResolve.add(STRUCTURED_PROPERTIES_ASPECT_NAME); - break; - case "forms": - aspectsToResolve.add(FORMS_ASPECT_NAME); - break; - case "subTypes": - aspectsToResolve.add(SUB_TYPES_ASPECT_NAME); - break; - case "application": - aspectsToResolve.add(APPLICATION_MEMBERSHIP_ASPECT_NAME); - break; - case "versionProperties": - aspectsToResolve.add(VERSION_PROPERTIES_ASPECT_NAME); - break; - case "logicalParent": - aspectsToResolve.add(LOGICAL_PARENT_ASPECT_NAME); - break; - case "share": - aspectsToResolve.add(SHARE_ASPECT_NAME); - break; - case "origin": - aspectsToResolve.add(ORIGIN_ASPECT_NAME); - break; - case "documentation": - aspectsToResolve.add(DOCUMENTATION_ASPECT_NAME); - break; - case "lineageFeatures": - aspectsToResolve.add(LINEAGE_FEATURES_ASPECT_NAME); - break; - default: - // For unknown fields, don't add any specific aspects - break; - } - } - - // If no specific aspects were determined, fall back to all aspects - if (aspectsToResolve.size() <= 2) { // Only basic aspects were added - return ASPECTS_TO_RESOLVE; - } - - return aspectsToResolve; - } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index a792ea536addf..43368cc87de82 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -5,7 +5,7 @@ scalar Long directive @aspectMapping(aspects: [String!]!) 
on FIELD_DEFINITION # Optional: directive to explicitly mark fields that don't need aspects -directive @noAspect on FIELD_DEFINITION +directive @noAspects on FIELD_DEFINITION """ Root GraphQL API Schema @@ -1661,24 +1661,23 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ The primary key of the Dataset """ - urn: String! @noAspect + urn: String! @noAspects """ The standard Entity Type """ - type: EntityType! @noAspect + type: EntityType! @noAspects """ The timestamp for the last time this entity was ingested In order to fetch this we need to fetch other aspects """ - lastIngested: Long @noAspect + lastIngested: Long @noAspects """ Standardized platform urn where the dataset is defined """ platform: DataPlatform! @aspectMapping(aspects: ["datasetKey"]) # we also have custom graphql resolver for this - """ The parent container in which the entity resides """ @@ -1687,13 +1686,15 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ Recursively get the lineage of containers for this entity """ - parentContainers: ParentContainersResult @noAspect # parentContainers has own resolver - + parentContainers: ParentContainersResult @noAspects # parentContainers has own resolver """ Unique guid for dataset No longer to be used as the Dataset display name. Use properties.name instead """ - name: String! @aspectMapping(aspects: ["datasetProperties", "editableDatasetProperties", "datasetKey"]) + name: String! 
+ @aspectMapping( + aspects: ["datasetProperties", "editableDatasetProperties", "datasetKey"] + ) """ An additional set of read only properties @@ -1703,7 +1704,8 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ An additional set of of read write properties """ - editableProperties: DatasetEditableProperties @aspectMapping(aspects: ["editableDatasetProperties"]) + editableProperties: DatasetEditableProperties + @aspectMapping(aspects: ["editableDatasetProperties"]) """ Ownership metadata of the dataset @@ -1713,22 +1715,24 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ The deprecation status of the dataset """ - deprecation: Deprecation @aspectMapping(aspects: ["datasetDeprecation", "deprecation"]) + deprecation: Deprecation + @aspectMapping(aspects: ["datasetDeprecation", "deprecation"]) """ References to internal resources related to the dataset """ - institutionalMemory: InstitutionalMemory @aspectMapping(aspects: ["institutionalMemory"]) + institutionalMemory: InstitutionalMemory + @aspectMapping(aspects: ["institutionalMemory"]) """ Schema metadata of the dataset, available by version number """ - schemaMetadata(version: Long): SchemaMetadata @noAspect # schemaMetadata uses separate AspectResolver - + schemaMetadata(version: Long): SchemaMetadata @noAspects # schemaMetadata uses separate AspectResolver """ Editable schema metadata of the dataset """ - editableSchemaMetadata: EditableSchemaMetadata @aspectMapping(aspects: ["editableSchemaMetadata"]) + editableSchemaMetadata: EditableSchemaMetadata + @aspectMapping(aspects: ["editableSchemaMetadata"]) """ Status of the Dataset @@ -1753,7 +1757,8 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ The specific instance of the data platform that this entity belongs to """ - dataPlatformInstance: DataPlatformInstance @aspectMapping(aspects: ["dataPlatformInstance"]) + dataPlatformInstance: DataPlatformInstance + 
@aspectMapping(aspects: ["dataPlatformInstance"]) """ The Domain associated with the Dataset @@ -1785,13 +1790,11 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { range: TimeRange startTimeMillis: Long timeZone: String - ): UsageQueryResult @noAspect # has own custom resolver - + ): UsageQueryResult @noAspects # has own custom resolver """ Experimental - Summary operational & usage statistics about a Dataset """ - statsSummary: DatasetStatsSummary @noAspect # has own custom resolver - + statsSummary: DatasetStatsSummary @noAspects # has own custom resolver """ Profile Stats resource that retrieves the events in a previous unit of time in descending order If no start or end time are provided, the most recent events will be returned @@ -1801,8 +1804,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { endTimeMillis: Long filter: FilterInput limit: Int - ): [DatasetProfile!] @noAspect # has own custom resolver - + ): [DatasetProfile!] @noAspects # has own custom resolver """ Operational events for an entity. """ @@ -1811,8 +1813,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { endTimeMillis: Long filter: FilterInput limit: Int - ): [Operation!] @noAspect # has own custom resolver - + ): [Operation!] 
@noAspects # has own custom resolver """ Assertions associated with the Dataset """ @@ -1820,23 +1821,20 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { start: Int count: Int includeSoftDeleted: Boolean - ): EntityAssertionsResult @noAspect # has own custom resolver - + ): EntityAssertionsResult @noAspects # has own custom resolver """ Edges extending from this entity """ - relationships(input: RelationshipsInput!): EntityRelationshipsResult @noAspect # has own custom resolver - + relationships(input: RelationshipsInput!): EntityRelationshipsResult + @noAspects # has own custom resolver """ Edges extending from this entity grouped by direction in the lineage graph """ - lineage(input: LineageInput!): EntityLineageResult @noAspect # has own custom resolver - + lineage(input: LineageInput!): EntityLineageResult @noAspects # has own custom resolver """ The browse paths corresponding to the dataset. If no Browse Paths have been generated before, this will be null. """ - browsePaths: [BrowsePath!] @noAspect # has own custom resolver - + browsePaths: [BrowsePath!] @noAspects # has own custom resolver """ The browse path V2 corresponding to an entity. If no Browse Paths V2 have been generated before, this will be null. """ @@ -1845,12 +1843,13 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ Experimental! The resolved health statuses of the Dataset """ - health: [Health!] @noAspect # has own custom resolver - + health: [Health!] 
@noAspects # has own custom resolver """ Schema metadata of the dataset """ - schema: Schema @deprecated(reason: "Use `schemaMetadata`") @aspectMapping(aspects: ["schemaMetadata"]) + schema: Schema + @deprecated(reason: "Use `schemaMetadata`") + @aspectMapping(aspects: ["schemaMetadata"]) """ Deprecated, use properties field instead @@ -1863,7 +1862,9 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { Environment in which the dataset belongs to or where it was generated Note that this field will soon be deprecated in favor of a more standardized concept of Environment """ - origin: FabricType! @deprecated @aspectMapping(aspects: ["origin", "datasetKey"]) + origin: FabricType! + @deprecated + @aspectMapping(aspects: ["origin", "datasetKey"]) """ Deprecated, use the properties field instead @@ -1875,7 +1876,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { Deprecated, do not use this field The logical type of the dataset ie table, stream, etc """ - platformNativeType: PlatformNativeType @deprecated @noAspect + platformNativeType: PlatformNativeType @deprecated @noAspects """ Deprecated, use properties instead @@ -1904,8 +1905,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { Experimental API. For fetching extra entities that do not have custom UI code yet """ - aspects(input: AspectParams): [RawAspect!] @noAspect # uses custom resolver - + aspects(input: AspectParams): [RawAspect!] @noAspects # uses custom resolver """ History of datajob runs that either produced or consumed this dataset """ @@ -1913,8 +1913,7 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { start: Int count: Int direction: RelationshipDirection! 
- ): DataProcessInstanceResult @noAspect # uses custom resolver - + ): DataProcessInstanceResult @noAspects # uses custom resolver """ Metadata about the datasets siblings """ @@ -1923,34 +1922,33 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ Executes a search on only the siblings of an entity """ - siblingsSearch(input: ScrollAcrossEntitiesInput!): ScrollResults @noAspect # uses custom resolver - + siblingsSearch(input: ScrollAcrossEntitiesInput!): ScrollResults @noAspects # uses custom resolver """ Lineage information for the column-level. Includes a list of objects detailing which columns are upstream and which are downstream of each other. The upstream and downstream columns are from datasets. """ - fineGrainedLineages: [FineGrainedLineage!] @aspectMapping(aspects: ["upstreamLineage"]) + fineGrainedLineages: [FineGrainedLineage!] + @aspectMapping(aspects: ["upstreamLineage"]) """ Privileges given to a user relevant to this entity """ - privileges: EntityPrivileges @noAspect # uses custom resolver - + privileges: EntityPrivileges @noAspects # uses custom resolver """ Whether or not this entity exists on DataHub """ - exists: Boolean @noAspect # uses custom resolver - + exists: Boolean @noAspects # uses custom resolver """ Structured properties about this Dataset """ - structuredProperties: StructuredProperties @aspectMapping(aspects: ["structuredProperties"]) + structuredProperties: StructuredProperties + @aspectMapping(aspects: ["structuredProperties"]) """ Statistics about how this Dataset has been operated on """ - operationsStats(input: OperationsStatsInput): OperationsQueryResult @noAspect # uses custom resolver + operationsStats(input: OperationsStatsInput): OperationsQueryResult @noAspects # uses custom resolver } type RoleAssociation { diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 9f1d9225ade93..151a49ab3449e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -296,6 +296,9 @@ protected GraphQLEngine graphQLEngine( args.setMetricUtils(metricUtils); args.setS3Util(s3Util); + // I need to run builder().build() to get the GraphQL schema + // Once it's built I can't add another argument. Could I build it, create AspectMappingRegistry + // with the schema, then set it in args and build again and return? Seems weird.. return new GmsGraphQLEngine(args).builder().build(); } From 3fde3b2da175cb8f4dc4426aa48c22c2e4081da5 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 6 Oct 2025 16:53:35 -0400 Subject: [PATCH 3/8] add comments explaining what's broken so far --- .../datahub/graphql/AspectMappingRegistry.java | 5 +++++ .../graphql/types/corpuser/CorpUserType.java | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java index 0441a15b41739..066f3b4063f5f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java @@ -80,6 +80,11 @@ public Set getRequiredAspects( for (graphql.schema.SelectedField field : requestedFields) { // Skip introspection and nested fields (with level > 2 since top level fields are level 2 ie. // Dataset.urn) + // okay this doesn't actually work with nested fields we need for other resolvers.. will need + // to come back to this. 
+ // for example a query with ownership on a dataset with user fields will look like this for + // the requestedFields: + // Dataset.ownership/Ownership.owners/Owner.owner/CorpUser.properties/CorpUserProperties.displayName String fieldName = field.getName(); if (fieldName.startsWith("__") || field.getLevel() > 2) { continue; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java index 34fb4b1f1c0e6..02d044d4cf217 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java @@ -13,6 +13,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.AspectMappingRegistry; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; @@ -46,6 +47,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -84,6 +86,22 @@ public List> batchLoad( final List corpUserUrns = urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + Set aspectsToResolve = null; + if (context.getDataFetchingEnvironment() != null) { + // I can move this out of DatasetType at least into GmsGraphQLEngine and populate it the + // first time we get a query instead of on every query + AspectMappingRegistry aspectMappingRegistry = + new AspectMappingRegistry(context.getDataFetchingEnvironment().getGraphQLSchema()); + aspectsToResolve = + aspectMappingRegistry.getRequiredAspects( + "CorpUser", 
context.getDataFetchingEnvironment().getSelectionSet().getFields()); + } + + System.out.println("~~~~~~~~~~~~~~~~~~~~aspectsToResolve - corpuser~~~~~~~~~~~~~~~~~~~~"); + System.out.println(context.getDataFetchingEnvironment().getSelectionSet().getFields()); + System.out.println(aspectsToResolve); + System.out.println("~~~~~~~~~~~~~~~~~~~~aspectsToResolve - corpuser~~~~~~~~~~~~~~~~~~~~"); + final Map corpUserMap = _entityClient.batchGetV2( context.getOperationContext(), From 4e25e68bfe287e71bcb56c279a0081c045c9cfd7 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Tue, 11 Nov 2025 11:09:46 -0500 Subject: [PATCH 4/8] add change for nested fields - still WIP --- .../graphql/AspectMappingRegistry.java | 26 ++++++++++++------- .../graphql/types/dataset/DatasetType.java | 22 +++++++++++----- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java index 066f3b4063f5f..e6d8d01ca7468 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java @@ -71,6 +71,10 @@ private void buildMappingFromSchema(GraphQLSchema schema) { /** * Get required aspects for the given fields on a type. Returns null if any field is unmapped * (fallback to all aspects). + * + *

This method filters the selection set to only include fields that directly belong to the + * specified type, regardless of where that type appears in the query tree. This allows it to work + * correctly for both top-level queries and nested entities (e.g., Dataset inside SearchResult). */ @Nullable public Set getRequiredAspects( @@ -78,15 +82,17 @@ public Set getRequiredAspects( Set aspects = new HashSet<>(); for (graphql.schema.SelectedField field : requestedFields) { - // Skip introspection and nested fields (with level > 2 since top level fields are level 2 ie. - // Dataset.urn) - // okay this doesn't actually work with nested fields we need for other resolvers.. will need - // to come back to this. - // for example a query with ownership on a dataset with user fields will look like this for - // the requestedFields: - // Dataset.ownership/Ownership.owners/Owner.owner/CorpUser.properties/CorpUserProperties.displayName String fieldName = field.getName(); - if (fieldName.startsWith("__") || field.getLevel() > 2) { + + // Skip introspection fields + if (fieldName.startsWith("__")) { + continue; + } + + // Only process fields that belong to the target type + // getObjectTypeNames() returns the set of types this field belongs to (accounting for + // interfaces/unions) + if (!field.getObjectTypeNames().contains(typeName)) { continue; } @@ -95,14 +101,16 @@ public Set getRequiredAspects( if (fieldAspects != null) { aspects.addAll(fieldAspects); + log.debug("Field {} mapped to aspects: {}", key, fieldAspects); } else { - // Unmapped field - fallback to all aspects + // Unmapped field - fallback to all aspects to be conservative log.debug( "Field {} has no @aspectMapping or @noAspects directives, will fetch all aspects", key); return null; } } + log.debug("Computed required aspects for {}: {}", typeName, aspects); return aspects.isEmpty() ? 
Collections.emptySet() : aspects; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index b3e99934d5007..8f8e21b65642f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -49,6 +49,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -56,7 +57,9 @@ import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class DatasetType implements SearchableEntityType, BrowsableEntityType, @@ -138,18 +141,23 @@ public List> batchLoad( Set aspectsToResolve = ASPECTS_TO_RESOLVE; if (context.getDataFetchingEnvironment() != null) { - // I can move this out of DatasetType at least into GmsGraphQLEngine and populate it the - // first time we get a query instead of on every query + // TODO: Optimize by creating AspectMappingRegistry once and reusing it AspectMappingRegistry aspectMappingRegistry = new AspectMappingRegistry(context.getDataFetchingEnvironment().getGraphQLSchema()); - aspectsToResolve = + Set requiredAspects = aspectMappingRegistry.getRequiredAspects( "Dataset", context.getDataFetchingEnvironment().getSelectionSet().getFields()); - } - if (aspectsToResolve != null) { - // always include the key aspect if we are selecting specific assets - aspectsToResolve.add(KEY_ASPECT); + if (requiredAspects != null) { + // Successfully determined required aspects - only fetch what's needed + aspectsToResolve = new HashSet<>(requiredAspects); + // Always include the key aspect + aspectsToResolve.add(KEY_ASPECT); + } else { + // Could not determine 
required aspects (unmapped field) - fetch all aspects to be safe + log.debug( + "Could not determine required aspects for Dataset, falling back to fetching all aspects"); + } } final Map datasetMap = From a6c6436891c52fdf6a9e8178a62cc049e671fc43 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Tue, 11 Nov 2025 12:31:04 -0500 Subject: [PATCH 5/8] fix bug with nested fields --- .../resolvers/load/EntityTypeResolver.java | 7 ++++++ .../graphql/types/corpuser/CorpUserType.java | 16 ------------ .../graphql/types/dataset/DatasetType.java | 25 +++++++++++++------ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityTypeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityTypeResolver.java index 3c285f30661bc..6b799ff36d7bb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityTypeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityTypeResolver.java @@ -2,6 +2,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.Entity; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -44,6 +45,12 @@ private boolean isOnlySelectingIdentityFields(DataFetchingEnvironment environmen @Override public CompletableFuture get(DataFetchingEnvironment environment) { + // Set the DataFetchingEnvironment in the QueryContext for access in batchLoad methods + QueryContext context = environment.getContext(); + if (context != null) { + context.setDataFetchingEnvironment(environment); + } + final Entity resolvedEntity = _entityProvider.apply(environment); if (resolvedEntity == null) { return CompletableFuture.completedFuture(null); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java index 02d044d4cf217..b181e7d3e7a11 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java @@ -86,22 +86,6 @@ public List> batchLoad( final List corpUserUrns = urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); - Set aspectsToResolve = null; - if (context.getDataFetchingEnvironment() != null) { - // I can move this out of DatasetType at least into GmsGraphQLEngine and populate it the - // first time we get a query instead of on every query - AspectMappingRegistry aspectMappingRegistry = - new AspectMappingRegistry(context.getDataFetchingEnvironment().getGraphQLSchema()); - aspectsToResolve = - aspectMappingRegistry.getRequiredAspects( - "CorpUser", context.getDataFetchingEnvironment().getSelectionSet().getFields()); - } - - System.out.println("~~~~~~~~~~~~~~~~~~~~aspectsToResolve - corpuser~~~~~~~~~~~~~~~~~~~~"); - System.out.println(context.getDataFetchingEnvironment().getSelectionSet().getFields()); - System.out.println(aspectsToResolve); - System.out.println("~~~~~~~~~~~~~~~~~~~~aspectsToResolve - corpuser~~~~~~~~~~~~~~~~~~~~"); - final Map corpUserMap = _entityClient.batchGetV2( context.getOperationContext(), diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 8f8e21b65642f..c0d147c2d266e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -102,6 +102,9 @@ public class DatasetType 
private static final String ENTITY_NAME = "dataset"; private static final String KEY_ASPECT = "datasetKey"; + // Cache the AspectMappingRegistry to avoid recreating it on every request + private static volatile AspectMappingRegistry cachedAspectMappingRegistry = null; + private final EntityClient entityClient; public DatasetType(final EntityClient entityClient) { @@ -141,11 +144,20 @@ public List> batchLoad( Set aspectsToResolve = ASPECTS_TO_RESOLVE; if (context.getDataFetchingEnvironment() != null) { - // TODO: Optimize by creating AspectMappingRegistry once and reusing it - AspectMappingRegistry aspectMappingRegistry = - new AspectMappingRegistry(context.getDataFetchingEnvironment().getGraphQLSchema()); + // Lazily initialize and cache the AspectMappingRegistry + if (cachedAspectMappingRegistry == null) { + synchronized (DatasetType.class) { + if (cachedAspectMappingRegistry == null) { + cachedAspectMappingRegistry = + new AspectMappingRegistry( + context.getDataFetchingEnvironment().getGraphQLSchema()); + log.info("Initialized AspectMappingRegistry for Dataset aspect optimization"); + } + } + } + Set requiredAspects = - aspectMappingRegistry.getRequiredAspects( + cachedAspectMappingRegistry.getRequiredAspects( "Dataset", context.getDataFetchingEnvironment().getSelectionSet().getFields()); if (requiredAspects != null) { @@ -153,10 +165,7 @@ public List> batchLoad( aspectsToResolve = new HashSet<>(requiredAspects); // Always include the key aspect aspectsToResolve.add(KEY_ASPECT); - } else { - // Could not determine required aspects (unmapped field) - fetch all aspects to be safe - log.debug( - "Could not determine required aspects for Dataset, falling back to fetching all aspects"); + log.info("Fetching optimized aspect set for Dataset: {}", aspectsToResolve); } } From b49d8346fce4f88417035913356219b6808d57f7 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Tue, 11 Nov 2025 12:50:28 -0500 Subject: [PATCH 6/8] create aspect mapping registry once on boot up --- 
.../datahub/graphql/QueryContext.java | 17 ++++++++++ .../graphql/types/dataset/DatasetType.java | 31 ++++++++----------- .../factory/graphql/GraphQLEngineFactory.java | 26 +++++++++++++--- .../datahub/graphql/GraphQLController.java | 4 +++ .../datahub/graphql/SpringQueryContext.java | 17 ++++++++-- 5 files changed, 71 insertions(+), 24 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java index c24f8ba4e03e6..ef8134d73045d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java @@ -53,4 +53,21 @@ default String getActorUrn() { * @param environment the DataFetchingEnvironment to associate with this context */ void setDataFetchingEnvironment(@Nullable DataFetchingEnvironment environment); + + /** + * Returns the {@link AspectMappingRegistry} for optimizing aspect fetching based on GraphQL + * field selections. + * + * @return the AspectMappingRegistry, or null if not available + */ + @Nullable + AspectMappingRegistry getAspectMappingRegistry(); + + /** + * Sets the {@link AspectMappingRegistry} for this context. This is typically called during + * context initialization. 
+ * + * @param aspectMappingRegistry the AspectMappingRegistry to use + */ + void setAspectMappingRegistry(@Nullable AspectMappingRegistry aspectMappingRegistry); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index c0d147c2d266e..e5cb4c56d354b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -102,9 +102,6 @@ public class DatasetType private static final String ENTITY_NAME = "dataset"; private static final String KEY_ASPECT = "datasetKey"; - // Cache the AspectMappingRegistry to avoid recreating it on every request - private static volatile AspectMappingRegistry cachedAspectMappingRegistry = null; - private final EntityClient entityClient; public DatasetType(final EntityClient entityClient) { @@ -143,22 +140,14 @@ public List> batchLoad( final List urns = urnStrs.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); Set aspectsToResolve = ASPECTS_TO_RESOLVE; - if (context.getDataFetchingEnvironment() != null) { - // Lazily initialize and cache the AspectMappingRegistry - if (cachedAspectMappingRegistry == null) { - synchronized (DatasetType.class) { - if (cachedAspectMappingRegistry == null) { - cachedAspectMappingRegistry = - new AspectMappingRegistry( - context.getDataFetchingEnvironment().getGraphQLSchema()); - log.info("Initialized AspectMappingRegistry for Dataset aspect optimization"); - } - } - } - + // Use the AspectMappingRegistry from the context to determine required aspects + if (context.getDataFetchingEnvironment() != null + && context.getAspectMappingRegistry() != null) { Set requiredAspects = - cachedAspectMappingRegistry.getRequiredAspects( - "Dataset", context.getDataFetchingEnvironment().getSelectionSet().getFields()); + 
context + .getAspectMappingRegistry() + .getRequiredAspects( + "Dataset", context.getDataFetchingEnvironment().getSelectionSet().getFields()); if (requiredAspects != null) { // Successfully determined required aspects - only fetch what's needed @@ -166,7 +155,13 @@ public List> batchLoad( // Always include the key aspect aspectsToResolve.add(KEY_ASPECT); log.info("Fetching optimized aspect set for Dataset: {}", aspectsToResolve); + } else { + log.debug( + "Could not determine required aspects for Dataset, falling back to fetching all aspects"); } + } else { + log.debug( + "DataFetchingEnvironment or AspectMappingRegistry not available, fetching all aspects for Dataset"); } final Map datasetMap = diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 151a49ab3449e..ccaa2a86131c5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -6,6 +6,7 @@ import com.datahub.authentication.token.StatefulTokenService; import com.datahub.authentication.user.NativeUserService; import com.datahub.authorization.role.RoleService; +import com.linkedin.datahub.graphql.AspectMappingRegistry; import com.linkedin.datahub.graphql.GmsGraphQLEngine; import com.linkedin.datahub.graphql.GmsGraphQLEngineArgs; import com.linkedin.datahub.graphql.GraphQLEngine; @@ -296,10 +297,27 @@ protected GraphQLEngine graphQLEngine( args.setMetricUtils(metricUtils); args.setS3Util(s3Util); - // I need to run builder().build() to get the GraphQL schema - // Once it's built I can't add another argument. Could I build it, create AspectMappingRegistry - // with the schema, then set it in args and build again and return? Seems weird.. 
- return new GmsGraphQLEngine(args).builder().build(); + // Create the GmsGraphQLEngine and build the GraphQL schema + GmsGraphQLEngine gmsGraphQLEngine = new GmsGraphQLEngine(args); + GraphQLEngine graphQLEngine = gmsGraphQLEngine.builder().build(); + + // Create the AspectMappingRegistry with the built schema + // This enables entity types to optimize aspect fetching based on GraphQL field selections + this.aspectMappingRegistry = + new AspectMappingRegistry(graphQLEngine.getGraphQL().getGraphQLSchema()); + + return graphQLEngine; + } + + // Store the AspectMappingRegistry to expose it as a bean + private AspectMappingRegistry aspectMappingRegistry; + + /** + * Provides the AspectMappingRegistry bean for use in resolvers and entity types. + */ + @Bean(name = "aspectMappingRegistry") + protected AspectMappingRegistry aspectMappingRegistry() { + return this.aspectMappingRegistry; } @Bean(name = "graphQLWorkerPool") diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index e1e233f31aefd..31ef8b7ac310a 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -54,6 +54,8 @@ public class GraphQLController { @Inject MetricUtils metricUtils; + @Inject com.linkedin.datahub.graphql.AspectMappingRegistry aspectMappingRegistry; + @Nonnull @Inject @Named("systemOperationContext") @@ -130,6 +132,8 @@ CompletableFuture> postGraphQL( operationName, query, variables); + // Set the AspectMappingRegistry for aspect-level optimizations + context.setAspectMappingRegistry(aspectMappingRegistry); Span.current().setAttribute(ACTOR_URN_ATTR, context.getActorUrn()); final String threadName = Thread.currentThread().getName(); diff --git 
a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java index c3ffcb5834946..d33312faa1c0d 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java @@ -2,6 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; +import com.linkedin.datahub.graphql.AspectMappingRegistry; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.metadata.config.DataHubAppConfiguration; import graphql.language.OperationDefinition; @@ -24,9 +25,10 @@ public class SpringQueryContext implements QueryContext { @Getter private final String queryName; @Nonnull private final OperationContext operationContext; @Nonnull private final DataHubAppConfiguration dataHubAppConfig; - - // Mutable field for DataFetchingEnvironment + + // Mutable fields for request-scoped data @Nullable private DataFetchingEnvironment dataFetchingEnvironment; + @Nullable private AspectMappingRegistry aspectMappingRegistry; public SpringQueryContext( final boolean isAuthenticated, @@ -78,4 +80,15 @@ public DataFetchingEnvironment getDataFetchingEnvironment() { public void setDataFetchingEnvironment(@Nullable DataFetchingEnvironment environment) { this.dataFetchingEnvironment = environment; } + + @Override + @Nullable + public AspectMappingRegistry getAspectMappingRegistry() { + return aspectMappingRegistry; + } + + @Override + public void setAspectMappingRegistry(@Nullable AspectMappingRegistry aspectMappingRegistry) { + this.aspectMappingRegistry = aspectMappingRegistry; + } } From b4dc7d65c4da74097f59d665e49ed5245a731728 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Tue, 11 Nov 2025 13:47:40 -0500 Subject: [PATCH 7/8] refactor getting optmized aspects 
for reuse --- .../graphql/AspectMappingRegistry.java | 14 ++++ .../graphql/types/dataset/DatasetType.java | 31 ++------ .../datahub/graphql/util/AspectUtils.java | 74 +++++++++++++++++++ 3 files changed, 93 insertions(+), 26 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java index e6d8d01ca7468..663050c6ba8bd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java @@ -11,6 +11,20 @@ import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +/** + * Maps GraphQL field selections to the minimum set of aspects needed to resolve them, enabling + * performance optimization by fetching only required aspects instead of all aspects. + * + * This class scans the GraphQL schema for two directives: + * - @aspectMapping(aspects: ["aspectName"]) - declares which aspects a field needs + * - @noAspects - indicates a field needs no aspects (computed fields, custom resolvers) + * + * To use in entity types, add one line to batchLoad: + * Set aspects = AspectUtils.getOptimizedAspects(context, "Dataset", ALL_ASPECTS, "datasetKey"); + * + * If any field lacks a mapping directive, getRequiredAspects returns null and the entity type falls + * back to fetching all aspects for safety. 
+ */ @Slf4j public class AspectMappingRegistry { private final Map> fieldToAspects = new HashMap<>(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index e5cb4c56d354b..e6bb594376d84 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -11,9 +11,9 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringArray; -import com.linkedin.datahub.graphql.AspectMappingRegistry; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.util.AspectUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.BatchDatasetUpdateInput; @@ -138,31 +138,10 @@ public List> batchLoad( @Nonnull final List urnStrs, @Nonnull final QueryContext context) { try { final List urns = urnStrs.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); - Set aspectsToResolve = ASPECTS_TO_RESOLVE; - - // Use the AspectMappingRegistry from the context to determine required aspects - if (context.getDataFetchingEnvironment() != null - && context.getAspectMappingRegistry() != null) { - Set requiredAspects = - context - .getAspectMappingRegistry() - .getRequiredAspects( - "Dataset", context.getDataFetchingEnvironment().getSelectionSet().getFields()); - - if (requiredAspects != null) { - // Successfully determined required aspects - only fetch what's needed - aspectsToResolve = new HashSet<>(requiredAspects); - // Always include the key aspect - aspectsToResolve.add(KEY_ASPECT); - log.info("Fetching 
optimized aspect set for Dataset: {}", aspectsToResolve); - } else { - log.debug( - "Could not determine required aspects for Dataset, falling back to fetching all aspects"); - } - } else { - log.debug( - "DataFetchingEnvironment or AspectMappingRegistry not available, fetching all aspects for Dataset"); - } + + // Determine optimal aspects to fetch based on GraphQL field selections + Set aspectsToResolve = + AspectUtils.getOptimizedAspects(context, "Dataset", ASPECTS_TO_RESOLVE, KEY_ASPECT); final Map datasetMap = entityClient.batchGetV2( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java new file mode 100644 index 0000000000000..8e9c4ab800d23 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java @@ -0,0 +1,74 @@ +package com.linkedin.datahub.graphql.util; + +import com.linkedin.datahub.graphql.QueryContext; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * Utility methods for optimizing aspect fetching in GraphQL entity types by determining which + * aspects need to be fetched based on requested fields. + */ +@Slf4j +public class AspectUtils { + + private AspectUtils() {} + + /** + * Determines optimal aspects to fetch based on GraphQL field selections. Falls back to + * defaultAspects if optimization isn't possible (missing registry, unmapped fields, etc). 
+ * + * Usage in entity type batchLoad: + * Set aspects = AspectUtils.getOptimizedAspects(context, "Dataset", ALL_ASPECTS, "datasetKey"); + * + * @param context the QueryContext containing AspectMappingRegistry and DataFetchingEnvironment + * @param entityTypeName the GraphQL type name (e.g., "Dataset", "CorpUser") + * @param defaultAspects the full set of aspects to use as fallback + * @param alwaysIncludeAspects aspects to always include (e.g., key aspects) + * @return optimized aspect set, or defaultAspects if optimization isn't possible + */ + @Nonnull + public static Set getOptimizedAspects( + @Nonnull final QueryContext context, + @Nonnull final String entityTypeName, + @Nonnull final Set defaultAspects, + @Nonnull final String... alwaysIncludeAspects) { + + // Check if we have the necessary context for optimization + if (context.getDataFetchingEnvironment() == null + || context.getAspectMappingRegistry() == null) { + log.debug( + "DataFetchingEnvironment or AspectMappingRegistry not available for {}, fetching all aspects", + entityTypeName); + return defaultAspects; + } + + // Attempt to determine required aspects from GraphQL field selections + Set requiredAspects = + context + .getAspectMappingRegistry() + .getRequiredAspects( + entityTypeName, context.getDataFetchingEnvironment().getSelectionSet().getFields()); + + // If we couldn't determine required aspects (e.g., unmapped field), fall back to all aspects + if (requiredAspects == null) { + log.debug( + "Could not determine required aspects for {}, falling back to fetching all aspects", + entityTypeName); + return defaultAspects; + } + + // Successfully optimized - build the minimal aspect set + Set optimizedAspects = new HashSet<>(requiredAspects); + + // Add any aspects that should always be included + if (alwaysIncludeAspects != null && alwaysIncludeAspects.length > 0) { + Collections.addAll(optimizedAspects, alwaysIncludeAspects); + } + + log.info("Fetching optimized aspect set for {}: {}", 
entityTypeName, optimizedAspects); + return optimizedAspects; + } +} From 0d215dec1c18d9de43923db49402f5de4a0b1ae1 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Tue, 11 Nov 2025 13:58:33 -0500 Subject: [PATCH 8/8] lint --- .../datahub/graphql/AspectMappingRegistry.java | 14 +++++++------- .../com/linkedin/datahub/graphql/QueryContext.java | 4 ++-- .../graphql/types/corpuser/CorpUserType.java | 2 -- .../datahub/graphql/types/dataset/DatasetType.java | 2 +- .../linkedin/datahub/graphql/util/AspectUtils.java | 4 ++-- .../gms/factory/graphql/GraphQLEngineFactory.java | 4 +--- 6 files changed, 13 insertions(+), 17 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java index 663050c6ba8bd..f0a3c65207580 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/AspectMappingRegistry.java @@ -15,15 +15,15 @@ * Maps GraphQL field selections to the minimum set of aspects needed to resolve them, enabling * performance optimization by fetching only required aspects instead of all aspects. * - * This class scans the GraphQL schema for two directives: - * - @aspectMapping(aspects: ["aspectName"]) - declares which aspects a field needs - * - @noAspects - indicates a field needs no aspects (computed fields, custom resolvers) + *

<p>This class scans the GraphQL schema for two directives: - @aspectMapping(aspects: + * ["aspectName"]) - declares which aspects a field needs - @noAspects - indicates a field needs no + * aspects (computed fields, custom resolvers) * - * To use in entity types, add one line to batchLoad: - * Set<String> aspects = AspectUtils.getOptimizedAspects(context, "Dataset", ALL_ASPECTS, "datasetKey"); * - * If any field lacks a mapping directive, getRequiredAspects returns null and the entity type falls - * back to fetching all aspects for safety. + *

<p>To use in entity types, add one line to batchLoad: Set<String> aspects = + * AspectUtils.getOptimizedAspects(context, "Dataset", ALL_ASPECTS, "datasetKey"); * - * If any field lacks a mapping directive, getRequiredAspects returns null and the entity type falls - * back to fetching all aspects for safety. + *

<p>If any field lacks a mapping directive, getRequiredAspects returns null and the entity type + * falls back to fetching all aspects for safety. */ @Slf4j public class AspectMappingRegistry { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java index ef8134d73045d..aaa14581b2d09 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java @@ -55,8 +55,8 @@ default String getActorUrn() { void setDataFetchingEnvironment(@Nullable DataFetchingEnvironment environment); /** - * Returns the {@link AspectMappingRegistry} for optimizing aspect fetching based on GraphQL - * field selections. + * Returns the {@link AspectMappingRegistry} for optimizing aspect fetching based on GraphQL field + * selections. * * @return the AspectMappingRegistry, or null if not available */ diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java index b181e7d3e7a11..34fb4b1f1c0e6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java @@ -13,7 +13,6 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; -import com.linkedin.datahub.graphql.AspectMappingRegistry; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; @@ -47,7 +46,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set;
import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nonnull; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index e6bb594376d84..4ff992095e9da 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -13,7 +13,6 @@ import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; -import com.linkedin.datahub.graphql.util.AspectUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.BatchDatasetUpdateInput; @@ -35,6 +34,7 @@ import com.linkedin.datahub.graphql.types.mappers.BrowsePathsMapper; import com.linkedin.datahub.graphql.types.mappers.BrowseResultMapper; import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; +import com.linkedin.datahub.graphql.util.AspectUtils; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java index 8e9c4ab800d23..c6d728ae1d2fa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/util/AspectUtils.java @@ -20,8 +20,8 @@ private AspectUtils() {} * Determines optimal aspects to fetch based on GraphQL field selections. 
Falls back to * defaultAspects if optimization isn't possible (missing registry, unmapped fields, etc). * - * Usage in entity type batchLoad: - * Set aspects = AspectUtils.getOptimizedAspects(context, "Dataset", ALL_ASPECTS, "datasetKey"); + *

<p>Usage in entity type batchLoad: Set<String> aspects = + * AspectUtils.getOptimizedAspects(context, "Dataset", ALL_ASPECTS, "datasetKey"); * * @param context the QueryContext containing AspectMappingRegistry and DataFetchingEnvironment * @param entityTypeName the GraphQL type name (e.g., "Dataset", "CorpUser") diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index ccaa2a86131c5..3b076eb5813ae 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -312,9 +312,7 @@ protected GraphQLEngine graphQLEngine( // Store the AspectMappingRegistry to expose it as a bean private AspectMappingRegistry aspectMappingRegistry; - /** - * Provides the AspectMappingRegistry bean for use in resolvers and entity types. - */ + /** Provides the AspectMappingRegistry bean for use in resolvers and entity types. */ @Bean(name = "aspectMappingRegistry") protected AspectMappingRegistry aspectMappingRegistry() { return this.aspectMappingRegistry;