Skip to content

Commit 089dcfa

Browse files
author
AWS
committed
AWS Glue Update: Added support for preprocessing queries in Data Quality operations through new DataQualityGlueTable structure.
1 parent eb9c6d6 commit 089dcfa

File tree

3 files changed

+53
-47
lines changed

3 files changed

+53
-47
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "AWS Glue",
4+
"contributor": "",
5+
"description": "Added support for preprocessing queries in Data Quality operations through new DataQualityGlueTable structure."
6+
}

services/glue/src/main/resources/codegen-resources/endpoint-tests.json

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -494,17 +494,6 @@
494494
"UseDualStack": true
495495
}
496496
},
497-
{
498-
"documentation": "For region us-iso-east-1 with FIPS enabled and DualStack enabled",
499-
"expect": {
500-
"error": "FIPS and DualStack are enabled, but this partition does not support one or both"
501-
},
502-
"params": {
503-
"Region": "us-iso-east-1",
504-
"UseFIPS": true,
505-
"UseDualStack": true
506-
}
507-
},
508497
{
509498
"documentation": "For region us-iso-east-1 with FIPS enabled and DualStack disabled",
510499
"expect": {
@@ -518,17 +507,6 @@
518507
"UseDualStack": false
519508
}
520509
},
521-
{
522-
"documentation": "For region us-iso-east-1 with FIPS disabled and DualStack enabled",
523-
"expect": {
524-
"error": "DualStack is enabled but this partition does not support DualStack"
525-
},
526-
"params": {
527-
"Region": "us-iso-east-1",
528-
"UseFIPS": false,
529-
"UseDualStack": true
530-
}
531-
},
532510
{
533511
"documentation": "For region us-iso-east-1 with FIPS disabled and DualStack disabled",
534512
"expect": {
@@ -542,17 +520,6 @@
542520
"UseDualStack": false
543521
}
544522
},
545-
{
546-
"documentation": "For region us-isob-east-1 with FIPS enabled and DualStack enabled",
547-
"expect": {
548-
"error": "FIPS and DualStack are enabled, but this partition does not support one or both"
549-
},
550-
"params": {
551-
"Region": "us-isob-east-1",
552-
"UseFIPS": true,
553-
"UseDualStack": true
554-
}
555-
},
556523
{
557524
"documentation": "For region us-isob-east-1 with FIPS enabled and DualStack disabled",
558525
"expect": {
@@ -566,17 +533,6 @@
566533
"UseDualStack": false
567534
}
568535
},
569-
{
570-
"documentation": "For region us-isob-east-1 with FIPS disabled and DualStack enabled",
571-
"expect": {
572-
"error": "DualStack is enabled but this partition does not support DualStack"
573-
},
574-
"params": {
575-
"Region": "us-isob-east-1",
576-
"UseFIPS": false,
577-
"UseDualStack": true
578-
}
579-
},
580536
{
581537
"documentation": "For region us-isob-east-1 with FIPS disabled and DualStack disabled",
582538
"expect": {

services/glue/src/main/resources/codegen-resources/service-2.json

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@
268268
{"shape":"InternalServiceException"},
269269
{"shape":"ResourceNumberLimitExceededException"}
270270
],
271-
"documentation":"<p>Annotate datapoints over time for a specific data quality statistic.</p>"
271+
"documentation":"<p>Annotate datapoints over time for a specific data quality statistic. The API requires both profileID and statisticID as part of the InclusionAnnotation input. The API only works for a single statisticId across multiple profiles.</p>"
272272
},
273273
"BatchStopJobRun":{
274274
"name":"BatchStopJobRun",
@@ -5599,7 +5599,7 @@
55995599
"members":{
56005600
"InclusionAnnotations":{
56015601
"shape":"InclusionAnnotationList",
5602-
"documentation":"<p>A list of <code>DatapointInclusionAnnotation</code>'s.</p>"
5602+
"documentation":"<p>A list of <code>DatapointInclusionAnnotation</code>'s. The InclusionAnnotations must contain a profileId and statisticId. If there are multiple InclusionAnnotations, the list must refer to a single statisticId across multiple profileIds.</p>"
56035603
},
56045604
"ClientToken":{
56055605
"shape":"HashString",
@@ -10731,6 +10731,40 @@
1073110731
},
1073210732
"documentation":"<p>Additional run options you can specify for an evaluation run.</p>"
1073310733
},
10734+
"DataQualityGlueTable":{
10735+
"type":"structure",
10736+
"required":[
10737+
"DatabaseName",
10738+
"TableName"
10739+
],
10740+
"members":{
10741+
"DatabaseName":{
10742+
"shape":"NameString",
10743+
"documentation":"<p>A database name in the Glue Data Catalog.</p>"
10744+
},
10745+
"TableName":{
10746+
"shape":"NameString",
10747+
"documentation":"<p>A table name in the Glue Data Catalog.</p>"
10748+
},
10749+
"CatalogId":{
10750+
"shape":"NameString",
10751+
"documentation":"<p>A unique identifier for the Glue Data Catalog.</p>"
10752+
},
10753+
"ConnectionName":{
10754+
"shape":"NameString",
10755+
"documentation":"<p>The name of the connection to the Glue Data Catalog.</p>"
10756+
},
10757+
"AdditionalOptions":{
10758+
"shape":"GlueTableAdditionalOptions",
10759+
"documentation":"<p>Additional options for the table. Currently there are two keys supported:</p> <ul> <li> <p> <code>pushDownPredicate</code>: to filter on partitions without having to list and read all the files in your dataset.</p> </li> <li> <p> <code>catalogPartitionPredicate</code>: to use server-side partition pruning using partition indexes in the Glue Data Catalog.</p> </li> </ul>"
10760+
},
10761+
"PreProcessingQuery":{
10762+
"shape":"PreProcessingQueryString",
10763+
"documentation":"<p>SQL Query of SparkSQL format that can be used to pre-process the data for the table in Glue Data Catalog, before running the Data Quality Operation.</p>"
10764+
}
10765+
},
10766+
"documentation":"<p>The database and table in the Glue Data Catalog that is used for input or output data for Data Quality Operations.</p>"
10767+
},
1073410768
"DataQualityMetricValues":{
1073510769
"type":"structure",
1073610770
"members":{
@@ -11172,11 +11206,14 @@
1117211206
},
1117311207
"DataSource":{
1117411208
"type":"structure",
11175-
"required":["GlueTable"],
1117611209
"members":{
1117711210
"GlueTable":{
1117811211
"shape":"GlueTable",
1117911212
"documentation":"<p>An Glue table.</p>"
11213+
},
11214+
"DataQualityGlueTable":{
11215+
"shape":"DataQualityGlueTable",
11216+
"documentation":"<p>An Glue table for Data Quality Operations.</p>"
1118011217
}
1118111218
},
1118211219
"documentation":"<p>A data source (an Glue table) for which you want data quality results.</p>"
@@ -21426,6 +21463,13 @@
2142621463
},
2142721464
"documentation":"<p>Specifies a target that uses Postgres SQL.</p>"
2142821465
},
21466+
"PreProcessingQueryString":{
21467+
"type":"string",
21468+
"documentation":"<p>SQL Query of SparkSQL format that can be used to pre-process data before running Data Quality Operations.</p>",
21469+
"max":51200,
21470+
"min":0,
21471+
"pattern":"[\\u0020-\\uD7FF\\uE000-\\uFFFD\\uD800\\uDC00-\\uDBFF\\uDFFF\\r\\n\\t]*"
21472+
},
2142921473
"Predecessor":{
2143021474
"type":"structure",
2143121475
"members":{

0 commit comments

Comments
 (0)