diff --git a/README.md b/README.md index d459563..a81bd1f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,51 @@ # Unity Catalog Setup -This repository is now deprecated, as Unity Catalog is now in GA. Customers should follow instructions from the [official documentation](https://docs.databricks.com/data-governance/unity-catalog/index.html) +[Unity Catalog](https://docs.databricks.com/data-governance/unity-catalog/index.html) is a unified governance solution for all data and AI assets including files, tables, machine learning models and dashboards in your lakehouse on any cloud. This repository includes resources and guides helpful for setting up Unity Catalog metastores. + +Customers should follow instructions from the [official documentation](https://docs.databricks.com/data-governance/unity-catalog/index.html) to [get started using Unity Catalog](https://docs.databricks.com/data-governance/unity-catalog/get-started.html). + +## CloudFormation + +To get started using Unity Catalog on AWS, you will need an S3 bucket and an IAM role for your first UC metastore. You must set up an S3 bucket to be the root storage location for managed tables in that metastore. You must also set up a cross-account trust relationship so that Unity Catalog can assume a role to access the data in the bucket on behalf of Databricks users. To help with this setup, we have provided two CloudFormation templates. + +1. **Without KMS** - [`unity-catalog-setup-template.json`](cloudformation/unity-catalog-setup-template.json) - A template to create the S3 bucket, the IAM role, and the necessary IAM policy. S3 is configured to use server-side encryption using **S3-managed keys**, and _KMS permissions are **not** granted._ +2. **With KMS** - [`unity-catalog-setup-kms-template.json`](cloudformation/unity-catalog-setup-kms-template.json) - A template to create the S3 bucket, the IAM role, and the necessary IAM policy. 
S3 is configured to use server-side encryption using **the provided KMS key**, and _KMS permissions are granted._ + +### Usage Examples + +#### Create a Stack Using the [AWS CLI](https://aws.amazon.com/cli/) + +_Prerequisite: [Install and configure the AWS command line interface](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)_ + +**Without KMS** + +```bash +aws cloudformation create-stack \ + --stack-name "unity-catalog-stack" \ + --template-body "$(curl -fsSL https://raw.githubusercontent.com/databricks/unity-catalog-setup/main/cloudformation/unity-catalog-setup-template.json)" \ + --capabilities CAPABILITY_IAM \ + --parameters ParameterKey=DatabricksAccountID,ParameterValue=111122223333 \ + ParameterKey=MetastoreBucketName,ParameterValue=unity-catalog-bucket +``` + +**With KMS** + +```bash +aws cloudformation create-stack \ + --stack-name "unity-catalog-stack" \ + --template-body "$(curl -fsSL https://raw.githubusercontent.com/databricks/unity-catalog-setup/main/cloudformation/unity-catalog-setup-kms-template.json)" \ + --capabilities CAPABILITY_IAM \ + --parameters ParameterKey=DatabricksAccountID,ParameterValue=123412341234 \ + ParameterKey=MetastoreBucketName,ParameterValue=unity-catalog-bucket \ + ParameterKey=KMSKeyARN,ParameterValue=arn:aws:kms:us-east-1:111122223333:key/really-long-key-identifier +``` + +#### Create a Stack Using the AWS Console + +1. Log in to your AWS console +2. Open the CloudFormation console +3. Click "Create Stack" then "With new resources (standard)" +4. Choose "Upload a template file" and upload the CloudFormation template to use, downloaded from this repository (the "Amazon S3 URL" option accepts only S3 URLs, so a raw GitHub link cannot be pasted there). E.g., [the template without KMS](https://raw.githubusercontent.com/databricks/unity-catalog-setup/main/cloudformation/unity-catalog-setup-template.json) +5. Follow the prompts on screen for stack name, `DatabricksAccountID`, and `MetastoreBucketName` (and `KMSKeyARN` if using KMS). 
+ +![](cloudformation/uc-cloudformation-ui.png) diff --git a/cloudformation/uc-cloudformation-ui.png b/cloudformation/uc-cloudformation-ui.png new file mode 100644 index 0000000..eab94d8 Binary files /dev/null and b/cloudformation/uc-cloudformation-ui.png differ diff --git a/cloudformation/unity-catalog-setup-kms-template.json b/cloudformation/unity-catalog-setup-kms-template.json new file mode 100644 index 0000000..b27ae8f --- /dev/null +++ b/cloudformation/unity-catalog-setup-kms-template.json @@ -0,0 +1,119 @@ +{ + "AWSTemplateFormatVersion": "2010-09-09", + + "Parameters": { + "DatabricksAccountID": { + "Description": "Your Databricks account ID without hyphens.", + "Type": "String", + "AllowedPattern": "^[0-9]{12}$", + "ConstraintDescription": "Log into the Databricks account console. Click User Profile." + }, + "MetastoreBucketName": { + "Description": "S3 bucket to be created for the root storage of managed tables in Unity Catalog. See https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws", + "Type": "String", + "MinLength": 3, + "MaxLength": 63, + "AllowedPattern": "^[a-z0-9][a-z0-9.-]+[a-z0-9]$" + }, + "KMSKeyARN": { + "Description": "The Amazon Resource Name (ARN) of the KMS key to use.", + "Type": "String" + } + }, + + "Outputs": { + "UnityCatalogRole": { + "Description": "Edit this role and add itself as a principal in the assume role policy document.", + "Value": { "Fn::Sub": "${UnityCatalogRole.Arn}" } + }, + "UnityCatalogMetastore": { + "Value": { "Fn::Sub": "${UnityCatalogMetastore.Arn}" } + } + }, + + "Resources": { + "UnityCatalogMetastore": { + "Type": "AWS::S3::Bucket", + "Properties": { + "BucketName": { "Ref": "MetastoreBucketName" }, + "BucketEncryption": { + "ServerSideEncryptionConfiguration": [ { + "BucketKeyEnabled" : true, + "ServerSideEncryptionByDefault" : { + "SSEAlgorithm" : "aws:kms", + "KMSMasterKeyID" : { "Ref": "KMSKeyARN" } + } + } ] + } + } + }, + + 
"UnityCatalogRole": { + "Type" : "AWS::IAM::Role", + "Properties": { + "Description" : "Unity Catalog role for accessing S3", + "Path": "/", + "AssumeRolePolicyDocument" : { + "Version": "2012-10-17", + "Statement": [ { + "Effect": "Allow", + "Principal": { + "AWS": [ + "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL" + ] + }, + "Action": "sts:AssumeRole", + "Condition": { + "StringEquals": { + "sts:ExternalId": { "Ref": "DatabricksAccountID" } + } + } + } ] + } + } + }, + + "UnityCatalogRolePolicy": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyName": "UnityCatalogMetastore", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:GetBucketLocation", + "s3:GetLifecycleConfiguration", + "s3:PutLifecycleConfiguration" + ], + "Resource": [ + { "Fn::Sub": "${UnityCatalogMetastore.Arn}/*" }, + { "Fn::Sub": "${UnityCatalogMetastore.Arn}" } + ] + }, { + "Effect": "Allow", + "Action": [ + "kms:Decrypt", + "kms:Encrypt", + "kms:GenerateDataKey*" + ], + "Resource": [ + { "Ref": "KMSKeyARN" } + ] + }, { + "Effect": "Allow", + "Action": "sts:AssumeRole", + "Resource": { "Fn::Sub": "${UnityCatalogRole.Arn}" } + } ] + }, + "Roles": [ + { "Ref": "UnityCatalogRole" } + ] + } + } + } +} diff --git a/cloudformation/unity-catalog-setup-template.json b/cloudformation/unity-catalog-setup-template.json new file mode 100644 index 0000000..bf2196e --- /dev/null +++ b/cloudformation/unity-catalog-setup-template.json @@ -0,0 +1,104 @@ +{ + "AWSTemplateFormatVersion": "2010-09-09", + + "Parameters": { + "DatabricksAccountID": { + "Description": "Your Databricks account ID without hyphens.", + "Type": "String", + "AllowedPattern": "^[0-9]{12}$", + "ConstraintDescription": "Log into the Databricks account console. Click User Profile." 
+ }, + "MetastoreBucketName": { + "Description": "S3 bucket to be created for the root storage of managed tables in Unity Catalog. See https://docs.databricks.com/data-governance/unity-catalog/get-started.html#configure-a-storage-bucket-and-iam-role-in-aws", + "Type": "String", + "MinLength": 3, + "MaxLength": 63, + "AllowedPattern": "^[a-z0-9][a-z0-9.-]+[a-z0-9]$" + } + }, + + "Outputs": { + "UnityCatalogRole": { + "Description": "Edit this role and add itself as a principal in the assume role policy document.", + "Value": { "Fn::Sub": "${UnityCatalogRole.Arn}" } + }, + "UnityCatalogMetastore": { + "Value": { "Fn::Sub": "${UnityCatalogMetastore.Arn}" } + } + }, + + "Resources": { + "UnityCatalogMetastore": { + "Type": "AWS::S3::Bucket", + "Properties": { + "BucketName": { "Ref": "MetastoreBucketName" }, + "BucketEncryption": { + "ServerSideEncryptionConfiguration": [ { + "BucketKeyEnabled" : true, + "ServerSideEncryptionByDefault" : { + "SSEAlgorithm" : "AES256" + } + } ] + } + } + }, + + "UnityCatalogRole": { + "Type" : "AWS::IAM::Role", + "Properties": { + "Description" : "Unity Catalog role for accessing S3", + "Path": "/", + "AssumeRolePolicyDocument" : { + "Version": "2012-10-17", + "Statement": [ { + "Effect": "Allow", + "Principal": { + "AWS": [ + "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL" + ] + }, + "Action": "sts:AssumeRole", + "Condition": { + "StringEquals": { + "sts:ExternalId": { "Ref": "DatabricksAccountID" } + } + } + } ] + } + } + }, + + "UnityCatalogRolePolicy": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyName": "UnityCatalogMetastore", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:GetBucketLocation", + "s3:GetLifecycleConfiguration", + "s3:PutLifecycleConfiguration" + ], + "Resource": [ + { "Fn::Sub": "${UnityCatalogMetastore.Arn}/*" }, + { "Fn::Sub": 
"${UnityCatalogMetastore.Arn}" } + ] + }, { + "Effect": "Allow", + "Action": "sts:AssumeRole", + "Resource": { "Fn::Sub": "${UnityCatalogRole.Arn}" } + } ] + }, + "Roles": [ + { "Ref": "UnityCatalogRole" } + ] + } + } + } +}