From 4d3dc0989a772114e6eda73f4f08bee8396e866c Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 5 Nov 2025 14:43:17 +0100 Subject: [PATCH 01/13] remove obsolete errors --- rust/operator-binary/src/controller.rs | 42 -------------------------- 1 file changed, 42 deletions(-) diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 96cdc955..6670df0d 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -131,16 +131,6 @@ pub enum Error { #[snafu(display("object defines no metastore role"))] NoMetaStoreRole, - #[snafu(display("failed to calculate service name for role {rolegroup}"))] - RoleGroupServiceNameNotFound { - rolegroup: RoleGroupRef, - }, - - #[snafu(display("failed to apply global Service"))] - ApplyRoleService { - source: stackable_operator::cluster_resources::Error, - }, - #[snafu(display("failed to apply Service for {rolegroup}"))] ApplyRoleGroupService { source: stackable_operator::cluster_resources::Error, @@ -198,9 +188,6 @@ pub enum Error { source: stackable_operator::crd::s3::v1alpha1::ConnectionError, }, - #[snafu(display("failed to configure S3 TLS client details"))] - ConfigureS3TlsClientDetails { source: TlsClientDetailsError }, - #[snafu(display( "Hive does not support skipping the verification of the tls enabled S3 server" ))] @@ -209,15 +196,6 @@ pub enum Error { #[snafu(display("failed to resolve and merge resource config for role and role group"))] FailedToResolveResourceConfig { source: crate::crd::Error }, - #[snafu(display("invalid java heap config - missing default or value in crd?"))] - InvalidJavaHeapConfig, - - #[snafu(display("failed to convert java heap config to unit [{unit}]"))] - FailedToConvertJavaHeap { - source: stackable_operator::memory::Error, - unit: String, - }, - #[snafu(display("failed to create hive container [{name}]"))] FailedToCreateHiveContainer { source: stackable_operator::builder::pod::container::Error, @@ -258,9 +236,6 
@@ pub enum Error { source: stackable_operator::commons::rbac::Error, }, - #[snafu(display("internal operator failure"))] - InternalOperatorError { source: crate::crd::Error }, - #[snafu(display( "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", rolegroup @@ -280,16 +255,6 @@ pub enum Error { source: crate::operations::graceful_shutdown::Error, }, - #[snafu(display("failed to build TLS certificate SecretClass Volume"))] - TlsCertSecretClassVolumeBuild { - source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError, - }, - - #[snafu(display("failed to build S3 credentials SecretClass Volume"))] - S3CredentialsSecretClassVolumeBuild { - source: stackable_operator::commons::secret_class::SecretClassVolumeError, - }, - #[snafu(display("failed to build Labels"))] LabelBuild { source: stackable_operator::kvp::LabelError, @@ -306,13 +271,6 @@ pub enum Error { stackable_operator::kvp::KeyValuePairError, }, - #[snafu(display( - "there was an error adding LDAP Volumes and VolumeMounts to the Pod and Containers" - ))] - AddLdapVolumes { - source: stackable_operator::crd::authentication::ldap::v1alpha1::Error, - }, - #[snafu(display("failed to add kerberos config"))] AddKerberosConfig { source: kerberos::Error }, From ca2c9d0bf5eec44de0a5343ccf5a1fd57de717c8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 5 Nov 2025 16:46:52 +0100 Subject: [PATCH 02/13] compiling - without tls --- deploy/helm/hive-operator/crds/crds.yaml | 27 ++++ rust/operator-binary/src/config/mod.rs | 1 + rust/operator-binary/src/config/opa.rs | 181 +++++++++++++++++++++++ rust/operator-binary/src/controller.rs | 40 ++++- rust/operator-binary/src/crd/mod.rs | 19 ++- rust/operator-binary/src/crd/security.rs | 13 +- 6 files changed, 273 insertions(+), 8 deletions(-) create mode 100644 rust/operator-binary/src/config/opa.rs diff --git a/deploy/helm/hive-operator/crds/crds.yaml b/deploy/helm/hive-operator/crds/crds.yaml index b6fd37be..3d07e4ad 100644 --- 
a/deploy/helm/hive-operator/crds/crds.yaml +++ b/deploy/helm/hive-operator/crds/crds.yaml @@ -49,6 +49,33 @@ spec: required: - kerberos type: object + authorization: + description: |- + Authorization options for Hive. + Learn more in the [Hive authorization usage guide](https://docs.stackable.tech/home/nightly/hive/usage-guide/security#authorization). + nullable: true + properties: + opa: + description: |- + Configure the OPA stacklet [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) + and the name of the Rego package containing your authorization rules. + Consult the [OPA authorization documentation](https://docs.stackable.tech/home/nightly/concepts/opa) + to learn how to deploy Rego authorization rules with OPA. + nullable: true + properties: + configMapName: + description: |- + The [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) + for the OPA stacklet that should be used for authorization requests. + type: string + package: + description: The name of the Rego package containing the Rego rules for the product. + nullable: true + type: string + required: + - configMapName + type: object + type: object database: description: Database connection specification for the metadata database. 
properties: diff --git a/rust/operator-binary/src/config/mod.rs b/rust/operator-binary/src/config/mod.rs index 271c6d99..4069348d 100644 --- a/rust/operator-binary/src/config/mod.rs +++ b/rust/operator-binary/src/config/mod.rs @@ -1 +1,2 @@ pub mod jvm; +pub mod opa; diff --git a/rust/operator-binary/src/config/opa.rs b/rust/operator-binary/src/config/opa.rs new file mode 100644 index 00000000..7464fda0 --- /dev/null +++ b/rust/operator-binary/src/config/opa.rs @@ -0,0 +1,181 @@ +use std::collections::BTreeMap; + +use stackable_operator::{ + client::Client, + commons::opa::{OpaApiVersion, OpaConfig}, + k8s_openapi::api::core::v1::ConfigMap, + kube::ResourceExt, +}; + +use crate::crd::v1alpha1::HiveCluster; + +const HIVE_METASTORE_PRE_EVENT_LISTENERS: &str = "hive.metastore.pre.event.listeners"; +const HIVE_SECURITY_METASTORE_AUTHORIZATION_MANAGER: &str = + "hive.security.metastore.authorization.manager"; + +const OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V3: &str = + "com.bosch.bdps.hms3.OpaAuthorizationPreEventListener"; +const OPA_BASED_AUTHORIZATION_PROVIDER_V3: &str = + "com.bosch.bdps.hms3.OpaBasedAuthorizationProvider"; +const OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V4: &str = + "com.bosch.bdps.hms4.OpaAuthorizationPreEventListener"; +const OPA_BASED_AUTHORIZATION_PROVIDER_V4: &str = + "com.bosch.bdps.hms4.OpaBasedAuthorizationProvider"; + +const OPA_AUTHORIZATION_BASE_ENDPOINT: &str = "com.bosch.bdps.opa.authorization.base.endpoint"; +const OPA_AUTHORIZATION_POLICY_URL_DATA_BASE: &str = + "com.bosch.bdps.opa.authorization.policy.url.database"; +const OPA_AUTHORIZATION_POLICY_URL_TABLE: &str = + "com.bosch.bdps.opa.authorization.policy.url.table"; +const OPA_AUTHORIZATION_POLICY_URL_COLUMN: &str = + "com.bosch.bdps.opa.authorization.policy.url.column"; +const OPA_AUTHORIZATION_POLICY_URL_PARTITION: &str = + "com.bosch.bdps.opa.authorization.policy.url.partition"; +const OPA_AUTHORIZATION_POLICY_URL_USER: &str = "com.bosch.bdps.opa.authorization.policy.url.user"; + 
+pub const OPA_TLS_VOLUME_NAME: &str = "opa-tls"; + +pub struct HiveOpaConfig { + /// Endpoint for OPA, e.g. + /// `http://localhost:8081/v1/data/hms/allow` + pub(crate) base_endpoint: String, + /// Policy to check database authorization, e.g. + /// `http://localhost:8081/v1/data/hms/database_allow` + pub(crate) policy_url_database: String, + /// Policy to check table authorization, e.g. + /// `http://localhost:8081/v1/data/hms/table_allow` + pub(crate) policy_url_table: String, + /// Policy to check column authorization, e.g. + /// `http://localhost:8081/v1/data/hms/column_allow` + pub(crate) policy_url_column: String, + /// Policy to check partition authorization, e.g. + /// `http://localhost:8081/v1/data/hms/partition_allow` + pub(crate) policy_url_partition: String, + /// Policy to check user authorization, e.g. + /// `http://localhost:8081/v1/data/hms/user_allow` + pub(crate) policy_url_user: String, + /// Optional TLS secret class for OPA communication. + /// If set, the CA certificate from this secret class will be added + /// to hive's truststore to make it trust OPA's TLS certificate. 
+ pub(crate) tls_secret_class: Option, +} + +impl HiveOpaConfig { + pub async fn from_opa_config( + client: &Client, + hive: &HiveCluster, + opa_config: &OpaConfig, + ) -> Result { + // See: https://github.com/boschglobal/hive-metastore-opa-authorizer?tab=readme-ov-file#configuration + // TODO: get document root once (client call) and build the other strings + let base_endpoint = opa_config + .full_document_url_from_config_map(client, hive, Some("allow"), OpaApiVersion::V1) + .await?; + + let policy_url_database = opa_config + .full_document_url_from_config_map( + client, + hive, + Some("database_allow"), + OpaApiVersion::V1, + ) + .await?; + let policy_url_table = opa_config + .full_document_url_from_config_map(client, hive, Some("table_allow"), OpaApiVersion::V1) + .await?; + let policy_url_column = opa_config + .full_document_url_from_config_map( + client, + hive, + Some("column_allow"), + OpaApiVersion::V1, + ) + .await?; + let policy_url_partition = opa_config + .full_document_url_from_config_map( + client, + hive, + Some("partition_allow"), + OpaApiVersion::V1, + ) + .await?; + let policy_url_user = opa_config + .full_document_url_from_config_map(client, hive, Some("user_allow"), OpaApiVersion::V1) + .await?; + + let tls_secret_class = client + .get::( + &opa_config.config_map_name, + hive.namespace().as_deref().unwrap_or("default"), + ) + .await + .ok() + .and_then(|cm| cm.data) + .and_then(|mut data| data.remove("OPA_SECRET_CLASS")); + + Ok(HiveOpaConfig { + base_endpoint, + policy_url_database, + policy_url_table, + policy_url_column, + policy_url_partition, + policy_url_user, + tls_secret_class, + }) + } + + pub fn as_config(&self, product_version: &str) -> BTreeMap { + let (pre_event_listener, authorization_provider) = if product_version.starts_with("3.") { + ( + OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V3, + OPA_BASED_AUTHORIZATION_PROVIDER_V3, + ) + } else { + ( + OPA_AUTHORIZATION_PRE_EVENT_LISTENER_V4, + OPA_BASED_AUTHORIZATION_PROVIDER_V4, + ) + }; + + 
BTreeMap::from([ + ( + HIVE_METASTORE_PRE_EVENT_LISTENERS.to_string(), + pre_event_listener.to_string(), + ), + ( + HIVE_SECURITY_METASTORE_AUTHORIZATION_MANAGER.to_string(), + authorization_provider.to_string(), + ), + ( + OPA_AUTHORIZATION_BASE_ENDPOINT.to_string(), + self.base_endpoint.to_owned(), + ), + ( + OPA_AUTHORIZATION_POLICY_URL_DATA_BASE.to_string(), + self.policy_url_database.to_owned(), + ), + ( + OPA_AUTHORIZATION_POLICY_URL_TABLE.to_string(), + self.policy_url_table.to_owned(), + ), + ( + OPA_AUTHORIZATION_POLICY_URL_COLUMN.to_string(), + self.policy_url_column.to_owned(), + ), + ( + OPA_AUTHORIZATION_POLICY_URL_PARTITION.to_string(), + self.policy_url_partition.to_owned(), + ), + ( + OPA_AUTHORIZATION_POLICY_URL_USER.to_string(), + self.policy_url_user.to_owned(), + ), + ]) + } + + pub fn tls_mount_path(&self) -> Option { + self.tls_secret_class + .as_ref() + .map(|_| format!("/stackable/secrets/{OPA_TLS_VOLUME_NAME}")) + } +} diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 6670df0d..16e5964e 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -36,7 +36,6 @@ use stackable_operator::{ commons::{ product_image_selection::{self, ResolvedProductImage}, rbac::build_rbac_resources, - tls_verification::TlsClientDetailsError, }, crd::{listener::v1alpha1::Listener, s3}, k8s_openapi::{ @@ -85,7 +84,10 @@ use tracing::warn; use crate::{ OPERATOR_NAME, command::build_container_command_args, - config::jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args}, + config::{ + jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args}, + opa::HiveOpaConfig, + }, crd::{ APP_NAME, CORE_SITE_XML, Container, DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_PORT, HIVE_PORT_NAME, HIVE_SITE_XML, HiveClusterStatus, HiveRole, JVM_SECURITY_PROPERTIES_FILE, @@ -236,6 +238,9 @@ pub enum Error { source: stackable_operator::commons::rbac::Error, }, + #[snafu(display("internal 
operator failure"))] + InternalOperatorFailure { source: crate::crd::Error }, + #[snafu(display( "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", rolegroup @@ -313,6 +318,11 @@ pub enum Error { ResolveProductImage { source: product_image_selection::Error, }, + + #[snafu(display("invalid OpaConfig"))] + InvalidOpaConfig { + source: stackable_operator::commons::opa::Error, + }, } type Result = std::result::Result; @@ -416,6 +426,15 @@ pub async fn reconcile_hive( .await .context(ApplyRoleBindingSnafu)?; + let hive_opa_config = match hive.get_opa_config() { + Some(opa_config) => Some( + HiveOpaConfig::from_opa_config(client, hive, opa_config) + .await + .context(InvalidOpaConfigSnafu)?, + ), + None => None, + }; + let mut ss_cond_builder = StatefulSetConditionBuilder::default(); for (rolegroup_name, rolegroup_config) in metastore_config.iter() { @@ -442,6 +461,7 @@ pub async fn reconcile_hive( s3_connection_spec.as_ref(), &config, &client.kubernetes_cluster_info, + hive_opa_config.as_ref(), )?; let rg_statefulset = build_metastore_rolegroup_statefulset( hive, @@ -567,6 +587,7 @@ fn build_metastore_rolegroup_config_map( s3_connection_spec: Option<&s3::v1alpha1::ConnectionSpec>, merged_config: &MetaStoreConfig, cluster_info: &KubernetesClusterInfo, + hive_opa_config: Option<&HiveOpaConfig>, ) -> Result { let mut hive_site_data = String::new(); @@ -623,6 +644,17 @@ fn build_metastore_rolegroup_config_map( data.insert(property_name.to_string(), Some(property_value.to_string())); } + // OPA settings + if let Some(opa_config) = hive_opa_config { + data.extend( + opa_config + .as_config(&resolved_product_image.product_version) + .into_iter() + .map(|(k, v)| (k, Some(v))) + .collect::>>(), + ); + } + // overrides for (property_name, property_value) in config { data.insert(property_name.to_string(), Some(property_value.to_string())); @@ -711,10 +743,10 @@ fn build_metastore_rolegroup_statefulset( merged_config: &MetaStoreConfig, sa_name: &str, ) -> Result { - let 
role = hive.role(hive_role).context(InternalOperatorSnafu)?; + let role = hive.role(hive_role).context(InternalOperatorFailureSnafu)?; let rolegroup = hive .rolegroup(rolegroup_ref) - .context(InternalOperatorSnafu)?; + .context(InternalOperatorFailureSnafu)?; let mut container_builder = ContainerBuilder::new(APP_NAME).context(FailedToCreateHiveContainerSnafu { diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 8af89d72..e9e0f216 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -7,6 +7,7 @@ use stackable_operator::{ commons::{ affinity::StackableAffinity, cluster_operation::ClusterOperation, + opa::OpaConfig, product_image_selection::ProductImage, resources::{ CpuLimitsFragment, MemoryLimitsFragment, NoRuntimeLimits, NoRuntimeLimitsFragment, @@ -151,6 +152,13 @@ pub mod versioned { #[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct HiveClusterConfig { + /// Settings related to user [authentication](DOCS_BASE_URL_PLACEHOLDER/usage-guide/security). + pub authentication: Option, + + /// Authorization options for Hive. + /// Learn more in the [Hive authorization usage guide](DOCS_BASE_URL_PLACEHOLDER/hive/usage-guide/security#authorization). + pub authorization: Option, + // no doc - docs in DatabaseConnectionSpec struct. pub database: DatabaseConnectionSpec, @@ -169,9 +177,6 @@ pub mod versioned { /// to learn how to configure log aggregation with Vector. #[serde(skip_serializing_if = "Option::is_none")] pub vector_aggregator_config_map_name: Option, - - /// Settings related to user [authentication](DOCS_BASE_URL_PLACEHOLDER/usage-guide/security). 
- pub authentication: Option, } } @@ -289,6 +294,14 @@ impl v1alpha1::HiveCluster { &self.spec.cluster_config.database.db_type } + pub fn get_opa_config(&self) -> Option<&OpaConfig> { + self.spec + .cluster_config + .authorization + .as_ref() + .and_then(|a| a.opa.as_ref()) + } + /// Retrieve and merge resource configs for role and role groups pub fn merged_config( &self, diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index ce279097..ffad541a 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -1,5 +1,8 @@ use serde::{Deserialize, Serialize}; -use stackable_operator::schemars::{self, JsonSchema}; +use stackable_operator::{ + commons::opa::OpaConfig, + schemars::{self, JsonSchema}, +}; #[derive(Clone, Debug, Deserialize, Eq, Hash, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] @@ -8,6 +11,14 @@ pub struct AuthenticationConfig { pub kerberos: KerberosConfig, } +#[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct AuthorizationConfig { + // no doc - it's in the struct. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub opa: Option, +} + #[derive(Clone, Debug, Deserialize, Eq, Hash, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct KerberosConfig { From 11dafe11b1889883f53b9ca60d8db6b10c69ccf1 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 7 Nov 2025 12:38:15 +0100 Subject: [PATCH 03/13] oparized smoke test working --- rust/operator-binary/src/config/opa.rs | 66 ++---------------- tests/templates/kuttl/smoke/50-assert.yaml | 13 ++++ .../kuttl/smoke/50-install-opa.yaml.j2 | 69 +++++++++++++++++++ .../kuttl/smoke/60-install-hive.yaml.j2 | 4 ++ tests/test-definition.yaml | 5 +- 5 files changed, 97 insertions(+), 60 deletions(-) create mode 100644 tests/templates/kuttl/smoke/50-assert.yaml create mode 100644 tests/templates/kuttl/smoke/50-install-opa.yaml.j2 diff --git a/rust/operator-binary/src/config/opa.rs b/rust/operator-binary/src/config/opa.rs index 7464fda0..a27bf8bc 100644 --- a/rust/operator-binary/src/config/opa.rs +++ b/rust/operator-binary/src/config/opa.rs @@ -37,23 +37,8 @@ pub const OPA_TLS_VOLUME_NAME: &str = "opa-tls"; pub struct HiveOpaConfig { /// Endpoint for OPA, e.g. - /// `http://localhost:8081/v1/data/hms/allow` + /// `http://localhost:8081/v1/data/` pub(crate) base_endpoint: String, - /// Policy to check database authorization, e.g. - /// `http://localhost:8081/v1/data/hms/database_allow` - pub(crate) policy_url_database: String, - /// Policy to check table authorization, e.g. - /// `http://localhost:8081/v1/data/hms/table_allow` - pub(crate) policy_url_table: String, - /// Policy to check column authorization, e.g. - /// `http://localhost:8081/v1/data/hms/column_allow` - pub(crate) policy_url_column: String, - /// Policy to check partition authorization, e.g. - /// `http://localhost:8081/v1/data/hms/partition_allow` - pub(crate) policy_url_partition: String, - /// Policy to check user authorization, e.g. 
- /// `http://localhost:8081/v1/data/hms/user_allow` - pub(crate) policy_url_user: String, /// Optional TLS secret class for OPA communication. /// If set, the CA certificate from this secret class will be added /// to hive's truststore to make it trust OPA's TLS certificate. @@ -67,40 +52,8 @@ impl HiveOpaConfig { opa_config: &OpaConfig, ) -> Result { // See: https://github.com/boschglobal/hive-metastore-opa-authorizer?tab=readme-ov-file#configuration - // TODO: get document root once (client call) and build the other strings let base_endpoint = opa_config - .full_document_url_from_config_map(client, hive, Some("allow"), OpaApiVersion::V1) - .await?; - - let policy_url_database = opa_config - .full_document_url_from_config_map( - client, - hive, - Some("database_allow"), - OpaApiVersion::V1, - ) - .await?; - let policy_url_table = opa_config - .full_document_url_from_config_map(client, hive, Some("table_allow"), OpaApiVersion::V1) - .await?; - let policy_url_column = opa_config - .full_document_url_from_config_map( - client, - hive, - Some("column_allow"), - OpaApiVersion::V1, - ) - .await?; - let policy_url_partition = opa_config - .full_document_url_from_config_map( - client, - hive, - Some("partition_allow"), - OpaApiVersion::V1, - ) - .await?; - let policy_url_user = opa_config - .full_document_url_from_config_map(client, hive, Some("user_allow"), OpaApiVersion::V1) + .full_document_url_from_config_map(client, hive, None, OpaApiVersion::V1) .await?; let tls_secret_class = client @@ -115,11 +68,6 @@ impl HiveOpaConfig { Ok(HiveOpaConfig { base_endpoint, - policy_url_database, - policy_url_table, - policy_url_column, - policy_url_partition, - policy_url_user, tls_secret_class, }) } @@ -152,23 +100,23 @@ impl HiveOpaConfig { ), ( OPA_AUTHORIZATION_POLICY_URL_DATA_BASE.to_string(), - self.policy_url_database.to_owned(), + "database_allow".to_string(), ), ( OPA_AUTHORIZATION_POLICY_URL_TABLE.to_string(), - self.policy_url_table.to_owned(), + 
"table_allow".to_string(), ), ( OPA_AUTHORIZATION_POLICY_URL_COLUMN.to_string(), - self.policy_url_column.to_owned(), + "column_allow".to_string(), ), ( OPA_AUTHORIZATION_POLICY_URL_PARTITION.to_string(), - self.policy_url_partition.to_owned(), + "partition_allow".to_string(), ), ( OPA_AUTHORIZATION_POLICY_URL_USER.to_string(), - self.policy_url_user.to_owned(), + "user_allow".to_string(), ), ]) } diff --git a/tests/templates/kuttl/smoke/50-assert.yaml b/tests/templates/kuttl/smoke/50-assert.yaml new file mode 100644 index 00000000..f109e187 --- /dev/null +++ b/tests/templates/kuttl/smoke/50-assert.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + - script: kubectl -n $NAMESPACE rollout status daemonset opa-server-default --timeout 600s +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: hive-opa-bundle + labels: + opa.stackable.tech/bundle: "hms" diff --git a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 new file mode 100644 index 00000000..8c59bf3b --- /dev/null +++ b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 @@ -0,0 +1,69 @@ +--- +apiVersion: opa.stackable.tech/v1alpha1 +kind: OpaCluster +metadata: + name: opa +spec: + image: +{% if test_scenario['values']['opa-latest'].find(",") > 0 %} + custom: "{{ test_scenario['values']['opa-latest'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['opa-latest'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['opa-latest'] }}" +{% endif %} + pullPolicy: IfNotPresent + clusterConfig: +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + servers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + containers: + opa: + console: + level: INFO + file: + level: INFO + loggers: + decision: + level: INFO + roleGroups: + default: {} +--- +apiVersion: v1 
+kind: ConfigMap +metadata: + name: hive-opa-bundle + labels: + opa.stackable.tech/bundle: "hms" +data: + trino.rego: | + package hms + + default database_allow = false + default table_allow = false + default column_allow = false + default partition_allow = false + default user_allow = false + + database_allow if { + input.identity.username == "stackable" + } + + table_allow if { + input.identity.username == "stackable" + } + + column_allow if { + input.identity.username == "stackable" + } + + partition_allow if { + input.identity.username == "stackable" + } + + user_allow if { + input.identity.username == "stackable" + } diff --git a/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 b/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 index 4db2575e..1f35f82a 100644 --- a/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 +++ b/tests/templates/kuttl/smoke/60-install-hive.yaml.j2 @@ -13,6 +13,10 @@ spec: {% endif %} pullPolicy: IfNotPresent clusterConfig: + authorization: + opa: + configMapName: opa + package: hms database: connString: jdbc:postgresql://postgresql:5432/hive credentialsSecret: hive-credentials diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index a8795502..241dad35 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -17,7 +17,6 @@ dimensions: - name: hive values: - 3.1.3 - - 4.0.0 - 4.0.1 - 4.1.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version @@ -41,6 +40,9 @@ dimensions: - name: zookeeper-latest values: - 3.9.4 + - name: opa-latest + values: + - 1.8.0 - name: krb5 values: - 1.21.1 @@ -62,6 +64,7 @@ tests: dimensions: - postgres - hive + - opa-latest - s3-use-tls - openshift - name: upgrade From af9f8cb0ee37ff515d123e010f23e05861c1cd4b Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 7 Nov 2025 12:38:41 +0100 Subject: [PATCH 04/13] add hive opa example --- examples/hive-opa-cluster.yaml | 89 ++++++++++++++++++++++++++++++++++ 1 
file changed, 89 insertions(+) create mode 100644 examples/hive-opa-cluster.yaml diff --git a/examples/hive-opa-cluster.yaml b/examples/hive-opa-cluster.yaml new file mode 100644 index 00000000..7c6d50d3 --- /dev/null +++ b/examples/hive-opa-cluster.yaml @@ -0,0 +1,89 @@ +# helm install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \ +# --version 16.5.0 \ +# --namespace default \ +# --set image.repository=bitnamilegacy/postgresql \ +# --set volumePermissions.image.repository=bitnamilegacy/os-shell \ +# --set metrics.image.repository=bitnamilegacy/postgres-exporter \ +# --set global.security.allowInsecureImages=true \ +# --set auth.username=hive \ +# --set auth.password=hive \ +# --set auth.database=hive \ +# --set primary.extendedConfiguration="password_encryption=md5" \ +# --wait +--- +apiVersion: hive.stackable.tech/v1alpha1 +kind: HiveCluster +metadata: + name: hive +spec: + image: + productVersion: 4.1.0 + pullPolicy: IfNotPresent + clusterConfig: + authorization: + opa: + configMapName: opa + package: hms + database: + connString: jdbc:postgresql://postgresql:5432/hive + credentialsSecret: hive-postgresql-credentials + dbType: postgres + metastore: + roleGroups: + default: + replicas: 1 + config: + resources: + cpu: + min: 300m + max: "2" + memory: + limit: 5Gi +--- +apiVersion: v1 +kind: Secret +metadata: + name: hive-postgresql-credentials +type: Opaque +stringData: + username: hive + password: hive +--- +apiVersion: opa.stackable.tech/v1alpha1 +kind: OpaCluster +metadata: + name: opa +spec: + image: + productVersion: 1.8.0 + servers: + config: + logging: + enableVectorAgent: false + containers: + opa: + console: + level: INFO + file: + level: INFO + loggers: + decision: + level: INFO + roleGroups: + default: {} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: hive-opa-bundle + labels: + opa.stackable.tech/bundle: "hms" +data: + trino.rego: | + package hms + + database_allow = true + table_allow = true + column_allow = true + 
partition_allow = true + user_allow = true From 7321b2c46c27e0e051ef71ba466eb87250199114 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 7 Nov 2025 17:28:49 +0100 Subject: [PATCH 05/13] add opa testing to smoke --- .../kuttl/smoke/50-install-opa.yaml.j2 | 24 +++++-- tests/templates/kuttl/smoke/80-assert.yaml | 5 +- .../smoke/80-prepare-test-metastore.yaml | 1 + .../kuttl/smoke/test_metastore_opa.py | 67 +++++++++++++++++++ 4 files changed, 90 insertions(+), 7 deletions(-) create mode 100755 tests/templates/kuttl/smoke/test_metastore_opa.py diff --git a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 index 8c59bf3b..f9841384 100644 --- a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 +++ b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 @@ -48,22 +48,36 @@ data: default partition_allow = false default user_allow = false + stackable_user := "stackable" + db_name := "test_metastore" + database_allow if { - input.identity.username == "stackable" + input.identity.username == stackable_user + input.resources.database.name == db_name + } + + table_allow if { + input.identity.username == stackable_user + input.resources.table.dbName == db_name + input.privileges.writeRequiredPriv[0].priv == "CREATE" + input.resources.table.tableName in ["s3_one_column_table", "one_column_table"] } table_allow if { - input.identity.username == "stackable" + input.identity.username == stackable_user + input.resources.table.dbName == db_name + input.privileges.readRequiredPriv[0].priv == "SELECT" + input.resources.table.tableName in ["s3_one_column_table", "one_column_table"] } column_allow if { - input.identity.username == "stackable" + input.identity.username == stackable_user } partition_allow if { - input.identity.username == "stackable" + input.identity.username == stackable_user } user_allow if { - input.identity.username == "stackable" + input.identity.username == stackable_user } diff --git 
a/tests/templates/kuttl/smoke/80-assert.yaml b/tests/templates/kuttl/smoke/80-assert.yaml index 3d4bd846..ed78faa4 100644 --- a/tests/templates/kuttl/smoke/80-assert.yaml +++ b/tests/templates/kuttl/smoke/80-assert.yaml @@ -2,5 +2,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert commands: - - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -m hive-metastore.$NAMESPACE.svc.cluster.local - - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -m hive-metastore-default-headless.$NAMESPACE.svc.cluster.local + - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -d test_metastore -m hive-metastore.$NAMESPACE.svc.cluster.local + - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore.py -d test_metastore -m hive-metastore-default-headless.$NAMESPACE.svc.cluster.local + - script: kubectl exec -n "$NAMESPACE" test-metastore-0 -- python /tmp/test_metastore_opa.py -d db_not_allowed -m hive-metastore.$NAMESPACE.svc.cluster.local diff --git a/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml b/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml index 45da6773..3704aaba 100644 --- a/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml +++ b/tests/templates/kuttl/smoke/80-prepare-test-metastore.yaml @@ -3,3 +3,4 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep commands: - script: kubectl cp -n "$NAMESPACE" ./test_metastore.py test-metastore-0:/tmp + - script: kubectl cp -n "$NAMESPACE" ./test_metastore_opa.py test-metastore-0:/tmp diff --git a/tests/templates/kuttl/smoke/test_metastore_opa.py b/tests/templates/kuttl/smoke/test_metastore_opa.py new file mode 100755 index 00000000..4d81effc --- /dev/null +++ b/tests/templates/kuttl/smoke/test_metastore_opa.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +from hive_metastore_client import HiveMetastoreClient +from hive_metastore_client.builders import ( + DatabaseBuilder, + 
ColumnBuilder, + SerDeInfoBuilder, + StorageDescriptorBuilder, + TableBuilder, +) +import argparse + + +def table(db_name, table_name, location): + columns = [ColumnBuilder("id", "string", "col comment").build()] + + serde_info = SerDeInfoBuilder( + serialization_lib="org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" + ).build() + + storage_descriptor = StorageDescriptorBuilder( + columns=columns, + location=location, + input_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", + output_format="org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", + serde_info=serde_info, + compressed=True, + ).build() + + test_table = TableBuilder( + db_name=db_name, + table_name=table_name, + storage_descriptor=storage_descriptor, + ).build() + + return test_table + + +if __name__ == "__main__": + all_args = argparse.ArgumentParser(description="Test hive-metastore-opa-authorizer and rego rules.") + all_args.add_argument("-p", "--port", help="Metastore server port", default="9083") + all_args.add_argument( + "-d", "--database", help="Test DB name", default="db_not_allowed" + ) + all_args.add_argument( + "-m", "--metastore", help="The host or service to connect to", required=True + ) + args = vars(all_args.parse_args()) + + database_name = args["database"] + port = args["port"] + host = args["metastore"] + + # Creating database object using builder + database = DatabaseBuilder(database_name).build() + + print(f"[INFO] Trying to access '{database_name}' which is expected to fail due to 'database_allow' authorization policy...!") + + with HiveMetastoreClient(host, port) as hive_client: + try: + hive_client.create_database_if_not_exists(database) + except Exception as e: + print(f"[DENIED] {e}") + print(f"[SUCCESS] Test hive-metastore-opa-authorizer succeeded. Could not access database '{database_name}'!") + exit(0) + + print(f"[ERROR] Test hive-metastore-opa-authorizer failed. 
Could access database '{database_name}'!") + exit(-1) From 5c9c73c3d508e87c485974a349fe22e360779c63 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 7 Nov 2025 17:36:16 +0100 Subject: [PATCH 06/13] adapted changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f64b05bf..259da2fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- Add OPA authorization using the operator-rs `OpaConfig` ([#652]). + +[#652]: https://github.com/stackabletech/hive-operator/pull/652 + ## [25.11.0] - 2025-11-07 ## [25.11.0-rc1] - 2025-11-06 From 7c7bedf10ad12864a2b9ebe5b09d7c256f1394e8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sat, 8 Nov 2025 12:30:22 +0100 Subject: [PATCH 07/13] enable tls --- rust/operator-binary/src/command.rs | 22 ++++-- rust/operator-binary/src/config/opa.rs | 2 +- rust/operator-binary/src/controller.rs | 38 +++++++++- .../kuttl/logging/test_log_aggregation.py | 18 ++--- tests/templates/kuttl/smoke/50-assert.yaml | 4 +- .../kuttl/smoke/50-install-opa.yaml.j2 | 76 ++++++++++++------- .../kuttl/smoke/test_metastore_opa.py | 16 +++- 7 files changed, 126 insertions(+), 50 deletions(-) diff --git a/rust/operator-binary/src/command.rs b/rust/operator-binary/src/command.rs index 4f8d1135..ee08f56d 100644 --- a/rust/operator-binary/src/command.rs +++ b/rust/operator-binary/src/command.rs @@ -1,16 +1,20 @@ use stackable_operator::crd::s3; -use crate::crd::{ - DB_PASSWORD_ENV, DB_PASSWORD_PLACEHOLDER, DB_USERNAME_ENV, DB_USERNAME_PLACEHOLDER, - HIVE_METASTORE_LOG4J2_PROPERTIES, HIVE_SITE_XML, STACKABLE_CONFIG_DIR, - STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_TRUST_STORE, - STACKABLE_TRUST_STORE_PASSWORD, v1alpha1, +use crate::{ + config::opa::HiveOpaConfig, + crd::{ + DB_PASSWORD_ENV, DB_PASSWORD_PLACEHOLDER, DB_USERNAME_ENV, DB_USERNAME_PLACEHOLDER, + 
HIVE_METASTORE_LOG4J2_PROPERTIES, HIVE_SITE_XML, STACKABLE_CONFIG_DIR, + STACKABLE_CONFIG_MOUNT_DIR, STACKABLE_LOG_CONFIG_MOUNT_DIR, STACKABLE_TRUST_STORE, + STACKABLE_TRUST_STORE_PASSWORD, v1alpha1, + }, }; pub fn build_container_command_args( hive: &v1alpha1::HiveCluster, start_command: String, s3_connection_spec: Option<&s3::v1alpha1::ConnectionSpec>, + hive_opa_config: Option<&HiveOpaConfig>, ) -> Vec { let mut args = vec![ // copy config files to a writeable empty folder in order to set s3 access and secret keys @@ -51,6 +55,14 @@ pub fn build_container_command_args( } } + if let Some(opa) = hive_opa_config { + if let Some(ca_cert_dir) = opa.tls_ca_cert_mount_path() { + args.push(format!( + "cert-tools generate-pkcs12-truststore --pkcs12 {STACKABLE_TRUST_STORE}:{STACKABLE_TRUST_STORE_PASSWORD} --pem {ca_cert_dir}/ca.crt --out {STACKABLE_TRUST_STORE} --out-password {STACKABLE_TRUST_STORE_PASSWORD}" + )); + } + } + // db credentials args.extend([ format!("echo replacing {DB_USERNAME_PLACEHOLDER} and {DB_PASSWORD_PLACEHOLDER} with secret values."), diff --git a/rust/operator-binary/src/config/opa.rs b/rust/operator-binary/src/config/opa.rs index a27bf8bc..411e4558 100644 --- a/rust/operator-binary/src/config/opa.rs +++ b/rust/operator-binary/src/config/opa.rs @@ -121,7 +121,7 @@ impl HiveOpaConfig { ]) } - pub fn tls_mount_path(&self) -> Option { + pub fn tls_ca_cert_mount_path(&self) -> Option { self.tls_secret_class .as_ref() .map(|_| format!("/stackable/secrets/{OPA_TLS_VOLUME_NAME}")) diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 16e5964e..9229e619 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -28,7 +28,7 @@ use stackable_operator::{ security::PodSecurityContextBuilder, volume::{ ListenerOperatorVolumeSourceBuilder, ListenerOperatorVolumeSourceBuilderError, - ListenerReference, VolumeBuilder, + ListenerReference, SecretOperatorVolumeSourceBuilder, 
VolumeBuilder, }, }, }, @@ -86,7 +86,7 @@ use crate::{ command::build_container_command_args, config::{ jvm::{construct_hadoop_heapsize_env, construct_non_heap_jvm_args}, - opa::HiveOpaConfig, + opa::{HiveOpaConfig, OPA_TLS_VOLUME_NAME}, }, crd::{ APP_NAME, CORE_SITE_XML, Container, DB_PASSWORD_ENV, DB_USERNAME_ENV, HIVE_PORT, @@ -323,6 +323,11 @@ pub enum Error { InvalidOpaConfig { source: stackable_operator::commons::opa::Error, }, + + #[snafu(display("failed to build TLS certificate SecretClass Volume"))] + TlsCertSecretClassVolumeBuild { + source: stackable_operator::builder::pod::volume::SecretOperatorVolumeSourceBuilderError, + }, } type Result = std::result::Result; @@ -472,6 +477,7 @@ pub async fn reconcile_hive( s3_connection_spec.as_ref(), &config, &rbac_sa.name_any(), + hive_opa_config.as_ref(), )?; cluster_resources @@ -742,6 +748,7 @@ fn build_metastore_rolegroup_statefulset( s3_connection: Option<&s3::v1alpha1::ConnectionSpec>, merged_config: &MetaStoreConfig, sa_name: &str, + hive_opa_config: Option<&HiveOpaConfig>, ) -> Result { let role = hive.role(hive_role).context(InternalOperatorFailureSnafu)?; let rolegroup = hive @@ -815,6 +822,32 @@ fn build_metastore_rolegroup_statefulset( } } + // Add OPA TLS certs if configured + if let Some((tls_secret_class, tls_mount_path)) = + hive_opa_config.as_ref().and_then(|opa_config| { + opa_config + .tls_secret_class + .as_ref() + .zip(opa_config.tls_ca_cert_mount_path()) + }) + { + container_builder + .add_volume_mount(OPA_TLS_VOLUME_NAME, &tls_mount_path) + .context(AddVolumeMountSnafu)?; + + let opa_tls_volume = VolumeBuilder::new(OPA_TLS_VOLUME_NAME) + .ephemeral( + SecretOperatorVolumeSourceBuilder::new(tls_secret_class) + .build() + .context(TlsCertSecretClassVolumeBuildSnafu)?, + ) + .build(); + + pod_builder + .add_volume(opa_tls_volume) + .context(AddVolumeSnafu)?; + } + let db_type = hive.db_type(); let start_command = if resolved_product_image.product_version.starts_with("3.") { // The schematool 
version in 3.1.x does *not* support the `-initOrUpgradeSchema` flag yet, so we can not use that. @@ -866,6 +899,7 @@ fn build_metastore_rolegroup_statefulset( create_vector_shutdown_file_command(STACKABLE_LOG_DIR), }, s3_connection, + hive_opa_config, )) .add_volume_mount(STACKABLE_CONFIG_DIR_NAME, STACKABLE_CONFIG_DIR) .context(AddVolumeMountSnafu)? diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py index 80c87d09..ee20e27c 100755 --- a/tests/templates/kuttl/logging/test_log_aggregation.py +++ b/tests/templates/kuttl/logging/test_log_aggregation.py @@ -23,9 +23,9 @@ def check_sent_events(): }, ) - assert ( - response.status_code == 200 - ), "Cannot access the API of the vector aggregator." + assert response.status_code == 200, ( + "Cannot access the API of the vector aggregator." + ) result = response.json() @@ -35,13 +35,13 @@ def check_sent_events(): componentId = transform["componentId"] if componentId == "filteredInvalidEvents": - assert ( - sentEvents is None or sentEvents["sentEventsTotal"] == 0 - ), "Invalid log events were sent." + assert sentEvents is None or sentEvents["sentEventsTotal"] == 0, ( + "Invalid log events were sent." + ) else: - assert ( - sentEvents is not None and sentEvents["sentEventsTotal"] > 0 - ), f'No events were sent in "{componentId}".' + assert sentEvents is not None and sentEvents["sentEventsTotal"] > 0, ( + f'No events were sent in "{componentId}".' 
+ ) if __name__ == "__main__": diff --git a/tests/templates/kuttl/smoke/50-assert.yaml b/tests/templates/kuttl/smoke/50-assert.yaml index f109e187..5b1731b6 100644 --- a/tests/templates/kuttl/smoke/50-assert.yaml +++ b/tests/templates/kuttl/smoke/50-assert.yaml @@ -1,9 +1,9 @@ --- apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 600 +timeout: 300 commands: - - script: kubectl -n $NAMESPACE rollout status daemonset opa-server-default --timeout 600s + - script: kubectl -n $NAMESPACE rollout status daemonset opa-server-default --timeout 300s --- apiVersion: v1 kind: ConfigMap diff --git a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 index f9841384..4bd971b3 100644 --- a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 +++ b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 @@ -1,36 +1,58 @@ --- -apiVersion: opa.stackable.tech/v1alpha1 -kind: OpaCluster -metadata: - name: opa -spec: - image: +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl apply -n $NAMESPACE -f - < 0 %} - custom: "{{ test_scenario['values']['opa-latest'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['opa-latest'].split(',')[0] }}" + custom: "{{ test_scenario['values']['opa-latest'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['opa-latest'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['opa-latest'] }}" + productVersion: "{{ test_scenario['values']['opa-latest'] }}" {% endif %} - pullPolicy: IfNotPresent - clusterConfig: + pullPolicy: IfNotPresent + clusterConfig: + tls: + serverSecretClass: opa-tls-$NAMESPACE {% if lookup('env', 'VECTOR_AGGREGATOR') %} - vectorAggregatorConfigMapName: vector-aggregator-discovery + vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} - servers: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - containers: - opa: - console: - level: INFO - 
file: - level: INFO - loggers: - decision: - level: INFO - roleGroups: - default: {} + servers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + containers: + opa: + console: + level: INFO + file: + level: INFO + loggers: + decision: + level: INFO + roleGroups: + default: {} + --- + apiVersion: secrets.stackable.tech/v1alpha1 + kind: SecretClass + metadata: + name: opa-tls-$NAMESPACE + spec: + backend: + autoTls: + ca: + autoGenerate: true + secret: + name: opa-tls-ca-$NAMESPACE + namespace: $NAMESPACE + --- apiVersion: v1 kind: ConfigMap diff --git a/tests/templates/kuttl/smoke/test_metastore_opa.py b/tests/templates/kuttl/smoke/test_metastore_opa.py index 4d81effc..bde8b6db 100755 --- a/tests/templates/kuttl/smoke/test_metastore_opa.py +++ b/tests/templates/kuttl/smoke/test_metastore_opa.py @@ -36,7 +36,9 @@ def table(db_name, table_name, location): if __name__ == "__main__": - all_args = argparse.ArgumentParser(description="Test hive-metastore-opa-authorizer and rego rules.") + all_args = argparse.ArgumentParser( + description="Test hive-metastore-opa-authorizer and rego rules." + ) all_args.add_argument("-p", "--port", help="Metastore server port", default="9083") all_args.add_argument( "-d", "--database", help="Test DB name", default="db_not_allowed" @@ -53,15 +55,21 @@ def table(db_name, table_name, location): # Creating database object using builder database = DatabaseBuilder(database_name).build() - print(f"[INFO] Trying to access '{database_name}' which is expected to fail due to 'database_allow' authorization policy...!") + print( + f"[INFO] Trying to access '{database_name}' which is expected to fail due to 'database_allow' authorization policy...!" + ) with HiveMetastoreClient(host, port) as hive_client: try: hive_client.create_database_if_not_exists(database) except Exception as e: print(f"[DENIED] {e}") - print(f"[SUCCESS] Test hive-metastore-opa-authorizer succeeded. 
Could not access database '{database_name}'!") + print( + f"[SUCCESS] Test hive-metastore-opa-authorizer succeeded. Could not access database '{database_name}'!" + ) exit(0) - print(f"[ERROR] Test hive-metastore-opa-authorizer failed. Could access database '{database_name}'!") + print( + f"[ERROR] Test hive-metastore-opa-authorizer failed. Could access database '{database_name}'!" + ) exit(-1) From c8c9f86953e5f88a6654b4e86405b38773389853 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sat, 8 Nov 2025 12:35:56 +0100 Subject: [PATCH 08/13] add opa-use-tls dimension --- tests/templates/kuttl/smoke/50-install-opa.yaml.j2 | 5 ++++- tests/test-definition.yaml | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 index 4bd971b3..ea30910f 100644 --- a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 +++ b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 @@ -19,8 +19,10 @@ commands: {% endif %} pullPolicy: IfNotPresent clusterConfig: +{% if test_scenario['values']['opa-use-tls'] == 'true' %} tls: serverSecretClass: opa-tls-$NAMESPACE +{% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} @@ -39,6 +41,7 @@ commands: level: INFO roleGroups: default: {} +{% if test_scenario['values']['opa-use-tls'] == 'true' %} --- apiVersion: secrets.stackable.tech/v1alpha1 kind: SecretClass @@ -52,7 +55,7 @@ commands: secret: name: opa-tls-ca-$NAMESPACE namespace: $NAMESPACE - +{% endif %} --- apiVersion: v1 kind: ConfigMap diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 241dad35..12eb28f2 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -53,6 +53,10 @@ dimensions: values: - "true" - "false" + - name: opa-use-tls + values: + - "true" + - "false" - name: kerberos-realm values: - "PROD.MYCORP" @@ -66,6 +70,7 @@ tests: - hive - opa-latest - 
s3-use-tls + - opa-use-tls - openshift - name: upgrade dimensions: From 2fe3b8c8cbc97f2affb17d37a4afe2b65b020ac3 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sat, 8 Nov 2025 12:41:23 +0100 Subject: [PATCH 09/13] remove left over trino references --- examples/hive-opa-cluster.yaml | 2 +- tests/templates/kuttl/smoke/50-install-opa.yaml.j2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/hive-opa-cluster.yaml b/examples/hive-opa-cluster.yaml index 7c6d50d3..994bedaa 100644 --- a/examples/hive-opa-cluster.yaml +++ b/examples/hive-opa-cluster.yaml @@ -79,7 +79,7 @@ metadata: labels: opa.stackable.tech/bundle: "hms" data: - trino.rego: | + hive.rego: | package hms database_allow = true diff --git a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 index ea30910f..a63a90b4 100644 --- a/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 +++ b/tests/templates/kuttl/smoke/50-install-opa.yaml.j2 @@ -64,7 +64,7 @@ metadata: labels: opa.stackable.tech/bundle: "hms" data: - trino.rego: | + hive.rego: | package hms default database_allow = false From 0cf0465a2c7da8a6785dda568f6a0053c6fcbb2c Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sun, 9 Nov 2025 15:04:29 +0100 Subject: [PATCH 10/13] started docs --- .../hive/pages/usage-guide/security.adoc | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/docs/modules/hive/pages/usage-guide/security.adoc b/docs/modules/hive/pages/usage-guide/security.adoc index 2c500038..cfbffca9 100644 --- a/docs/modules/hive/pages/usage-guide/security.adoc +++ b/docs/modules/hive/pages/usage-guide/security.adoc @@ -45,3 +45,117 @@ The `kerberos.secretClass` is used to give Hive the possibility to request keyta === 5. Access Hive In case you want to access Hive it is recommended to start up a client Pod that connects to Hive, rather than shelling into the master. 
We have an https://github.com/stackabletech/hive-operator/blob/main/tests/templates/kuttl/kerberos/70-install-access-hive.yaml.j2[integration test] for this exact purpose, where you can see how to connect and get a valid keytab. + + +== Authorization +The Stackable Operator for Apache Hive supports the following authorization methods. + +=== Open Policy Agent (OPA) +The Apache Hive metastore can be configured to delegate authorization decisions to an Open Policy Agent (OPA) instance. +More information on the setup and configuration of OPA can be found in the xref:opa:index.adoc[OPA Operator documentation]. +A Hive cluster can be configured using OPA authorization by adding this section to the configuration: + +[source,yaml] +---- +spec: + clusterConfig: + authorization: + opa: + configMapName: opa # <1> + package: hms # <2> +---- +<1> The name of your OPA Stacklet (`opa` in this case) +<2> The rego rule package to use for policy decisions. +This is optional and defaults to the name of the Hive Stacklet. + +==== Defining rego rules +For a general explanation of how rules are written, please refer to the {opa-rego-docs}[OPA documentation]. +Authorization with OPA is done using the https://github.com/boschglobal/hive-metastore-opa-authorizer[hive-metastore-opa-authorizer] plugin. + +===== OPA Inputs +The payload sent by Hive with each request to OPA, that is accessible within the rego rules, has the following structure: + +[source,json] +---- +{ + "identity": { + "username": "", + "groups": ["", ""] + }, + "resources": { + "database": null, + "table": null, + "partition": null, + "columns": ["col1", "col2"] + }, + "privileges": { + "readRequiredPriv": [], + "writeRequiredPriv": [], + "inputs": null, + "outputs": null + } +} +---- +* `identity`: Contains user information. +** `username`: The name of the user. +** `groups`: A list of groups the user belongs to. +* `resources`: Specifies the resources involved in the request. +** `database`: The database object. 
+** `table`: The table object. +** `partition`: The partition object. +** `columns`: A list of column names involved in the request. +* `privileges`: Details the privileges required for the request. +** `readRequiredPriv`: A list of required read privileges. +** `writeRequiredPriv`: A list of required write privileges. +** `inputs`: Input tables for the request. +** `outputs`: Output tables for the request. + +===== Example OPA Rego Rule +Below is a basic rego rule that demonstrates how to handle input dictionary sent from the hive authorizer to OPA: + +[source,rego] +---- +package hms + +default database_allow = false +default table_allow = false +default column_allow = false +default partition_allow = false +default user_allow = false + +database_allow if { + input.identity.username == "stackable" + input.resources.database.name == "test_db" +} + +table_allow if { + input.identity.username == "stackable" + input.resources.table.dbName == "test_db" + input.resources.table.tableName == "test_table" + input.privileges.readRequiredPriv[0].priv == "SELECT" +} + +table_allow if { + input.identity.username == "stackable" + input.resources.table.dbName == "test_db" + input.privileges.writeRequiredPriv[0].priv == "CREATE" +} +---- +* `database_allow` grants access if the user is `stackable` and the database is `test_db`. +* `table_allow` grants access if the user is `stackable`, the database is `test_db` and: +** the table is `test_table` and the required read privilege is `SELECT`. +** the required write privilege is `CREATE` without any table restriction. 
+ +==== Configuring policy URLs + +The `database_allow`, `table_allow`, `column_allow`, `partition_allow`, and `user_allow` policy URLs can be (config) overridden using the properties in `hive-site.xml`: +* `com.bosch.bdps.opa.authorization.policy.url.database` +* `com.bosch.bdps.opa.authorization.policy.url.table` +* `com.bosch.bdps.opa.authorization.policy.url.column` +* `com.bosch.bdps.opa.authorization.policy.url.partition` +* `com.bosch.bdps.opa.authorization.policy.url.user` + +==== TLS secured OPA cluster + +Stackable OPA clusters secured via TLS are supported and no further configuration is required. +The Stackable Hive operator automatically adds the certificate from the SecretClass used to secure the OPA cluster to its trust. From e29a06e0602fdf9581ec1c4c00a28ba9168040f7 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sun, 9 Nov 2025 15:05:57 +0100 Subject: [PATCH 11/13] formatting --- docs/modules/hive/pages/usage-guide/security.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/modules/hive/pages/usage-guide/security.adoc b/docs/modules/hive/pages/usage-guide/security.adoc index cfbffca9..ea43e71f 100644 --- a/docs/modules/hive/pages/usage-guide/security.adoc +++ b/docs/modules/hive/pages/usage-guide/security.adoc @@ -149,6 +149,7 @@ table_allow if { ==== Configuring policy URLs The `database_allow`, `table_allow`, `column_allow`, `partition_allow`, and `user_allow` policy URLs can be (config) overridden using the properties in `hive-site.xml`: + * `com.bosch.bdps.opa.authorization.policy.url.database` * `com.bosch.bdps.opa.authorization.policy.url.table` * `com.bosch.bdps.opa.authorization.policy.url.column` * `com.bosch.bdps.opa.authorization.policy.url.partition` * `com.bosch.bdps.opa.authorization.policy.url.user` From e3cf46abcee413507212142880a73d5a30af5d09 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Sun, 9 Nov 2025 15:11:59 +0100 Subject: [PATCH 12/13] pre commit --- docs/modules/hive/pages/usage-guide/security.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hive/pages/usage-guide/security.adoc 
b/docs/modules/hive/pages/usage-guide/security.adoc index ea43e71f..fcc282c3 100644 --- a/docs/modules/hive/pages/usage-guide/security.adoc +++ b/docs/modules/hive/pages/usage-guide/security.adoc @@ -158,5 +158,5 @@ The `database_allow`, `table_allow`, `column_allow`, `partition_allow`, and `use ==== TLS secured OPA cluster -Stackable OPA clusters secured via TLS are supported and no further configuration is required. +Stackable OPA clusters secured via TLS are supported and no further configuration is required. The Stackable Hive operator automatically adds the certificate from the SecretClass used to secure the OPA cluster to its trust. From 815a74b5059a28454d51faa8f4271666b59da779 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Wed, 12 Nov 2025 14:31:24 +0100 Subject: [PATCH 13/13] add opa operator to test suite --- tests/release.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/release.yaml b/tests/release.yaml index 5ab0aeca..225e77af 100644 --- a/tests/release.yaml +++ b/tests/release.yaml @@ -18,3 +18,5 @@ releases: operatorVersion: 0.0.0-dev hive: operatorVersion: 0.0.0-dev + opa: + operatorVersion: 0.0.0-dev