From 2a638e70f3cdf208bd0e153156208325c9b732bf Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Mon, 10 Nov 2025 12:51:04 +0200 Subject: [PATCH 01/12] Add getting started with Ceph without STS --- getting-started/ceph/README.md | 138 ++++++++++++++ getting-started/ceph/docker-compose.yml | 227 ++++++++++++++++++++++++ 2 files changed, 365 insertions(+) create mode 100644 getting-started/ceph/README.md create mode 100644 getting-started/ceph/docker-compose.yml diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md new file mode 100644 index 0000000000..0747994829 --- /dev/null +++ b/getting-started/ceph/README.md @@ -0,0 +1,138 @@ + + +# Getting Started with Apache Polaris and Ceph + +## Overview + +This guide describes how to spin up a **single-node Ceph cluster** with **RADOS Gateway (RGW)** for S3-compatible storage and configure it for use by **Polaris**. + +This example cluster is configured for basic access key authentication only. +It does not include STS (Security Token Service) or temporary credentials. +All access to the Ceph RGW (RADOS Gateway) and Polaris integration uses static S3-style credentials (as configured via radosgw-admin user create). + +Spark is used as a query engine. This example assumes a local Spark installation. +See the [Spark Notebooks Example](../spark/README.md) for a more advanced Spark setup. + +## Starting the Example + +The services are started **in sequence**: +1. Monitor + Manager +2. OSD +3. RGW +4. Polaris + +Note: this example pulls the `apache/polaris:latest` image, but assumes the image is `1.2.0-incubating` or later. + + +### 1. Start monitor and manager +```shell +docker-compose up -d mon1 mgr +``` + +### 2. Start OSD +```shell +docker-compose up -d osd1 +``` + +### 3. Start RGW +```shell +docker-compose up -d rgw1 +``` +#### Check status +```shell +docker exec -it cephpolaris-mon1-1 ceph -s +``` +You should see something like: +```yaml +cluster: + id: b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e + health: HEALTH_WARN + mon is allowing insecure global_id reclaim + 1 monitors have not enabled msgr2 + 6 pool(s) have no replicas configured + +services: + mon: 1 daemons, quorum mon1 (age 49m) + mgr: mgr(active, since 94m) + osd: 1 osds: 1 up (since 36m), 1 in (since 93m) + rgw: 1 daemon active (1 hosts, 1 zones) +``` + +### 4. Create bucket for Polaris storage +```shell +docker-compose up -d setup_bucket +``` + +### 5. Run Polaris service +```shell +docker-compose up -d polaris +``` + +### 6. Setup polaris catalog +```shell +docker-compose up -d polaris-setup +``` + +## Connecting From Spark + +```shell +bin/spark-sql \ + --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0 \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.polaris.type=rest \ + --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ + --conf spark.sql.catalog.polaris.token-refresh-enabled=false \ + --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \ + --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \ + --conf spark.sql.catalog.polaris.credential=root:s3cr3t \ + --conf spark.sql.catalog.polaris.client.region=irrelevant +``` + +Note: `s3cr3t` is defined as the password for the `root` user in the `docker-compose.yml` file. + +Note: The `client.region` configuration is required for the AWS S3 client to work, but it is not used in this example +since Ceph does not require a specific region. + +## Running Queries + +Run inside the Spark SQL shell: + +``` +spark-sql (default)> use polaris; +Time taken: 0.837 seconds + +spark-sql ()> create namespace ns; +Time taken: 0.374 seconds + +spark-sql ()> create table ns.t1 as select 'abc'; +Time taken: 2.192 seconds + +spark-sql ()> select * from ns.t1; +abc +Time taken: 0.579 seconds, Fetched 1 row(s) +``` +## Lack of Credential Vending + +Notice that the Spark configuration does not contain a `X-Iceberg-Access-Delegation` header. +This is because example cluster does not include STS (Security Token Service) or temporary credentials. + +The lack of STS API is represented in the Catalog storage configuration by the +`stsUnavailable=true` property. diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml new file mode 100644 index 0000000000..d286c00bb4 --- /dev/null +++ b/getting-started/ceph/docker-compose.yml @@ -0,0 +1,227 @@ +networks: + cluster-net: + driver: bridge + +services: + + mon1: + image: ${CEPH_CONTAINER_IMAGE} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/osd/ceph-0; + ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'; + ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring \ + --gen-key -n client.admin \ + --cap mon 'allow *' --cap osd 'allow *' --cap mgr 'allow *' --cap mds 'allow *'; + ceph-authtool --create-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \ + --gen-key -n client.bootstrap-osd \ + --cap mon 'profile bootstrap-osd' --cap mgr 'allow r'; + ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring; + ceph-authtool /tmp/ceph.mon.keyring --import-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring; + chown ceph:ceph /tmp/ceph.mon.keyring; + monmaptool --create --add mon1 ${MON_IP} --fsid ${FSID} /tmp/monmap --clobber; + sudo -u ceph ceph-mon --mkfs -i mon1 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring; + ceph-mon -i mon1 -f -d; + environment: + MON_IP: ${MON_IP} + CEPH_PUBLIC_NETWORK: ${MON1_CEPH_PUBLIC_NETWORK} + FSID: ${FSID} + volumes: + - ./ceph-conf:/etc/ceph + - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd + - ./osd1:/var/lib/ceph/osd/ceph-0/ + networks: + - cluster-net + + mgr: + image: ${CEPH_CONTAINER_IMAGE} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/mgr/ceph-mgr; + ceph auth get-or-create mgr.mgr mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-mgr/keyring; + ceph-mgr -f -i mgr; + volumes: + - ./ceph-conf:/etc/ceph + depends_on: + - mon1 + networks: + - cluster-net + ports: + - ${DASHBOARD_PORT}:${INTERNAL_DASHBOARD_PORT} + + osd1: + pid: host + privileged: true + image: ${CEPH_CONTAINER_IMAGE} + environment: + OSD_UUID_1: ${OSD_UUID_1} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/osd/ceph-0; + chown -R ceph:ceph /var/lib/ceph/osd/ceph-0; + ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-0/keyring \ + --gen-key -n osd.0 \ + --cap osd 'allow *' \ + --cap mon 'allow profile osd'; + ceph auth del osd.0 || true; + ceph auth add osd.0 -i /var/lib/ceph/osd/ceph-0/keyring; + ceph osd new ${OSD_UUID_1} -n client.bootstrap-osd -k /var/lib/ceph/bootstrap-osd/ceph.keyring; + ceph-osd -i 0 --mkfs --osd-data /var/lib/ceph/osd/ceph-0 --osd-uuid ${OSD_UUID_1} \ + --keyring /var/lib/ceph/osd/ceph-0/keyring; + ceph-osd -f -i 0; + volumes: + - ./ceph-conf:/etc/ceph + - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd + depends_on: + - mon1 + networks: + - cluster-net + + mds1: + image: ${CEPH_CONTAINER_IMAGE} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/mds/ceph-admin; + ceph-authtool --create-keyring /var/lib/ceph/mds/ceph-admin/keyring --gen-key -n mds. --cap mds 'allow *'; + ceph-mds -f -i admin; + hostname: "ceph-mds1-host" + environment: + CEPHFS_CREATE: 1 + volumes: + - ./ceph-conf:/etc/ceph + depends_on: + - osd1 + networks: + - cluster-net + rgw1: + image: ${CEPH_CONTAINER_IMAGE} + container_name: rgw1 + environment: + MON_IP: ${MON_IP} + CEPH_PUBLIC_NETWORK: ${MON1_CEPH_PUBLIC_NETWORK} + RGW_ACCESS_KEY: ${RGW_ACCESS_KEY} + RGW_SECRET_KEY: ${RGW_SECRET_KEY} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/radosgw/ceph-rgw1; + ceph auth get-or-create client.rgw1 mon 'allow rw' osd 'allow rwx'; + ceph auth caps client.rgw1 mon 'allow rw' osd 'allow rwx'; + ceph-authtool --create-keyring /var/lib/ceph/radosgw/ceph-rgw1/keyring --gen-key -n client.rgw1 --cap osd 'allow *' --cap mon 'allow *'; + ceph auth del client.rgw1 || true; + ceph auth add client.rgw1 -i /var/lib/ceph/radosgw/ceph-rgw1/keyring; + radosgw-admin user create --uid="polaris-user" \ + --display-name="Polaris User" \ + --access-key="${RGW_ACCESS_KEY}" \ + --secret-key="${RGW_SECRET_KEY}" || true; + echo ">>> RGW user created (access=${RGW_ACCESS_KEY}, secret=${RGW_SECRET_KEY})"; + radosgw -n client.rgw1 --rgw-frontends="beast port=7480" --foreground; + ports: + - "7480:7480" # RGW HTTP endpoint (S3) + - "7481:7481" + volumes: + - ./ceph-conf:/etc/ceph + depends_on: + - osd1 + networks: + - cluster-net + + setup_bucket: + image: peakcom/s5cmd:latest + depends_on: + - rgw1 + environment: + AWS_ACCESS_KEY_ID: ${RGW_ACCESS_KEY} + AWS_SECRET_ACCESS_KEY: ${RGW_SECRET_KEY} + S3_ENDPOINT_URL: ${S3_ENDPOINT_URL} + S3_REGION: ${S3_REGION} + S3_POLARIS_BUCKET: ${S3_POLARIS_BUCKET} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + echo ">>> Waiting for RGW to become ready..."; + sleep 5; + echo ">>> Create bucket if not exist..."; + /s5cmd --endpoint-url ${S3_ENDPOINT_URL} mb s3://${S3_POLARIS_BUCKET} || true; + tail -f /dev/null; + networks: + - cluster-net + + polaris: + image: apache/polaris:latest + ports: + # API port + - "8181:8181" + # Optional, allows attaching a debugger to the Polaris JVM + - "5005:5005" + depends_on: + - rgw1 + environment: + JAVA_DEBUG: true + JAVA_DEBUG_PORT: "*:5005" + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: ${RGW_ACCESS_KEY} + AWS_SECRET_ACCESS_KEY: ${RGW_SECRET_KEY} + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + polaris.realm-context.realms: POLARIS + quarkus.otel.sdk.disabled: "true" + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + start_period: 10s + networks: + - cluster-net + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + environment: + - CLIENT_ID=root + - CLIENT_SECRET=s3cr3t + volumes: + - ../assets/polaris/:/polaris + entrypoint: "/bin/sh" + command: + - "-c" + - >- + chmod +x /polaris/create-catalog.sh; + chmod +x /polaris/obtain-token.sh; + source /polaris/obtain-token.sh; + echo Creating catalog...; + export STORAGE_CONFIG_INFO='{"storageType":"S3", + "endpoint":"http://rgw1:7480", + "stsUnavailable":"true", + "pathStyleAccess":true}'; + export STORAGE_LOCATION='s3://polaris-storage'; + /polaris/create-catalog.sh POLARIS $$TOKEN; + echo Extra grants...; + curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type: application/json' \ + -X PUT \ + http://polaris:8181/api/management/v1/catalogs/quickstart_catalog/catalog-roles/catalog_admin/grants \ + -d '{"type":"catalog", "privilege":"CATALOG_MANAGE_CONTENT"}'; + echo Done.; + curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type: application/json' \ + -X GET \ + http://polaris:8181/api/management/v1/catalogs; + networks: + - cluster-net From c7a6a362aab33f5771d465464a3dd0111a58014b Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Mon, 10 Nov 2025 14:06:10 +0200 Subject: [PATCH 02/12] add env example --- getting-started/ceph/.env.example | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 getting-started/ceph/.env.example diff --git a/getting-started/ceph/.env.example b/getting-started/ceph/.env.example new file mode 100644 index 0000000000..865bea7bf7 --- /dev/null +++ b/getting-started/ceph/.env.example @@ -0,0 +1,15 @@ +LANG=en_US.utf8 +TZ=UTC +CEPH_CONTAINER_IMAGE=quay.io/ceph/ceph:v19.2.3 +DASHBOARD_PORT=8443 +INTERNAL_DASHBOARD_PORT=8443 +RGW_PORT=8080 +MON_IP=172.18.0.2 +MON1_CEPH_PUBLIC_NETWORK=172.18.0.0/16 +RGW_ACCESS_KEY=POLARIS123ACCESS +RGW_SECRET_KEY=POLARIS456SECRET +FSID=b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e +OSD_UUID_1=80505106-0d32-4777-bac9-3dfc901b1273 +S3_ENDPOINT_URL=http://rgw1:7480 +S3_REGION=us-east-1 +S3_POLARIS_BUCKET=polaris-storage \ No newline at end of file From 8cad1b8d275a975e9de28462af50fd770e2c356a Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 11 Nov 2025 10:45:35 +0200 Subject: [PATCH 03/12] add ceph conf --- getting-started/ceph/.env.example | 15 ------------ getting-started/ceph/ceph-conf/ceph.conf | 29 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 15 deletions(-) delete mode 100644 getting-started/ceph/.env.example create mode 100644 getting-started/ceph/ceph-conf/ceph.conf diff --git a/getting-started/ceph/.env.example b/getting-started/ceph/.env.example deleted file mode 100644 index 865bea7bf7..0000000000 --- a/getting-started/ceph/.env.example +++ /dev/null @@ -1,15 +0,0 @@ -LANG=en_US.utf8 -TZ=UTC -CEPH_CONTAINER_IMAGE=quay.io/ceph/ceph:v19.2.3 -DASHBOARD_PORT=8443 -INTERNAL_DASHBOARD_PORT=8443 -RGW_PORT=8080 -MON_IP=172.18.0.2 -MON1_CEPH_PUBLIC_NETWORK=172.18.0.0/16 -RGW_ACCESS_KEY=POLARIS123ACCESS -RGW_SECRET_KEY=POLARIS456SECRET -FSID=b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e -OSD_UUID_1=80505106-0d32-4777-bac9-3dfc901b1273 -S3_ENDPOINT_URL=http://rgw1:7480 -S3_REGION=us-east-1 -S3_POLARIS_BUCKET=polaris-storage \ No newline at end of file diff --git a/getting-started/ceph/ceph-conf/ceph.conf b/getting-started/ceph/ceph-conf/ceph.conf new file mode 100644 index 0000000000..e36b63e3ec --- /dev/null +++ b/getting-started/ceph/ceph-conf/ceph.conf @@ -0,0 +1,29 @@ +[global] +fsid = b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e +mon_initial_members = mon1 +mon_host = 172.18.0.2 +public_network = 172.18.0.0/16 +cluster_network = 172.18.0.0/16 +auth_cluster_required = cephx +auth_service_required = cephx +auth_client_required = cephx +osd_pool_default_size = 1 +osd_pool_default_min_size = 1 +osd_pool_default_pg_num = 333 +osd_crush_chooseleaf_type = 1 +mon_allow_pool_size_one= true +# max open files = 655350 +# cephx cluster require signatures = false +# cephx service require signatures = false +# osd max object name len = 256 +# osd max object namespace len = 64 + +[mon.mon1] +mon_data = /var/lib/ceph/mon/ceph-mon1 +mon_rocksdb_min_wal_logs = 1 +mon_rocksdb_max_total_wal_size = 64M +mon_rocksdb_options = max_background_compactions=4;max_background_flushes=2 + +[client.rgw1] +host = ceph-rgw1 +rgw_frontends = civetweb port=7480 From 811c477f5c6d1d30ff120ca8c9a340e327a7f541 Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 11 Nov 2025 12:21:42 +0200 Subject: [PATCH 04/12] changes after PR --- getting-started/ceph/README.md | 21 +++++++++++------ getting-started/ceph/docker-compose.yml | 31 ++++++++++++++++++++----- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index 0747994829..2583835058 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -32,6 +32,13 @@ See the [Spark Notebooks Example](../spark/README.md) for a more advanced Spark ## Starting the Example +Before starting the Ceph + Polaris stack, you’ll need to configure environment variables that define network settings, credentials, and cluster IDs. + +Copy the example environment file: +```shell +mv getting-started/ceph/.env.example getting-started/ceph/.env +``` + The services are started **in sequence**: 1. Monitor + Manager 2. OSD @@ -43,21 +50,21 @@ Note: this example pulls the `apache/polaris:latest` image, but assumes the imag ### 1. Start monitor and manager ```shell -docker-compose up -d mon1 mgr +docker compose up -d mon1 mgr ``` ### 2. Start OSD ```shell -docker-compose up -d osd1 +docker compose up -d osd1 ``` ### 3. Start RGW ```shell -docker-compose up -d rgw1 +docker compose up -d rgw1 ``` #### Check status ```shell -docker exec -it cephpolaris-mon1-1 ceph -s +docker exec --interactive --tty ceph-mon1-1 ceph -s ``` You should see something like: ```yaml @@ -77,17 +84,17 @@ services: ### 4. Create bucket for Polaris storage ```shell -docker-compose up -d setup_bucket +docker compose up -d setup_bucket ``` ### 5. Run Polaris service ```shell -docker-compose up -d polaris +docker compose up -d polaris ``` ### 6. Setup polaris catalog ```shell -docker-compose up -d polaris-setup +docker compose up -d polaris-setup ``` ## Connecting From Spark diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml index d286c00bb4..65b681e51f 100644 --- a/getting-started/ceph/docker-compose.yml +++ b/getting-started/ceph/docker-compose.yml @@ -1,3 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + networks: cluster-net: driver: bridge @@ -5,7 +24,7 @@ networks: services: mon1: - image: ${CEPH_CONTAINER_IMAGE} + image: quay.io/ceph/ceph:v19.2.3 entrypoint: "/bin/sh" command: - "-c" @@ -37,7 +56,7 @@ services: - cluster-net mgr: - image: ${CEPH_CONTAINER_IMAGE} + image: quay.io/ceph/ceph:v19.2.3 entrypoint: "/bin/sh" command: - "-c" @@ -58,7 +77,7 @@ services: osd1: pid: host privileged: true - image: ${CEPH_CONTAINER_IMAGE} + image: quay.io/ceph/ceph:v19.2.3 environment: OSD_UUID_1: ${OSD_UUID_1} entrypoint: "/bin/sh" @@ -87,7 +106,7 @@ services: - cluster-net mds1: - image: ${CEPH_CONTAINER_IMAGE} + image: quay.io/ceph/ceph:v19.2.3 entrypoint: "/bin/sh" command: - "-c" @@ -106,7 +125,7 @@ services: networks: - cluster-net rgw1: - image: ${CEPH_CONTAINER_IMAGE} + image: quay.io/ceph/ceph:v19.2.3 container_name: rgw1 environment: MON_IP: ${MON_IP} @@ -175,7 +194,7 @@ services: environment: JAVA_DEBUG: true JAVA_DEBUG_PORT: "*:5005" - AWS_REGION: us-west-2 + AWS_REGION: us-east-1 AWS_ACCESS_KEY_ID: ${RGW_ACCESS_KEY} AWS_SECRET_ACCESS_KEY: ${RGW_SECRET_KEY} POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t From 6f7a3128c788226dbfc5590c0e605090ebe4eccb Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 11 Nov 2025 12:22:53 +0200 Subject: [PATCH 05/12] env example with comments --- getting-started/ceph/.env.example | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 getting-started/ceph/.env.example diff --git a/getting-started/ceph/.env.example b/getting-started/ceph/.env.example new file mode 100644 index 0000000000..270ccf1371 --- /dev/null +++ b/getting-started/ceph/.env.example @@ -0,0 +1,14 @@ +LANG=en_US.utf8 # Default system locale used inside containers +TZ=UTC # Timezone used inside containers +DASHBOARD_PORT=8443 # Port for Ceph Dashboard +INTERNAL_DASHBOARD_PORT=8443 # Internal port for Ceph Dashboard +RGW_PORT=8080 # Port for Rados Gateway +MON_IP=172.18.0.2 # IP address of the monitor +MON1_CEPH_PUBLIC_NETWORK=172.18.0.0/16 # Public network subnet used by all Ceph daemons +RGW_ACCESS_KEY=POLARIS123ACCESS # Access key for Polaris S3 user +RGW_SECRET_KEY=POLARIS456SECRET. # Secret key for Polaris S3 user +FSID=b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e # Unique cluster identifier (use `uuidgen` to regenerate) +OSD_UUID_1=80505106-0d32-4777-bac9-3dfc901b1273 # Unique OSD identifier (use `uuidgen` to regenerate) +S3_ENDPOINT_URL=http://rgw1:7480 # Internal endpoint for S3-compatible RGW service +S3_REGION=us-east-1 # S3 region name +S3_POLARIS_BUCKET=polaris-storage # Default S3 bucket name for Polaris storage \ No newline at end of file From edcc27c096ed30030d2fd4581f1bc9dbf1cbcfee Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 11 Nov 2025 15:57:22 +0200 Subject: [PATCH 06/12] add podman support --- getting-started/ceph/.env.example | 19 ++++++++ getting-started/ceph/README.md | 45 ++++++++++------- getting-started/ceph/ceph-conf/ceph.conf | 19 ++++++++ getting-started/ceph/docker-compose.yml | 13 ++--- getting-started/ceph/prepare-network.sh | 61 ++++++++++++++++++++++++ 5 files changed, 133 insertions(+), 24 deletions(-) create mode 100755 getting-started/ceph/prepare-network.sh diff --git a/getting-started/ceph/.env.example b/getting-started/ceph/.env.example index 270ccf1371..d8c2366894 100644 --- a/getting-started/ceph/.env.example +++ b/getting-started/ceph/.env.example @@ -1,3 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + LANG=en_US.utf8 # Default system locale used inside containers TZ=UTC # Timezone used inside containers DASHBOARD_PORT=8443 # Port for Ceph Dashboard diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index 2583835058..8eea21370d 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -34,11 +34,6 @@ See the [Spark Notebooks Example](../spark/README.md) for a more advanced Spark Before starting the Ceph + Polaris stack, you’ll need to configure environment variables that define network settings, credentials, and cluster IDs. -Copy the example environment file: -```shell -mv getting-started/ceph/.env.example getting-started/ceph/.env -``` - The services are started **in sequence**: 1. Monitor + Manager 2. OSD @@ -47,24 +42,38 @@ The services are started **in sequence**: Note: this example pulls the `apache/polaris:latest` image, but assumes the image is `1.2.0-incubating` or later. +### 1. Copy the example environment file +```shell +cp getting-started/ceph/.env.example getting-started/ceph/.env +``` + +### 2. Prepare Network +```shell +# Optional: force runtime (docker or podman) +export RUNTIME=docker + +chmod +x getting-started/ceph/prepare-network.sh + +./getting-started/ceph/prepare-network.sh +``` -### 1. Start monitor and manager +### 3. Start monitor and manager ```shell -docker compose up -d mon1 mgr +$RUNTIME compose up -d mon1 mgr ``` -### 2. Start OSD +### 4. Start OSD ```shell -docker compose up -d osd1 +$RUNTIME compose up -d osd1 ``` -### 3. Start RGW +### 5. Start RGW ```shell -docker compose up -d rgw1 +$RUNTIME compose up -d rgw1 ``` #### Check status ```shell -docker exec --interactive --tty ceph-mon1-1 ceph -s +$RUNTIME exec --interactive --tty ceph-mon1-1 ceph -s ``` You should see something like: ```yaml @@ -82,19 +91,19 @@ services: rgw: 1 daemon active (1 hosts, 1 zones) ``` -### 4. Create bucket for Polaris storage +### 6. Create bucket for Polaris storage ```shell -docker compose up -d setup_bucket +$RUNTIME compose up -d setup_bucket ``` -### 5. Run Polaris service +### 7. Run Polaris service ```shell -docker compose up -d polaris +$RUNTIME compose up -d polaris ``` -### 6. Setup polaris catalog +### 8. Setup polaris catalog ```shell -docker compose up -d polaris-setup +$RUNTIME compose up -d polaris-setup ``` ## Connecting From Spark diff --git a/getting-started/ceph/ceph-conf/ceph.conf b/getting-started/ceph/ceph-conf/ceph.conf index e36b63e3ec..12bef94c71 100644 --- a/getting-started/ceph/ceph-conf/ceph.conf +++ b/getting-started/ceph/ceph-conf/ceph.conf @@ -1,3 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + [global] fsid = b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e mon_initial_members = mon1 diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml index 65b681e51f..47c50ff105 100644 --- a/getting-started/ceph/docker-compose.yml +++ b/getting-started/ceph/docker-compose.yml @@ -20,6 +20,7 @@ networks: cluster-net: driver: bridge + external: true services: @@ -31,18 +32,18 @@ services: - >- set -ex; mkdir -p /var/lib/ceph/osd/ceph-0; - ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'; + ceph-authtool --create-keyring /var/lib/ceph/tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'; ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring \ --gen-key -n client.admin \ --cap mon 'allow *' --cap osd 'allow *' --cap mgr 'allow *' --cap mds 'allow *'; ceph-authtool --create-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \ --gen-key -n client.bootstrap-osd \ --cap mon 'profile bootstrap-osd' --cap mgr 'allow r'; - ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring; - ceph-authtool /tmp/ceph.mon.keyring --import-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring; - chown ceph:ceph /tmp/ceph.mon.keyring; - monmaptool --create --add mon1 ${MON_IP} --fsid ${FSID} /tmp/monmap --clobber; - sudo -u ceph ceph-mon --mkfs -i mon1 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring; + ceph-authtool /var/lib/ceph/tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring; + ceph-authtool /var/lib/ceph/tmp/ceph.mon.keyring --import-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring; + chown ceph:ceph /var/lib/ceph/tmp/ceph.mon.keyring; + monmaptool --create --add mon1 ${MON_IP} --fsid ${FSID} /var/lib/ceph/tmp/monmap --clobber; + sudo -u ceph ceph-mon --mkfs -i mon1 --monmap /var/lib/ceph/tmp/monmap --keyring /var/lib/ceph/tmp/ceph.mon.keyring; ceph-mon -i mon1 -f -d; environment: MON_IP: ${MON_IP} diff --git a/getting-started/ceph/prepare-network.sh b/getting-started/ceph/prepare-network.sh new file mode 100755 index 0000000000..fbf782aa7b --- /dev/null +++ b/getting-started/ceph/prepare-network.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +#!/bin/bash +set -e + +NETWORK_NAME="cluster-net" +SUBNET="172.18.0.0/16" +GATEWAY="172.18.0.1" +RUNTIME=${RUNTIME:-auto} # choose: docker | podman | auto + +create_network() { + local cmd=$1 + local exists_cmd="$cmd network inspect $NETWORK_NAME >/dev/null 2>&1" + + if eval "$exists_cmd"; then + echo "Network '$NETWORK_NAME' already exists in $cmd." + else + echo "Creating network '$NETWORK_NAME' in $cmd..." + $cmd network create \ + --driver bridge \ + --subnet $SUBNET \ + --gateway $GATEWAY \ + $NETWORK_NAME + fi +} + +# Auto-detect or use user choice +if [ "$RUNTIME" = "docker" ]; then + create_network docker +elif [ "$RUNTIME" = "podman" ]; then + create_network podman +else + if command -v docker >/dev/null 2>&1; then + echo "Detected Docker (defaulting to Docker runtime)" + create_network docker + elif command -v podman >/dev/null 2>&1; then + echo "Detected Podman (defaulting to Podman runtime)" + create_network podman + else + echo "Neither Docker nor Podman found. Please install one." + exit 1 + fi +fi From a538c8f1f296ea56cf4d0345de194baaf9755cb7 Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 11 Nov 2025 16:49:16 +0200 Subject: [PATCH 07/12] updates after review --- getting-started/ceph/README.md | 18 ++++++++---------- getting-started/ceph/docker-compose.yml | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index 8eea21370d..76d790d771 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -44,7 +44,7 @@ Note: this example pulls the `apache/polaris:latest` image, but assumes the imag ### 1. Copy the example environment file ```shell -cp getting-started/ceph/.env.example getting-started/ceph/.env +cp .env.example .env ``` ### 2. Prepare Network @@ -52,28 +52,26 @@ cp getting-started/ceph/.env.example getting-started/ceph/.env # Optional: force runtime (docker or podman) export RUNTIME=docker -chmod +x getting-started/ceph/prepare-network.sh - ./getting-started/ceph/prepare-network.sh ``` ### 3. Start monitor and manager ```shell -$RUNTIME compose up -d mon1 mgr +docker compose up -d mon1 mgr ``` ### 4. Start OSD ```shell -$RUNTIME compose up -d osd1 +docker compose up -d osd1 ``` ### 5. Start RGW ```shell -$RUNTIME compose up -d rgw1 +docker compose up -d rgw1 ``` #### Check status ```shell -$RUNTIME exec --interactive --tty ceph-mon1-1 ceph -s +docker exec --interactive --tty ceph-mon1-1 ceph -s ``` You should see something like: ```yaml @@ -93,17 +91,17 @@ services: ### 6. Create bucket for Polaris storage ```shell -$RUNTIME compose up -d setup_bucket +docker compose up -d setup_bucket ``` ### 7. Run Polaris service ```shell -$RUNTIME compose up -d polaris +docker compose up -d polaris ``` ### 8. Setup polaris catalog ```shell -$RUNTIME compose up -d polaris-setup +docker compose up -d polaris-setup ``` ## Connecting From Spark diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml index 47c50ff105..9c333c0841 100644 --- a/getting-started/ceph/docker-compose.yml +++ b/getting-started/ceph/docker-compose.yml @@ -43,7 +43,7 @@ services: ceph-authtool /var/lib/ceph/tmp/ceph.mon.keyring --import-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring; chown ceph:ceph /var/lib/ceph/tmp/ceph.mon.keyring; monmaptool --create --add mon1 ${MON_IP} --fsid ${FSID} /var/lib/ceph/tmp/monmap --clobber; - sudo -u ceph ceph-mon --mkfs -i mon1 --monmap /var/lib/ceph/tmp/monmap --keyring /var/lib/ceph/tmp/ceph.mon.keyring; + ceph-mon --mkfs -i mon1 --monmap /var/lib/ceph/tmp/monmap --keyring /var/lib/ceph/tmp/ceph.mon.keyring; ceph-mon -i mon1 -f -d; environment: MON_IP: ${MON_IP} From 5dd40b147bb33123edec3b445f37c7fc06c14203 Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 11 Nov 2025 23:46:45 +0200 Subject: [PATCH 08/12] s3 access keys in spark config --- getting-started/ceph/README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index 76d790d771..e765b0f089 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -52,7 +52,7 @@ cp .env.example .env # Optional: force runtime (docker or podman) export RUNTIME=docker -./getting-started/ceph/prepare-network.sh +./prepare-network.sh ``` ### 3. Start monitor and manager @@ -112,12 +112,15 @@ bin/spark-sql \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \ --conf spark.sql.catalog.polaris.type=rest \ - --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ - --conf spark.sql.catalog.polaris.token-refresh-enabled=false \ + --conf spark.sql.catalog.polaris.io-impl="org.apache.iceberg.aws.s3.S3FileIO" \ + --conf spark.sql.catalog.polaris.uri=http://polaris:8181/api/catalog \ + --conf spark.sql.catalog.polaris.token-refresh-enabled=true \ --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \ --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \ --conf spark.sql.catalog.polaris.credential=root:s3cr3t \ - --conf spark.sql.catalog.polaris.client.region=irrelevant + --conf spark.sql.catalog.polaris.client.region=irrelevant \ + --conf spark.sql.catalog.polaris.s3.access-key-id=$RGW_ACCESS_KEY \ + --conf spark.sql.catalog.polaris.s3.secret-access-key=$RGW_SECRET_KEY ``` Note: `s3cr3t` is defined as the password for the `root` user in the `docker-compose.yml` file. From 40caa7cbb55c22058fd0f19bed8aed39f66341eb Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Wed, 12 Nov 2025 00:57:54 +0200 Subject: [PATCH 09/12] remove bridge network --- getting-started/ceph/.env.example | 5 +- getting-started/ceph/README.md | 20 +++----- getting-started/ceph/ceph-conf/ceph.conf | 9 +--- getting-started/ceph/docker-compose.yml | 40 ---------------- getting-started/ceph/prepare-network.sh | 61 ------------------------ 5 files changed, 9 insertions(+), 126 deletions(-) delete mode 100755 getting-started/ceph/prepare-network.sh diff --git a/getting-started/ceph/.env.example b/getting-started/ceph/.env.example index d8c2366894..263b399b83 100644 --- a/getting-started/ceph/.env.example +++ b/getting-started/ceph/.env.example @@ -22,10 +22,9 @@ TZ=UTC # Timezone used inside conta DASHBOARD_PORT=8443 # Port for Ceph Dashboard INTERNAL_DASHBOARD_PORT=8443 # Internal port for Ceph Dashboard RGW_PORT=8080 # Port for Rados Gateway -MON_IP=172.18.0.2 # IP address of the monitor -MON1_CEPH_PUBLIC_NETWORK=172.18.0.0/16 # Public network subnet used by all Ceph daemons +MON_IP=$(hostname -i) # IP address of the monitor RGW_ACCESS_KEY=POLARIS123ACCESS # Access key for Polaris S3 user -RGW_SECRET_KEY=POLARIS456SECRET. # Secret key for Polaris S3 user +RGW_SECRET_KEY=POLARIS456SECRET # Secret key for Polaris S3 user FSID=b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e # Unique cluster identifier (use `uuidgen` to regenerate) OSD_UUID_1=80505106-0d32-4777-bac9-3dfc901b1273 # Unique OSD identifier (use `uuidgen` to regenerate) S3_ENDPOINT_URL=http://rgw1:7480 # Internal endpoint for S3-compatible RGW service diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index e765b0f089..8cd25d99e0 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -47,25 +47,17 @@ Note: this example pulls the `apache/polaris:latest` image, but assumes the imag cp .env.example .env ``` -### 2. Prepare Network -```shell -# Optional: force runtime (docker or podman) -export RUNTIME=docker - -./prepare-network.sh -``` - -### 3. Start monitor and manager +### 2. Start monitor and manager ```shell docker compose up -d mon1 mgr ``` -### 4. Start OSD +### 3. Start OSD ```shell docker compose up -d osd1 ``` -### 5. Start RGW +### 4. Start RGW ```shell docker compose up -d rgw1 ``` @@ -89,17 +81,17 @@ services: rgw: 1 daemon active (1 hosts, 1 zones) ``` -### 6. Create bucket for Polaris storage +### 5. Create bucket for Polaris storage ```shell docker compose up -d setup_bucket ``` -### 7. Run Polaris service +### 6. Run Polaris service ```shell docker compose up -d polaris ``` -### 8. Setup polaris catalog +### 7. Setup polaris catalog ```shell docker compose up -d polaris-setup ``` diff --git a/getting-started/ceph/ceph-conf/ceph.conf b/getting-started/ceph/ceph-conf/ceph.conf index 12bef94c71..b93e5b5909 100644 --- a/getting-started/ceph/ceph-conf/ceph.conf +++ b/getting-started/ceph/ceph-conf/ceph.conf @@ -20,9 +20,7 @@ [global] fsid = b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e mon_initial_members = mon1 -mon_host = 172.18.0.2 -public_network = 172.18.0.0/16 -cluster_network = 172.18.0.0/16 +mon_host = mon1 auth_cluster_required = cephx auth_service_required = cephx auth_client_required = cephx @@ -31,11 +29,6 @@ osd_pool_default_min_size = 1 osd_pool_default_pg_num = 333 osd_crush_chooseleaf_type = 1 mon_allow_pool_size_one= true -# max open files = 655350 -# cephx cluster require signatures = false -# cephx service require signatures = false -# osd max object name len = 256 -# osd max object namespace len = 64 [mon.mon1] mon_data = /var/lib/ceph/mon/ceph-mon1 diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml index 9c333c0841..8b9f642dd0 100644 --- a/getting-started/ceph/docker-compose.yml +++ b/getting-started/ceph/docker-compose.yml @@ -17,11 +17,6 @@ # under the License. # -networks: - cluster-net: - driver: bridge - external: true - services: mon1: @@ -47,14 +42,11 @@ services: ceph-mon -i mon1 -f -d; environment: MON_IP: ${MON_IP} - CEPH_PUBLIC_NETWORK: ${MON1_CEPH_PUBLIC_NETWORK} FSID: ${FSID} volumes: - ./ceph-conf:/etc/ceph - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd - ./osd1:/var/lib/ceph/osd/ceph-0/ - networks: - - cluster-net mgr: image: quay.io/ceph/ceph:v19.2.3 @@ -70,8 +62,6 @@ services: - ./ceph-conf:/etc/ceph depends_on: - mon1 - networks: - - cluster-net ports: - ${DASHBOARD_PORT}:${INTERNAL_DASHBOARD_PORT} @@ -103,34 +93,12 @@ services: - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd depends_on: - mon1 - networks: - - cluster-net - mds1: - image: quay.io/ceph/ceph:v19.2.3 - entrypoint: "/bin/sh" - command: - - "-c" - - >- - set -ex; - mkdir -p /var/lib/ceph/mds/ceph-admin; - ceph-authtool --create-keyring /var/lib/ceph/mds/ceph-admin/keyring --gen-key -n mds. --cap mds 'allow *'; - ceph-mds -f -i admin; - hostname: "ceph-mds1-host" - environment: - CEPHFS_CREATE: 1 - volumes: - - ./ceph-conf:/etc/ceph - depends_on: - - osd1 - networks: - - cluster-net rgw1: image: quay.io/ceph/ceph:v19.2.3 container_name: rgw1 environment: MON_IP: ${MON_IP} - CEPH_PUBLIC_NETWORK: ${MON1_CEPH_PUBLIC_NETWORK} RGW_ACCESS_KEY: ${RGW_ACCESS_KEY} RGW_SECRET_KEY: ${RGW_SECRET_KEY} entrypoint: "/bin/sh" @@ -157,8 +125,6 @@ services: - ./ceph-conf:/etc/ceph depends_on: - osd1 - networks: - - cluster-net setup_bucket: image: peakcom/s5cmd:latest @@ -180,8 +146,6 @@ services: echo ">>> Create bucket if not exist..."; /s5cmd --endpoint-url ${S3_ENDPOINT_URL} mb s3://${S3_POLARIS_BUCKET} || true; tail -f /dev/null; - networks: - - cluster-net polaris: image: apache/polaris:latest @@ -207,8 +171,6 @@ services: timeout: 10s retries: 10 start_period: 10s - networks: - - cluster-net polaris-setup: image: alpine/curl @@ -243,5 +205,3 @@ services: curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type: application/json' \ -X GET \ http://polaris:8181/api/management/v1/catalogs; - networks: - - cluster-net diff --git a/getting-started/ceph/prepare-network.sh b/getting-started/ceph/prepare-network.sh deleted file mode 100755 index fbf782aa7b..0000000000 --- a/getting-started/ceph/prepare-network.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -#!/bin/bash -set -e - -NETWORK_NAME="cluster-net" -SUBNET="172.18.0.0/16" -GATEWAY="172.18.0.1" -RUNTIME=${RUNTIME:-auto} # choose: docker | podman | auto - -create_network() { - local cmd=$1 - local exists_cmd="$cmd network inspect $NETWORK_NAME >/dev/null 2>&1" - - if eval "$exists_cmd"; then - echo "Network '$NETWORK_NAME' already exists in $cmd." - else - echo "Creating network '$NETWORK_NAME' in $cmd..." - $cmd network create \ - --driver bridge \ - --subnet $SUBNET \ - --gateway $GATEWAY \ - $NETWORK_NAME - fi -} - -# Auto-detect or use user choice -if [ "$RUNTIME" = "docker" ]; then - create_network docker -elif [ "$RUNTIME" = "podman" ]; then - create_network podman -else - if command -v docker >/dev/null 2>&1; then - echo "Detected Docker (defaulting to Docker runtime)" - create_network docker - elif command -v podman >/dev/null 2>&1; then - echo "Detected Podman (defaulting to Podman runtime)" - create_network podman - else - echo "Neither Docker nor Podman found. Please install one." - exit 1 - fi -fi From e9237907a4714a25299df810322dcc4275373f50 Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Wed, 12 Nov 2025 10:17:33 +0200 Subject: [PATCH 10/12] hardcode keys in spark example --- getting-started/ceph/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index 8cd25d99e0..59c7b2c04c 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -96,7 +96,7 @@ docker compose up -d polaris docker compose up -d polaris-setup ``` -## Connecting From Spark +## 8. Connecting From Spark ```shell bin/spark-sql \ @@ -104,15 +104,15 @@ bin/spark-sql \ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \ --conf spark.sql.catalog.polaris.type=rest \ - --conf spark.sql.catalog.polaris.io-impl="org.apache.iceberg.aws.s3.S3FileIO" \ - --conf spark.sql.catalog.polaris.uri=http://polaris:8181/api/catalog \ + --conf spark.sql.catalog.polaris.io-impl=org.apache.iceberg.aws.s3.S3FileIO \ + --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ --conf spark.sql.catalog.polaris.token-refresh-enabled=true \ --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \ --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \ --conf spark.sql.catalog.polaris.credential=root:s3cr3t \ --conf spark.sql.catalog.polaris.client.region=irrelevant \ - --conf spark.sql.catalog.polaris.s3.access-key-id=$RGW_ACCESS_KEY \ - --conf spark.sql.catalog.polaris.s3.secret-access-key=$RGW_SECRET_KEY + --conf spark.sql.catalog.polaris.s3.access-key-id=POLARIS123ACCESS \ + --conf spark.sql.catalog.polaris.s3.secret-access-key=POLARIS456SECRET ``` Note: `s3cr3t` is defined as the password for the `root` user in the `docker-compose.yml` file. @@ -120,7 +120,7 @@ Note: `s3cr3t` is defined as the password for the `root` user in the `docker-com Note: The `client.region` configuration is required for the AWS S3 client to work, but it is not used in this example since Ceph does not require a specific region. -## Running Queries +## 9. Running Queries Run inside the Spark SQL shell: From 05c7a58899215b5ede01d2c91011b657755b8efb Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Tue, 18 Nov 2025 15:01:39 +0200 Subject: [PATCH 11/12] mod spark config and add polaris internalEndpoint --- getting-started/ceph/README.md | 1 + getting-started/ceph/docker-compose.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md index 59c7b2c04c..b70da3ee59 100644 --- a/getting-started/ceph/README.md +++ b/getting-started/ceph/README.md @@ -109,6 +109,7 @@ bin/spark-sql \ --conf spark.sql.catalog.polaris.token-refresh-enabled=true \ --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \ --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \ + --conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation="" \ --conf spark.sql.catalog.polaris.credential=root:s3cr3t \ --conf spark.sql.catalog.polaris.client.region=irrelevant \ --conf spark.sql.catalog.polaris.s3.access-key-id=POLARIS123ACCESS \ diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml index 8b9f642dd0..850cf4d4b4 100644 --- a/getting-started/ceph/docker-compose.yml +++ b/getting-started/ceph/docker-compose.yml @@ -191,7 +191,8 @@ services: source /polaris/obtain-token.sh; echo Creating catalog...; export STORAGE_CONFIG_INFO='{"storageType":"S3", - "endpoint":"http://rgw1:7480", + "endpoint":"http://localhost:7480", + "endpointInternal":"http://rgw1:7480", "stsUnavailable":"true", "pathStyleAccess":true}'; export STORAGE_LOCATION='s3://polaris-storage'; From 013e771845553e6be40b9ce2e54f540ed7913457 Mon Sep 17 00:00:00 2001 From: "sarunas.svegzda" Date: Thu, 20 Nov 2025 14:46:31 +0200 Subject: [PATCH 12/12] mkdir in container --- getting-started/ceph/docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml index 850cf4d4b4..9febf037e0 100644 --- a/getting-started/ceph/docker-compose.yml +++ b/getting-started/ceph/docker-compose.yml @@ -26,6 +26,7 @@ services: - "-c" - >- set -ex; + mkdir -p /var/lib/ceph/bootstrap-osd; mkdir -p /var/lib/ceph/osd/ceph-0; ceph-authtool --create-keyring /var/lib/ceph/tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'; ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring \ @@ -46,7 +47,6 @@ services: volumes: - ./ceph-conf:/etc/ceph - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd - - ./osd1:/var/lib/ceph/osd/ceph-0/ mgr: image: quay.io/ceph/ceph:v19.2.3 @@ -76,6 +76,7 @@ services: - "-c" - >- set -ex; + mkdir -p /var/lib/ceph/bootstrap-osd; mkdir -p /var/lib/ceph/osd/ceph-0; chown -R ceph:ceph /var/lib/ceph/osd/ceph-0; ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-0/keyring \