Skip to content

Commit ac724d3

Browse files
authored
NoSQL: Add maintenance API, SPI (#3028)
Maintenance operations are a set of tasks that are regularly executed against a backend database. Types of maintenance operations include:
* Purging unreferenced objects and references within a catalog
* Purging whole catalogs that are marked to be purged
* Purging whole realms that are marked to be purged

The implementation is added in a follow-up PR.
1 parent 1ab7814 commit ac724d3

File tree

21 files changed

+1481
-1
lines changed

21 files changed

+1481
-1
lines changed

bom/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ dependencies {
6262
api(project(":polaris-persistence-nosql-inmemory"))
6363
api(project(":polaris-persistence-nosql-mongodb"))
6464

65+
api(project(":polaris-persistence-nosql-maintenance-api"))
66+
api(project(":polaris-persistence-nosql-maintenance-cel"))
67+
api(project(":polaris-persistence-nosql-maintenance-spi"))
68+
6569
api(project(":polaris-config-docs-annotations"))
6670
api(project(":polaris-config-docs-generator"))
6771

codestyle/checkstyle.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
<!-- Checks for imports -->
4444
<!-- See http://checkstyle.org/config_imports.html -->
4545
<module name="IllegalImport">
46-
<property name="illegalPkgs" value=".*\.shaded\..*, .*\.relocated\..*"/>
46+
<property name="illegalPkgs" value=".*\.shaded\..*, (?!org\.projectnessie\.cel).*\.relocated\..*"/>
4747
<property name="regexp" value="true"/>
4848
</module>
4949

gradle/libs.versions.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ awssdk-bom = { module = "software.amazon.awssdk:bom", version = "2.38.2" }
4848
awaitility = { module = "org.awaitility:awaitility", version = "4.3.0" }
4949
azuresdk-bom = { module = "com.azure:azure-sdk-bom", version = "1.3.2" }
5050
caffeine = { module = "com.github.ben-manes.caffeine:caffeine", version = "3.2.3" }
51+
cel-bom = { module = "org.projectnessie.cel:cel-bom", version = "0.5.3" }
5152
commons-lang3 = { module = "org.apache.commons:commons-lang3", version = "3.19.0" }
5253
commons-text = { module = "org.apache.commons:commons-text", version = "1.14.0" }
5354
errorprone = { module = "com.google.errorprone:error_prone_core", version = "2.44.0" }

gradle/projects.main.properties

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ polaris-persistence-nosql-correctness=persistence/nosql/persistence/correctness
7474
polaris-persistence-nosql-standalone=persistence/nosql/persistence/standalone
7575
polaris-persistence-nosql-testextension=persistence/nosql/persistence/testextension
7676
polaris-persistence-nosql-varint=persistence/nosql/persistence/varint
77+
# persistence / maintenance
78+
polaris-persistence-nosql-maintenance-api=persistence/nosql/persistence/maintenance/api
79+
polaris-persistence-nosql-maintenance-cel=persistence/nosql/persistence/maintenance/retain-cel
80+
polaris-persistence-nosql-maintenance-spi=persistence/nosql/persistence/maintenance/spi
7781
# persistence / database specific implementations
7882
polaris-persistence-nosql-inmemory=persistence/nosql/persistence/db/inmemory
7983
polaris-persistence-nosql-mongodb=persistence/nosql/persistence/db/mongodb
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
Maintenance service,
21+
see [API package javadoc](api/src/main/java/org/apache/polaris/persistence/nosql/maintenance/api/package-info.java)
22+
for information.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
// Build script of the maintenance "api" module (service interfaces only).
// NOTE(review): "polaris-server" is presumably a project convention plugin defined elsewhere in the build.
plugins {
21+
id("org.kordamp.gradle.jandex")
22+
id("polaris-server")
23+
}
24+
25+
description = "Polaris NoSQL persistence maintenance - service interfaces"
26+
27+
// Most dependencies are declared compileOnly, i.e. they are on the compile classpath of this
// module only; only the NoSQL persistence API and Guava are declared as implementation.
dependencies {
28+
implementation(project(":polaris-persistence-nosql-api"))
29+
compileOnly(project(":polaris-persistence-nosql-realms-api"))
30+
compileOnly(project(":polaris-persistence-nosql-maintenance-spi"))
31+
32+
// Immutables: annotations at compile time plus the matching annotation processor.
compileOnly(project(":polaris-immutables"))
33+
annotationProcessor(project(":polaris-immutables", configuration = "processor"))
34+
35+
implementation(libs.guava)
36+
37+
compileOnly(libs.smallrye.config.core)
38+
39+
compileOnly(libs.jakarta.annotation.api)
40+
compileOnly(libs.jakarta.validation.api)
41+
42+
// Jackson artifact versions are taken from the Jackson BOM.
compileOnly(platform(libs.jackson.bom))
43+
compileOnly("com.fasterxml.jackson.core:jackson-annotations")
44+
compileOnly("com.fasterxml.jackson.core:jackson-databind")
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.polaris.persistence.nosql.maintenance.api;
20+
21+
import static com.google.common.base.Preconditions.checkState;
22+
23+
import com.fasterxml.jackson.annotation.JsonFormat;
24+
import com.fasterxml.jackson.annotation.JsonInclude;
25+
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
26+
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
27+
import io.smallrye.config.ConfigMapping;
28+
import io.smallrye.config.WithDefault;
29+
import java.time.Duration;
30+
import java.util.Optional;
31+
import java.util.OptionalDouble;
32+
import java.util.OptionalInt;
33+
import java.util.OptionalLong;
34+
import org.apache.polaris.immutables.PolarisImmutable;
35+
import org.immutables.value.Value;
36+
37+
/** Maintenance service configuration. */
38+
@ConfigMapping(prefix = "polaris.persistence.maintenance")
39+
@PolarisImmutable
40+
@JsonSerialize(as = ImmutableMaintenanceConfig.class)
41+
@JsonDeserialize(as = ImmutableMaintenanceConfig.class)
42+
public interface MaintenanceConfig {
43+
44+
long DEFAULT_EXPECTED_REFERENCE_COUNT = 100;
45+
46+
/**
47+
* Provides the expected number of references in all realms to retain, defaults to {@value
48+
* #DEFAULT_EXPECTED_REFERENCE_COUNT}, must be at least {@code 100}. This value is used as the
49+
* default if no information of a previous maintenance run is present, it is also the minimum
50+
* number of expected references.
51+
*/
52+
@WithDefault("" + DEFAULT_EXPECTED_REFERENCE_COUNT)
53+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
54+
OptionalLong expectedReferenceCount();
55+
56+
long DEFAULT_EXPECTED_OBJ_COUNT = 100_000;
57+
58+
/**
59+
* Provides the expected number of objects in all realms to retain, defaults to {@value
60+
* #DEFAULT_EXPECTED_OBJ_COUNT}, must be at least {@code 100000}. This value is used as the
61+
* default if no information of a previous maintenance run is present, it is also the minimum
62+
* number of expected objects.
63+
*/
64+
@WithDefault("" + DEFAULT_EXPECTED_OBJ_COUNT)
65+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
66+
OptionalLong expectedObjCount();
67+
68+
double DEFAULT_COUNT_FROM_LAST_RUN_MULTIPLIER = 1.1;
69+
70+
/**
71+
* Maintenance service sizes the bloom-filters used to hold the identified references and objects
72+
* according to the expression {@code lastRun.numberOfIdentified * countFromLastRunMultiplier}.
73+
* The default is to add 10% to the number of identified items.
74+
*/
75+
@WithDefault("" + DEFAULT_COUNT_FROM_LAST_RUN_MULTIPLIER)
76+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
77+
OptionalDouble countFromLastRunMultiplier();
78+
79+
double DEFAULT_INITIALIZED_FPP = 0.00001;
80+
81+
/**
82+
* False-positive-probability (FPP) used to initialize the bloom-filters for identified references
83+
* and objects.
84+
*/
85+
@WithDefault("" + DEFAULT_INITIALIZED_FPP)
86+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
87+
OptionalDouble filterInitializedFpp();
88+
89+
double DEFAULT_MAX_ACCEPTABLE_FPP = 0.00005;
90+
91+
/**
92+
* Expected maximum false-positive-probability (FPP) used to check the bloom-filters for
93+
* identified references and objects.
94+
*
95+
* <p>If the FPP of a bloom filter exceeds this value, no individual references or objects will be
96+
* purged.
97+
*/
98+
@WithDefault("" + DEFAULT_MAX_ACCEPTABLE_FPP)
99+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
100+
OptionalDouble maxAcceptableFilterFpp();
101+
102+
int DEFAULT_RETAINED_RUNS = 50;
103+
104+
/**
105+
* Number of retained {@linkplain MaintenanceRunInformation maintenance run objects}, must be at
106+
* least {@code 2}.
107+
*/
108+
@WithDefault("" + DEFAULT_RETAINED_RUNS)
109+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
110+
OptionalInt retainedRuns();
111+
112+
String DEFAULT_CREATED_AT_GRACE_TIME_STRING = "PT3H";
113+
Duration DEFAULT_CREATED_AT_GRACE_TIME = Duration.parse(DEFAULT_CREATED_AT_GRACE_TIME_STRING);
114+
115+
/**
116+
* Objects and references that have been created <em>after</em> a maintenance run has started are
117+
* never purged. This option defines an additional grace time to when the maintenance run has
118+
* started.
119+
*
120+
* <p>This value is a safety net for two reasons:
121+
*
122+
* <ul>
123+
* <li>Respect the wall-clock drift between Polaris nodes.
124+
* <li>Respect the order of writes in Polaris persistence. Objects are written <em>before</em>
125+
* those become reachable via a commit. Commits may take a little time (milliseconds, up to
126+
* a few seconds, depending on the system load) to complete. Therefore, implementations
127+
* enforce a minimum of 5 minutes.
128+
* </ul>
129+
*/
130+
@WithDefault(DEFAULT_CREATED_AT_GRACE_TIME_STRING)
131+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
132+
@JsonFormat(shape = JsonFormat.Shape.STRING)
133+
Optional<Duration> createdAtGraceTime();
134+
135+
/**
136+
* Optionally limit the number of objects scanned per second. Default is to not throttle object
137+
* scanning.
138+
*/
139+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
140+
OptionalInt objectScanRateLimitPerSecond();
141+
142+
/**
143+
* Optionally limit the number of references scanned per second.
144+
*
145+
* <p>Default is to not throttle reference scanning.
146+
*/
147+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
148+
OptionalInt referenceScanRateLimitPerSecond();
149+
150+
int DEFAULT_DELETE_BATCH_SIZE = 10;
151+
152+
/** Size of the delete-batches when purging objects. */
153+
@WithDefault("" + DEFAULT_DELETE_BATCH_SIZE)
154+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
155+
OptionalInt deleteBatchSize();
156+
157+
static ImmutableMaintenanceConfig.Builder builder() {
158+
return ImmutableMaintenanceConfig.builder();
159+
}
160+
161+
@Value.Check
162+
default void check() {
163+
expectedReferenceCount()
164+
.ifPresent(v -> checkState(v > 0, "expectedReferenceCount must be positive"));
165+
expectedObjCount().ifPresent(v -> checkState(v > 0, "expectedObjCount must be positive"));
166+
countFromLastRunMultiplier()
167+
.ifPresent(v -> checkState(v > 1d, "countFromLastRunMultiplier must be greater than 1.0d"));
168+
filterInitializedFpp()
169+
.ifPresent(
170+
v -> checkState(v > 0d && v <= 1d, "filterInitializedFpp must be > 0.0d and <= 1.0d"));
171+
maxAcceptableFilterFpp()
172+
.ifPresent(
173+
v ->
174+
checkState(v > 0d && v <= 1d, "maxAcceptableFilterFpp must be > 0.0d and <= 1.0d"));
175+
retainedRuns().ifPresent(v -> checkState(v >= 2, "retainedRuns must 2 or greater"));
176+
createdAtGraceTime()
177+
.ifPresent(v -> checkState(!v.isNegative(), "createdAtGraceTime must not be negative"));
178+
objectScanRateLimitPerSecond()
179+
.ifPresent(v -> checkState(v >= 0, "objectScanRateLimitPerSecond must not be negative"));
180+
referenceScanRateLimitPerSecond()
181+
.ifPresent(v -> checkState(v >= 0, "referenceScanRateLimitPerSecond must not be negative"));
182+
deleteBatchSize().ifPresent(v -> checkState(v > 0, "deleteBatchSize must be positive"));
183+
}
184+
}

0 commit comments

Comments
 (0)