Skip to content

Commit 8e317d7

Browse files
authored
[9.2] Backport ML test fixes (#137746)
Backports the following test fixes - [ML] Add Auditor reset internal action (#136363) - Fix ML tests failing with "no shards available" (#136800)
1 parent b23c312 commit 8e317d7

File tree

9 files changed

+252
-89
lines changed

9 files changed

+252
-89
lines changed

muted-tests.yml

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -146,18 +146,12 @@ tests:
146146
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
147147
method: test {p0=snapshot/10_basic/Create a source only snapshot and then restore it}
148148
issue: https://github.com/elastic/elasticsearch/issues/122755
149-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
150-
method: test {yaml=ml/data_frame_analytics_crud/Test get stats given multiple analytics}
151-
issue: https://github.com/elastic/elasticsearch/issues/123034
152149
- class: org.elasticsearch.indices.recovery.IndexRecoveryIT
153150
method: testSourceThrottling
154151
issue: https://github.com/elastic/elasticsearch/issues/123680
155152
- class: org.elasticsearch.smoketest.MlWithSecurityIT
156153
method: test {yaml=ml/3rd_party_deployment/Test start deployment fails while model download in progress}
157154
issue: https://github.com/elastic/elasticsearch/issues/120814
158-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
159-
method: test {yaml=ml/start_data_frame_analytics/Test start classification analysis when the dependent variable is missing}
160-
issue: https://github.com/elastic/elasticsearch/issues/124168
161155
- class: org.elasticsearch.smoketest.MlWithSecurityIT
162156
method: test {yaml=ml/3rd_party_deployment/Test start and stop multiple deployments}
163157
issue: https://github.com/elastic/elasticsearch/issues/124315
@@ -170,15 +164,6 @@ tests:
170164
- class: org.elasticsearch.packaging.test.BootstrapCheckTests
171165
method: test10Install
172166
issue: https://github.com/elastic/elasticsearch/issues/124957
173-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
174-
method: test {yaml=ml/data_frame_analytics_crud/Test get stats on newly created config}
175-
issue: https://github.com/elastic/elasticsearch/issues/121726
176-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
177-
method: test {yaml=ml/data_frame_analytics_cat_apis/Test cat data frame analytics all jobs with header and column selection}
178-
issue: https://github.com/elastic/elasticsearch/issues/125641
179-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
180-
method: test {yaml=ml/data_frame_analytics_cat_apis/Test cat data frame analytics single job with header}
181-
issue: https://github.com/elastic/elasticsearch/issues/125642
182167
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
183168
method: test {p0=transform/transforms_start_stop/Test schedule_now on an already started transform}
184169
issue: https://github.com/elastic/elasticsearch/issues/120720
@@ -188,9 +173,6 @@ tests:
188173
- class: org.elasticsearch.xpack.core.common.notifications.AbstractAuditorTests
189174
method: testRecreateTemplateWhenDeleted
190175
issue: https://github.com/elastic/elasticsearch/issues/123232
191-
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
192-
method: test {p0=ml/start_data_frame_analytics/Test start given dest index is not empty}
193-
issue: https://github.com/elastic/elasticsearch/issues/125909
194176
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
195177
method: test {p0=transform/transforms_stats/Test get transform stats with timeout}
196178
issue: https://github.com/elastic/elasticsearch/issues/125975
@@ -203,15 +185,6 @@ tests:
203185
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
204186
method: test {p0=transform/transforms_stats/Test get transform stats}
205187
issue: https://github.com/elastic/elasticsearch/issues/126270
206-
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
207-
method: test {p0=ml/start_data_frame_analytics/Test start classification analysis when the dependent variable cardinality is too low}
208-
issue: https://github.com/elastic/elasticsearch/issues/126299
209-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
210-
method: test {yaml=ml/start_data_frame_analytics/Test start classification analysis when the dependent variable cardinality is too low}
211-
issue: https://github.com/elastic/elasticsearch/issues/123200
212-
- class: org.elasticsearch.smoketest.MlWithSecurityIT
213-
method: test {yaml=ml/trained_model_cat_apis/Test cat trained models}
214-
issue: https://github.com/elastic/elasticsearch/issues/125750
215188
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
216189
method: test {p0=transform/transforms_start_stop/Test start/stop only starts/stops specified transform}
217190
issue: https://github.com/elastic/elasticsearch/issues/126466
@@ -248,9 +221,6 @@ tests:
248221
- class: org.elasticsearch.cli.keystore.AddStringKeyStoreCommandTests
249222
method: testStdinWithMultipleValues
250223
issue: https://github.com/elastic/elasticsearch/issues/126882
251-
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
252-
method: test {p0=ml/data_frame_analytics_cat_apis/Test cat data frame analytics all jobs with header}
253-
issue: https://github.com/elastic/elasticsearch/issues/127625
254224
- class: org.elasticsearch.xpack.ccr.action.ShardFollowTaskReplicationTests
255225
method: testChangeFollowerHistoryUUID
256226
issue: https://github.com/elastic/elasticsearch/issues/127680
@@ -333,9 +303,6 @@ tests:
333303
- class: org.elasticsearch.packaging.test.DockerTests
334304
method: test171AdditionalCliOptionsAreForwarded
335305
issue: https://github.com/elastic/elasticsearch/issues/120925
336-
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
337-
method: test {p0=ml/delete_expired_data/Test delete expired data with body parameters}
338-
issue: https://github.com/elastic/elasticsearch/issues/131364
339306
- class: org.elasticsearch.packaging.test.DockerTests
340307
method: test070BindMountCustomPathConfAndJvmOptions
341308
issue: https://github.com/elastic/elasticsearch/issues/131366
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.core.ml.action;
9+
10+
import org.elasticsearch.action.ActionType;
11+
import org.elasticsearch.action.FailedNodeException;
12+
import org.elasticsearch.action.support.nodes.BaseNodeResponse;
13+
import org.elasticsearch.action.support.nodes.BaseNodesRequest;
14+
import org.elasticsearch.action.support.nodes.BaseNodesResponse;
15+
import org.elasticsearch.cluster.ClusterName;
16+
import org.elasticsearch.cluster.node.DiscoveryNode;
17+
import org.elasticsearch.common.io.stream.StreamInput;
18+
import org.elasticsearch.common.io.stream.StreamOutput;
19+
import org.elasticsearch.transport.AbstractTransportRequest;
20+
21+
import java.io.IOException;
22+
import java.util.List;
23+
import java.util.Objects;
24+
25+
public class ResetMlComponentsAction extends ActionType<ResetMlComponentsAction.Response> {
26+
27+
public static final ResetMlComponentsAction INSTANCE = new ResetMlComponentsAction();
28+
public static final String NAME = "cluster:internal/xpack/ml/auditor/reset";
29+
30+
private ResetMlComponentsAction() {
31+
super(NAME);
32+
}
33+
34+
public static class Request extends BaseNodesRequest {
35+
36+
public static Request RESET_AUDITOR_REQUEST = new Request();
37+
38+
private Request() {
39+
super(new String[] { "ml:true" }); // Only ml nodes. See DiscoveryNodes::resolveNodes
40+
}
41+
}
42+
43+
public static class NodeRequest extends AbstractTransportRequest {
44+
45+
public NodeRequest(StreamInput in) throws IOException {
46+
super(in);
47+
}
48+
49+
public NodeRequest() {}
50+
51+
@Override
52+
public boolean equals(Object o) {
53+
if (o == null || getClass() != o.getClass()) {
54+
return false;
55+
}
56+
return true;
57+
}
58+
59+
@Override
60+
public int hashCode() {
61+
return Objects.hash();
62+
}
63+
}
64+
65+
public static class Response extends BaseNodesResponse<Response.ResetResponse> {
66+
67+
public Response(ClusterName clusterName, List<ResetResponse> nodes, List<FailedNodeException> failures) {
68+
super(clusterName, nodes, failures);
69+
}
70+
71+
protected Response(StreamInput in) throws IOException {
72+
super(in);
73+
}
74+
75+
public static class ResetResponse extends BaseNodeResponse {
76+
private final boolean acknowledged;
77+
78+
public ResetResponse(DiscoveryNode node, boolean acknowledged) {
79+
super(node);
80+
this.acknowledged = acknowledged;
81+
}
82+
83+
public ResetResponse(StreamInput in) throws IOException {
84+
super(in, null);
85+
acknowledged = in.readBoolean();
86+
}
87+
88+
public ResetResponse(StreamInput in, DiscoveryNode node) throws IOException {
89+
super(in, node);
90+
acknowledged = in.readBoolean();
91+
}
92+
93+
public boolean isAcknowledged() {
94+
return acknowledged;
95+
}
96+
97+
@Override
98+
public void writeTo(StreamOutput out) throws IOException {
99+
super.writeTo(out);
100+
out.writeBoolean(acknowledged);
101+
}
102+
103+
@Override
104+
public boolean equals(Object o) {
105+
if (o == null || getClass() != o.getClass()) return false;
106+
ResetResponse that = (ResetResponse) o;
107+
return acknowledged == that.acknowledged;
108+
}
109+
110+
@Override
111+
public int hashCode() {
112+
return Objects.hashCode(acknowledged);
113+
}
114+
}
115+
116+
@Override
117+
protected List<Response.ResetResponse> readNodesFrom(StreamInput in) throws IOException {
118+
return in.readCollectionAsList(ResetResponse::new);
119+
}
120+
121+
@Override
122+
protected void writeNodesTo(StreamOutput out, List<Response.ResetResponse> nodes) throws IOException {
123+
out.writeCollection(nodes);
124+
}
125+
}
126+
}

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DeleteExpiredDataIT.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ public void testDeleteExpiredData_GivenNothingToDelete() throws Exception {
105105
client().execute(DeleteExpiredDataAction.INSTANCE, new DeleteExpiredDataAction.Request()).get();
106106
}
107107

108-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/62699")
109108
public void testDeleteExpiredDataNoThrottle() throws Exception {
110109
testExpiredDeletion(null, 10010);
111110
}
@@ -152,7 +151,6 @@ public void testDeleteExpiredDataActionDeletesEmptyStateIndices() throws Excepti
152151
);
153152
}
154153

155-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/62699")
156154
public void testDeleteExpiredDataWithStandardThrottle() throws Exception {
157155
testExpiredDeletion(-1.0f, 100);
158156
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import org.elasticsearch.cluster.service.ClusterService;
3333
import org.elasticsearch.common.breaker.CircuitBreaker;
3434
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
35-
import org.elasticsearch.common.logging.DeprecationLogger;
3635
import org.elasticsearch.common.settings.ClusterSettings;
3736
import org.elasticsearch.common.settings.IndexScopedSettings;
3837
import org.elasticsearch.common.settings.Setting;
@@ -162,6 +161,7 @@
162161
import org.elasticsearch.xpack.core.ml.action.PutTrainedModelDefinitionPartAction;
163162
import org.elasticsearch.xpack.core.ml.action.PutTrainedModelVocabularyAction;
164163
import org.elasticsearch.xpack.core.ml.action.ResetJobAction;
164+
import org.elasticsearch.xpack.core.ml.action.ResetMlComponentsAction;
165165
import org.elasticsearch.xpack.core.ml.action.RevertModelSnapshotAction;
166166
import org.elasticsearch.xpack.core.ml.action.SetResetModeAction;
167167
import org.elasticsearch.xpack.core.ml.action.SetUpgradeModeAction;
@@ -272,6 +272,7 @@
272272
import org.elasticsearch.xpack.ml.action.TransportPutTrainedModelDefinitionPartAction;
273273
import org.elasticsearch.xpack.ml.action.TransportPutTrainedModelVocabularyAction;
274274
import org.elasticsearch.xpack.ml.action.TransportResetJobAction;
275+
import org.elasticsearch.xpack.ml.action.TransportResetMlComponentsAction;
275276
import org.elasticsearch.xpack.ml.action.TransportRevertModelSnapshotAction;
276277
import org.elasticsearch.xpack.ml.action.TransportSetResetModeAction;
277278
import org.elasticsearch.xpack.ml.action.TransportSetUpgradeModeAction;
@@ -785,7 +786,6 @@ public void loadExtensions(ExtensionLoader loader) {
785786
public static final int MAX_LOW_PRIORITY_MODELS_PER_NODE = 100;
786787

787788
private static final Logger logger = LogManager.getLogger(MachineLearning.class);
788-
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(MachineLearning.class);
789789

790790
private final Settings settings;
791791
private final boolean enabled;
@@ -805,7 +805,7 @@ public void loadExtensions(ExtensionLoader loader) {
805805
private final SetOnce<LearningToRankService> learningToRankService = new SetOnce<>();
806806
private final SetOnce<MlAutoscalingDeciderService> mlAutoscalingDeciderService = new SetOnce<>();
807807
private final SetOnce<DeploymentManager> deploymentManager = new SetOnce<>();
808-
private final SetOnce<TrainedModelAssignmentClusterService> trainedModelAllocationClusterServiceSetOnce = new SetOnce<>();
808+
private final SetOnce<TrainedModelAssignmentClusterService> trainedModelAllocationClusterService = new SetOnce<>();
809809

810810
private final SetOnce<MachineLearningExtension> machineLearningExtension = new SetOnce<>();
811811

@@ -1315,7 +1315,7 @@ public Collection<?> createComponents(PluginServices services) {
13151315
clusterService,
13161316
threadPool
13171317
);
1318-
trainedModelAllocationClusterServiceSetOnce.set(
1318+
trainedModelAllocationClusterService.set(
13191319
new TrainedModelAssignmentClusterService(
13201320
settings,
13211321
clusterService,
@@ -1391,7 +1391,8 @@ public Collection<?> createComponents(PluginServices services) {
13911391
trainedModelCacheMetadataService,
13921392
trainedModelProvider,
13931393
trainedModelAssignmentService,
1394-
trainedModelAllocationClusterServiceSetOnce.get(),
1394+
trainedModelAllocationClusterService.get(),
1395+
trainedModelStatsService,
13951396
deploymentManager.get(),
13961397
nodeAvailabilityZoneMapper,
13971398
new MachineLearningExtensionHolder(machineLearningExtension.get()),
@@ -1564,6 +1565,7 @@ public List<ActionHandler> getActions() {
15641565
actionHandlers.add(new ActionHandler(MlMemoryAction.INSTANCE, TransportMlMemoryAction.class));
15651566
actionHandlers.add(new ActionHandler(SetUpgradeModeAction.INSTANCE, TransportSetUpgradeModeAction.class));
15661567
actionHandlers.add(new ActionHandler(SetResetModeAction.INSTANCE, TransportSetResetModeAction.class));
1568+
actionHandlers.add(new ActionHandler(ResetMlComponentsAction.INSTANCE, TransportResetMlComponentsAction.class));
15671569
// Included in this section as it's used by MlMemoryAction
15681570
actionHandlers.add(new ActionHandler(TrainedModelCacheInfoAction.INSTANCE, TransportTrainedModelCacheInfoAction.class));
15691571
actionHandlers.add(new ActionHandler(GetMlAutoscalingStats.INSTANCE, TransportGetMlAutoscalingStats.class));
@@ -2149,8 +2151,6 @@ public void cleanUpFeature(
21492151
final Map<String, Boolean> results = new ConcurrentHashMap<>();
21502152

21512153
ActionListener<ResetFeatureStateResponse.ResetFeatureStateStatus> unsetResetModeListener = ActionListener.wrap(success -> {
2152-
// reset the auditors as aliases used may be removed
2153-
resetAuditors();
21542154

21552155
client.execute(SetResetModeAction.INSTANCE, SetResetModeActionRequest.disabled(true), ActionListener.wrap(resetSuccess -> {
21562156
finalListener.onResponse(success);
@@ -2176,8 +2176,24 @@ public void cleanUpFeature(
21762176
);
21772177
});
21782178

2179+
ActionListener<ResetFeatureStateResponse.ResetFeatureStateStatus> resetAuditors = ActionListener.wrap(success -> {
2180+
// reset components, such as the auditors and the trained model stats queue
2181+
client.execute(
2182+
ResetMlComponentsAction.INSTANCE,
2183+
ResetMlComponentsAction.Request.RESET_AUDITOR_REQUEST,
2184+
ActionListener.wrap(ignored -> unsetResetModeListener.onResponse(success), unsetResetModeListener::onFailure)
2185+
);
2186+
}, failure -> {
2187+
logger.error("failed to reset machine learning", failure);
2188+
client.execute(
2189+
ResetMlComponentsAction.INSTANCE,
2190+
ResetMlComponentsAction.Request.RESET_AUDITOR_REQUEST,
2191+
ActionListener.wrap(ignored -> unsetResetModeListener.onFailure(failure), unsetResetModeListener::onFailure)
2192+
);
2193+
});
2194+
21792195
// Stop all model deployments
2180-
ActionListener<AcknowledgedResponse> pipelineValidation = unsetResetModeListener.<ListTasksResponse>delegateFailureAndWrap(
2196+
ActionListener<AcknowledgedResponse> pipelineValidation = resetAuditors.<ListTasksResponse>delegateFailureAndWrap(
21812197
(delegate, listTasksResponse) -> {
21822198
listTasksResponse.rethrowFailures("Waiting for indexing requests for .ml-* indices");
21832199
if (results.values().stream().allMatch(b -> b)) {
@@ -2306,11 +2322,11 @@ public void cleanUpFeature(
23062322
);
23072323
client.execute(CancelJobModelSnapshotUpgradeAction.INSTANCE, cancelSnapshotUpgradesReq, delegate);
23082324
}).delegateFailureAndWrap((delegate, acknowledgedResponse) -> {
2309-
if (trainedModelAllocationClusterServiceSetOnce.get() == null || machineLearningExtension.get().isNlpEnabled() == false) {
2325+
if (trainedModelAllocationClusterService.get() == null || machineLearningExtension.get().isNlpEnabled() == false) {
23102326
delegate.onResponse(AcknowledgedResponse.TRUE);
23112327
return;
23122328
}
2313-
trainedModelAllocationClusterServiceSetOnce.get().removeAllModelAssignments(delegate);
2329+
trainedModelAllocationClusterService.get().removeAllModelAssignments(delegate);
23142330
});
23152331

23162332
// validate no pipelines are using machine learning models
@@ -2332,18 +2348,6 @@ public void cleanUpFeature(
23322348
client.execute(SetResetModeAction.INSTANCE, SetResetModeActionRequest.enabled(), afterResetModeSet);
23332349
}
23342350

2335-
private void resetAuditors() {
2336-
if (anomalyDetectionAuditor.get() != null) {
2337-
anomalyDetectionAuditor.get().reset();
2338-
}
2339-
if (dataFrameAnalyticsAuditor.get() != null) {
2340-
dataFrameAnalyticsAuditor.get().reset();
2341-
}
2342-
if (inferenceAuditor.get() != null) {
2343-
inferenceAuditor.get().reset();
2344-
}
2345-
}
2346-
23472351
@Override
23482352
public BreakerSettings getCircuitBreaker(Settings settingsToUse) {
23492353
return BreakerSettings.updateFromSettings(

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetDataFrameAnalyticsStatsAction.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import org.elasticsearch.xpack.ml.utils.persistence.MlParserUtils;
6262

6363
import java.util.ArrayList;
64+
import java.util.Arrays;
6465
import java.util.Collections;
6566
import java.util.Comparator;
6667
import java.util.List;
@@ -278,7 +279,7 @@ private void searchStats(DataFrameAnalyticsConfig config, TaskId parentTaskId, A
278279
() -> format(
279280
"[%s] Item failure encountered during multi search for request [indices=%s, source=%s]: %s",
280281
config.getId(),
281-
itemRequest.indices(),
282+
Arrays.toString(itemRequest.indices()),
282283
itemRequest.source(),
283284
itemResponse.getFailureMessage()
284285
),

0 commit comments

Comments
 (0)