Skip to content

Commit 1b9193d

Browse files
Add ScrollAllObservable docs (#4835) (#4836)
Co-authored-by: Russ Cam <russ.cam@elastic.co>
1 parent a0d7246 commit 1b9193d

File tree

6 files changed

+249
-1
lines changed

6 files changed

+249
-1
lines changed

docs/client-concepts/high-level/indexing/indexing-documents.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ var asyncBulkIndexResponse = await client.BulkAsync(b => b
138138
<1> synchronous method that returns an IBulkResponse, the same as IndexMany and can be inspected in the same way for errors
139139
<2> asynchronous method that returns a Task<IBulkResponse> that can be awaited
140140

141+
[[bulkall-observable]]
141142
==== Multiple documents with `BulkAllObservable` helper
142143

143144
Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing, without having to

docs/high-level.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ with some smarts to make working with Elasticsearch from a strongly typed langua
199199

200200
* <<returned-fields, Returning only certain fields>>
201201

202+
* <<scrolling-documents, Scrolling documents>>
203+
202204
* <<covariant-search-results, Covariant search results>>
203205

204206
include::search/writing-queries.asciidoc[]
@@ -207,6 +209,8 @@ include::query-dsl/bool-dsl/bool-dsl.asciidoc[]
207209

208210
include::search/returned-fields.asciidoc[]
209211

212+
include::search/scrolling-documents.asciidoc[]
213+
210214
include::{output-dir}/covariant-hits/covariant-search-results.asciidoc[]
211215

212216
[[aggregations]]
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master
2+
3+
:github: https://github.com/elastic/elasticsearch-net
4+
5+
:nuget: https://www.nuget.org/packages
6+
7+
////
8+
IMPORTANT NOTE
9+
==============
10+
This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Search/ScrollingDocuments.doc.cs.
11+
If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file,
12+
please modify the original csharp file found at the link and submit the PR with that change. Thanks!
13+
////
14+
15+
[[scrolling-documents]]
16+
=== Scrolling documents
17+
18+
The scroll API can be used to return a large collection of documents from Elasticsearch.
19+
20+
NEST exposes the scroll API and an observable scroll implementation that can be used
21+
to write concurrent scroll requests.
22+
23+
==== Simple use
24+
25+
The simplest use of the scroll API is to perform a search request with a
26+
scroll timeout, then pass the scroll id returned in each response to
27+
the next request to the scroll API, until no more documents are returned.
28+
29+
[source,csharp]
30+
----
31+
var searchResponse = Client.Search<Project>(s => s
32+
.Query(q => q
33+
.Term(f => f.State, StateOfBeing.Stable)
34+
)
35+
.Scroll("10s") <1>
36+
);
37+
38+
while (searchResponse.Documents.Any()) <2>
39+
{
40+
ProcessResponse(searchResponse); <3>
41+
searchResponse = Client.Scroll<Project>("10s", searchResponse.ScrollId);
42+
}
43+
----
44+
<1> Specify a scroll time for how long Elasticsearch should keep this scroll open on the server side. The time specified should be sufficient to process the response on the client side.
45+
<2> make subsequent requests to the scroll API to keep fetching documents, whilst documents are returned
46+
<3> do something with the response
47+
48+
[[scrollall-observable]]
49+
==== ScrollAllObservable
50+
51+
Similar to <<bulkall-observable, `BulkAllObservable`>> for bulk indexing a large number of documents,
52+
NEST exposes an observable scroll implementation, `ScrollAllObservable`, that can be used
53+
to write concurrent scroll requests. `ScrollAllObservable` uses sliced scrolls to split the scroll into
54+
multiple slices that can be consumed concurrently.
55+
56+
The simplest use of `ScrollAllObservable` is:
57+
58+
[source,csharp]
59+
----
60+
int numberOfSlices = Environment.ProcessorCount; <1>
61+
62+
var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
63+
.MaxDegreeOfParallelism(numberOfSlices) <2>
64+
.Search(s => s
65+
.Query(q => q
66+
.Term(f => f.State, StateOfBeing.Stable)
67+
)
68+
)
69+
);
70+
71+
scrollAllObservable.Wait(TimeSpan.FromMinutes(10), response => <3>
72+
{
73+
ProcessResponse(response.SearchResponse); <4>
74+
});
75+
----
76+
<1> See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#sliced-scroll[sliced scroll] documentation for choosing an appropriate number of slices.
77+
<2> Number of concurrent sliced scroll requests. You usually want to set this to the same value as the number of slices.
78+
<3> Total overall time for scrolling **all** documents. Ensure this is a sufficient value to scroll all documents
79+
<4> do something with the response
80+
81+
More control over how the observable is consumed can be achieved by writing
82+
your own observer and subscribing to the observable, which will initiate scrolling.
83+
84+
[source,csharp]
85+
----
86+
int numberOfSlices = Environment.ProcessorCount;
87+
88+
var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
89+
.MaxDegreeOfParallelism(numberOfSlices)
90+
.Search(s => s
91+
.Query(q => q
92+
.Term(f => f.State, StateOfBeing.Stable)
93+
)
94+
)
95+
);
96+
97+
var waitHandle = new ManualResetEvent(false);
98+
ExceptionDispatchInfo info = null;
99+
100+
var scrollAllObserver = new ScrollAllObserver<Project>(
101+
onNext: response => ProcessResponse(response.SearchResponse), <1>
102+
onError: e =>
103+
{
104+
info = ExceptionDispatchInfo.Capture(e); <2>
105+
waitHandle.Set();
106+
},
107+
onCompleted: () => waitHandle.Set()
108+
);
109+
110+
scrollAllObservable.Subscribe(scrollAllObserver); <3>
111+
112+
waitHandle.WaitOne(); <4>
113+
info?.Throw(); <5>
114+
----
115+
<1> do something with the response
116+
<2> if an exception is thrown, capture it to throw outside of the observer
117+
<3> initiate scrolling
118+
<4> block the current thread until the wait handle is set
119+
<5> if an exception was captured whilst scrolling, throw it
120+

tests/Tests/ClientConcepts/HighLevel/Indexing/IndexingDocuments.doc.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ public async Task BulkIndexDocuments()
142142
.IndexMany(people)); //<2> asynchronous method that returns a Task<IBulkResponse> that can be awaited
143143
}
144144

145-
/**
145+
/**[[bulkall-observable]]
146146
* ==== Multiple documents with `BulkAllObservable` helper
147147
*
148148
* Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing, without having to
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System;
6+
using System.Linq;
7+
using System.Runtime.ExceptionServices;
8+
using System.Threading;
9+
using Elastic.Elasticsearch.Xunit.XunitPlumbing;
10+
using Nest;
11+
using Tests.Core.ManagedElasticsearch.Clusters;
12+
using Tests.Domain;
13+
using Tests.Framework.DocumentationTests;
14+
15+
namespace Tests.Search
16+
{
17+
/**=== Scrolling documents
18+
*
19+
* The scroll API can be used to return a large collection of documents from Elasticsearch.
20+
*
21+
* NEST exposes the scroll API and an observable scroll implementation that can be used
22+
* to write concurrent scroll requests.
23+
*/
24+
public class ScrollDocuments : IntegrationDocumentationTestBase, IClusterFixture<ReadOnlyCluster>
25+
{
26+
public ScrollDocuments(ReadOnlyCluster cluster) : base(cluster) { }
27+
28+
// hide
29+
private void ProcessResponse(ISearchResponse<Project> response) { }
30+
31+
/**==== Simple use
32+
*
33+
* The simplest use of the scroll API is to perform a search request with a
34+
* scroll timeout, then pass the scroll id returned in each response to
35+
* the next request to the scroll API, until no more documents are returned.
36+
*/
37+
[I]
38+
public void SimpleUse()
39+
{
40+
var searchResponse = Client.Search<Project>(s => s
41+
.Query(q => q
42+
.Term(f => f.State, StateOfBeing.Stable)
43+
)
44+
.Scroll("10s") // <1> Specify a scroll time for how long Elasticsearch should keep this scroll open on the server side. The time specified should be sufficient to process the response on the client side.
45+
);
46+
47+
while (searchResponse.Documents.Any()) // <2> make subsequent requests to the scroll API to keep fetching documents, whilst documents are returned
48+
{
49+
ProcessResponse(searchResponse); // <3> do something with the response
50+
searchResponse = Client.Scroll<Project>("10s", searchResponse.ScrollId);
51+
}
52+
}
53+
54+
/**[[scrollall-observable]]
55+
* ==== ScrollAllObservable
56+
*
57+
* Similar to <<bulkall-observable, `BulkAllObservable`>> for bulk indexing a large number of documents,
58+
* NEST exposes an observable scroll implementation, `ScrollAllObservable`, that can be used
59+
* to write concurrent scroll requests. `ScrollAllObservable` uses sliced scrolls to split the scroll into
60+
* multiple slices that can be consumed concurrently.
61+
*
62+
* The simplest use of `ScrollAllObservable` is:
63+
*/
64+
[I]
65+
public void SimpleScrollAllObservable()
66+
{
67+
int numberOfSlices = Environment.ProcessorCount; // <1> See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#sliced-scroll[sliced scroll] documentation for choosing an appropriate number of slices.
68+
69+
var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
70+
.MaxDegreeOfParallelism(numberOfSlices) // <2> Number of concurrent sliced scroll requests. You usually want to set this to the same value as the number of slices.
71+
.Search(s => s
72+
.Query(q => q
73+
.Term(f => f.State, StateOfBeing.Stable)
74+
)
75+
)
76+
);
77+
78+
scrollAllObservable.Wait(TimeSpan.FromMinutes(10), response => // <3> Total overall time for scrolling **all** documents. Ensure this is a sufficient value to scroll all documents
79+
{
80+
ProcessResponse(response.SearchResponse); // <4> do something with the response
81+
});
82+
}
83+
84+
/**
85+
* More control over how the observable is consumed can be achieved by writing
86+
* your own observer and subscribing to the observable, which will initiate scrolling.
87+
*/
88+
[I]
89+
public void ComplexScrollAllObservable()
90+
{
91+
int numberOfSlices = Environment.ProcessorCount;
92+
93+
var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
94+
.MaxDegreeOfParallelism(numberOfSlices)
95+
.Search(s => s
96+
.Query(q => q
97+
.Term(f => f.State, StateOfBeing.Stable)
98+
)
99+
)
100+
);
101+
102+
var waitHandle = new ManualResetEvent(false);
103+
ExceptionDispatchInfo info = null;
104+
105+
var scrollAllObserver = new ScrollAllObserver<Project>(
106+
onNext: response => ProcessResponse(response.SearchResponse), // <1> do something with the response
107+
onError: e =>
108+
{
109+
info = ExceptionDispatchInfo.Capture(e); // <2> if an exception is thrown, capture it to throw outside of the observer
110+
waitHandle.Set();
111+
},
112+
onCompleted: () => waitHandle.Set()
113+
);
114+
115+
scrollAllObservable.Subscribe(scrollAllObserver); // <3> initiate scrolling
116+
117+
waitHandle.WaitOne(); // <4> block the current thread until the wait handle is set
118+
info?.Throw(); // <5> if an exception was captured whilst scrolling, throw it
119+
}
120+
}
121+
}

tests/Tests/high-level.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,13 @@ with some smarts to make working with Elasticsearch from a strongly typed langua
149149
- <<writing-queries, Writing queries>>
150150
- <<bool-queries, Writing bool queries>>
151151
- <<returned-fields, Returning only certain fields>>
152+
- <<scrolling-documents, Scrolling documents>>
152153
- <<covariant-search-results, Covariant search results>>
153154

154155
include::search/writing-queries.asciidoc[]
155156
include::query-dsl/bool-dsl/bool-dsl.asciidoc[]
156157
include::search/returned-fields.asciidoc[]
158+
include::search/scrolling-documents.asciidoc[]
157159
include::{output-dir}/covariant-hits/covariant-search-results.asciidoc[]
158160

159161
[[aggregations]]

0 commit comments

Comments
 (0)