Add ScrollAllObservable docs (#4835) (#4836)

github-actions[bot] · russcam · web-flow · commit 1b9193d71dc5 · 2020-07-01T18:52:30.000+10:00
Co-authored-by: Russ Cam <russ.cam@elastic.co>
diff --git a/docs/client-concepts/high-level/indexing/indexing-documents.asciidoc b/docs/client-concepts/high-level/indexing/indexing-documents.asciidoc
@@ -138,6 +138,7 @@ var asyncBulkIndexResponse = await client.BulkAsync(b => b
 <1> synchronous method that returns an IBulkResponse, the same as IndexMany and can be inspected in the same way for errors
 <2> asynchronous method that returns a Task<IBulkResponse> that can be awaited
 
+[[bulkall-observable]]
 ==== Multiple documents with `BulkAllObservable` helper
 
 Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing, without having to
diff --git a/docs/high-level.asciidoc b/docs/high-level.asciidoc
@@ -199,6 +199,8 @@ with some smarts to make working with Elasticsearch from a strongly typed langua
 
 * <<returned-fields, Returning only certain fields>>
 
+* <<scrolling-documents, Scrolling documents>>
+
 * <<covariant-search-results, Covariant search results>>
 
 include::search/writing-queries.asciidoc[]
@@ -207,6 +209,8 @@ include::query-dsl/bool-dsl/bool-dsl.asciidoc[]
 
 include::search/returned-fields.asciidoc[]
 
+include::search/scrolling-documents.asciidoc[]
+
 include::{output-dir}/covariant-hits/covariant-search-results.asciidoc[]
 
 [[aggregations]]
diff --git a/docs/search/scrolling-documents.asciidoc b/docs/search/scrolling-documents.asciidoc
@@ -0,0 +1,120 @@
+:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master
+
+:github: https://github.com/elastic/elasticsearch-net
+
+:nuget: https://www.nuget.org/packages
+
+////
+IMPORTANT NOTE
+==============
+This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/tests/Tests/Search/ScrollingDocuments.doc.cs.
+If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file,
+please modify the original csharp file found at the link and submit the PR with that change. Thanks!
+////
+
+[[scrolling-documents]]
+=== Scrolling documents
+
+The scroll API can be used to return a large collection of documents from Elasticsearch.
+
+NEST exposes the scroll API and an observable scroll implementation that can be used
+to write concurrent scroll requests.
+
+==== Simple use
+
+The simplest use of the scroll API is to perform a search request with a
+scroll timeout, then pass the scroll id returned in each response to
+the next request to the scroll API, until no more documents are returned.
+
+[source,csharp]
+----
+var searchResponse = Client.Search<Project>(s => s
+    .Query(q => q
+        .Term(f => f.State, StateOfBeing.Stable)
+    )
+    .Scroll("10s") <1>
+);
+
+while (searchResponse.Documents.Any()) <2>
+{
+    ProcessResponse(searchResponse); <3>
+    searchResponse = Client.Scroll<Project>("10s", searchResponse.ScrollId);
+}
+----
+<1> Specify a scroll time for how long Elasticsearch should keep this scroll open on the server side. The time specified should be sufficient to process the response on the client side.
+<2> make subsequent requests to the scroll API to keep fetching documents, whilst documents are returned
+<3> do something with the response
+
+[[scrollall-observable]]
+==== ScrollAllObservable
+
+Similar to <<bulkall-observable, `BulkAllObservable`>> for bulk indexing a large number of documents,
+NEST exposes an observable scroll implementation, `ScrollAllObservable`, that can be used
+to write concurrent scroll requests. `ScrollAllObservable` uses sliced scrolls to split the scroll into
+multiple slices that can be consumed concurrently.
+
+The simplest use of `ScrollAllObservable` is
+
+[source,csharp]
+----
+int numberOfSlices = Environment.ProcessorCount; <1>
+
+var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
+    .MaxDegreeOfParallelism(numberOfSlices) <2>
+    .Search(s => s
+        .Query(q => q
+            .Term(f => f.State, StateOfBeing.Stable)
+        )
+    )
+);
+
+scrollAllObservable.Wait(TimeSpan.FromMinutes(10), response => <3>
+{
+    ProcessResponse(response.SearchResponse); <4>
+});
+----
+<1> See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#sliced-scroll[sliced scroll] documentation for choosing an appropriate number of slices.
+<2> Number of concurrent sliced scroll requests. You usually want to set this to the same value as the number of slices.
+<3> Total overall time for scrolling **all** documents. Ensure this is a sufficient value to scroll all documents
+<4> do something with the response
+
+More control over how the observable is consumed can be achieved by writing
+your own observer and subscribing to the observable, which will initiate scrolling.
+
+[source,csharp]
+----
+int numberOfSlices = Environment.ProcessorCount;
+
+var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
+    .MaxDegreeOfParallelism(numberOfSlices)
+    .Search(s => s
+        .Query(q => q
+            .Term(f => f.State, StateOfBeing.Stable)
+        )
+    )
+);
+
+var waitHandle = new ManualResetEvent(false);
+ExceptionDispatchInfo info = null;
+
+var scrollAllObserver = new ScrollAllObserver<Project>(
+    onNext: response => ProcessResponse(response.SearchResponse), <1>
+    onError: e =>
+    {
+        info = ExceptionDispatchInfo.Capture(e); <2>
+        waitHandle.Set();
+    },
+    onCompleted: () => waitHandle.Set()
+);
+
+scrollAllObservable.Subscribe(scrollAllObserver); <3>
+
+waitHandle.WaitOne(); <4>
+info?.Throw(); <5>
+----
+<1> do something with the response
+<2> if an exception is thrown, capture it to throw outside of the observer
+<3> initiate scrolling
+<4> block the current thread until the wait handle is set
+<5> if an exception was captured whilst scrolling, throw it
+
diff --git a/tests/Tests/ClientConcepts/HighLevel/Indexing/IndexingDocuments.doc.cs b/tests/Tests/ClientConcepts/HighLevel/Indexing/IndexingDocuments.doc.cs
@@ -142,7 +142,7 @@ public async Task BulkIndexDocuments()
 				.IndexMany(people)); //<2> asynchronous method that returns a Task<IBulkResponse> that can be awaited
 		}
 
-		/**
+		/**[[bulkall-observable]]
 		* ==== Multiple documents with `BulkAllObservable` helper
 		*
 		* Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing, without having to
diff --git a/tests/Tests/Search/ScrollingDocuments.doc.cs b/tests/Tests/Search/ScrollingDocuments.doc.cs
@@ -0,0 +1,121 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using System;
+using System.Linq;
+using System.Runtime.ExceptionServices;
+using System.Threading;
+using Elastic.Elasticsearch.Xunit.XunitPlumbing;
+using Nest;
+using Tests.Core.ManagedElasticsearch.Clusters;
+using Tests.Domain;
+using Tests.Framework.DocumentationTests;
+
+namespace Tests.Search
+{
+	/**=== Scrolling documents
+	 *
+	 * The scroll API can be used to return a large collection of documents from Elasticsearch.
+	 *
+	 * NEST exposes the scroll API and an observable scroll implementation that can be used
+	 * to write concurrent scroll requests.
+	 */
+	public class ScrollDocuments : IntegrationDocumentationTestBase, IClusterFixture<ReadOnlyCluster>
+	{
+		public ScrollDocuments(ReadOnlyCluster cluster) : base(cluster) { }
+
+		// hide
+		private void ProcessResponse(ISearchResponse<Project> response) { }
+
+		/**==== Simple use
+		 *
+		 * The simplest use of the scroll API is to perform a search request with a
+		 * scroll timeout, then pass the scroll id returned in each response to
+		 * the next request to the scroll API, until no more documents are returned.
+		 */
+		[I]
+		public void SimpleUse()
+		{
+			var searchResponse = Client.Search<Project>(s => s
+				.Query(q => q
+					.Term(f => f.State, StateOfBeing.Stable)
+				)
+				.Scroll("10s") // <1> Specify a scroll time for how long Elasticsearch should keep this scroll open on the server side. The time specified should be sufficient to process the response on the client side.
+			);
+
+			while (searchResponse.Documents.Any()) // <2> make subsequent requests to the scroll API to keep fetching documents, whilst documents are returned
+			{
+				ProcessResponse(searchResponse); // <3> do something with the response
+				searchResponse = Client.Scroll<Project>("10s", searchResponse.ScrollId);
+			}
+		}
+
+		/**[[scrollall-observable]]
+		 * ==== ScrollAllObservable
+		 *
+		 * Similar to <<bulkall-observable, `BulkAllObservable`>> for bulk indexing a large number of documents,
+		 * NEST exposes an observable scroll implementation, `ScrollAllObservable`, that can be used
+		 * to write concurrent scroll requests. `ScrollAllObservable` uses sliced scrolls to split the scroll into
+		 * multiple slices that can be consumed concurrently.
+		 *
+		 * The simplest use of `ScrollAllObservable` is
+		 */
+		[I]
+		public void SimpleScrollAllObservable()
+		{
+			int numberOfSlices = Environment.ProcessorCount; // <1> See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#sliced-scroll[sliced scroll] documentation for choosing an appropriate number of slices.
+
+			var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
+				.MaxDegreeOfParallelism(numberOfSlices) // <2> Number of concurrent sliced scroll requests. You usually want to set this to the same value as the number of slices.
+				.Search(s => s
+					.Query(q => q
+						.Term(f => f.State, StateOfBeing.Stable)
+					)
+				)
+			);
+
+			scrollAllObservable.Wait(TimeSpan.FromMinutes(10), response => // <3> Total overall time for scrolling **all** documents. Ensure this is a sufficient value to scroll all documents
+			{
+				ProcessResponse(response.SearchResponse); // <4> do something with the response
+			});
+		}
+
+		/**
+		 * More control over how the observable is consumed can be achieved by writing
+		 * your own observer and subscribing to the observable, which will initiate scrolling.
+		 */
+		[I]
+		public void ComplexScrollAllObservable()
+		{
+			int numberOfSlices = Environment.ProcessorCount;
+
+			var scrollAllObservable = Client.ScrollAll<Project>("10s", numberOfSlices, sc => sc
+				.MaxDegreeOfParallelism(numberOfSlices)
+				.Search(s => s
+					.Query(q => q
+						.Term(f => f.State, StateOfBeing.Stable)
+					)
+				)
+			);
+
+			var waitHandle = new ManualResetEvent(false);
+			ExceptionDispatchInfo info = null;
+
+			var scrollAllObserver = new ScrollAllObserver<Project>(
+				onNext: response => ProcessResponse(response.SearchResponse), // <1> do something with the response
+				onError: e =>
+				{
+					info = ExceptionDispatchInfo.Capture(e); // <2> if an exception is thrown, capture it to throw outside of the observer
+					waitHandle.Set();
+				},
+				onCompleted: () => waitHandle.Set()
+			);
+
+			scrollAllObservable.Subscribe(scrollAllObserver); // <3> initiate scrolling
+
+			waitHandle.WaitOne(); // <4> block the current thread until the wait handle is set
+			info?.Throw(); // <5> if an exception was captured whilst scrolling, throw it
+		}
+	}
+}
diff --git a/tests/Tests/high-level.asciidoc b/tests/Tests/high-level.asciidoc
@@ -149,11 +149,13 @@ with some smarts to make working with Elasticsearch from a strongly typed langua
 - <<writing-queries, Writing queries>>
 - <<bool-queries, Writing bool queries>>
 - <<returned-fields, Returning only certain fields>>
+- <<scrolling-documents, Scrolling documents>>
 - <<covariant-search-results, Covariant search results>>
 
 include::search/writing-queries.asciidoc[]
 include::query-dsl/bool-dsl/bool-dsl.asciidoc[]
 include::search/returned-fields.asciidoc[]
+include::search/scrolling-documents.asciidoc[]
 include::{output-dir}/covariant-hits/covariant-search-results.asciidoc[]
 
 [[aggregations]]

Original file line number	Diff line number	Diff line change
`@@ -142,7 +142,7 @@ public async Task BulkIndexDocuments()`
`142`	`142`	`.IndexMany(people)); //<2> asynchronous method that returns a Task<IBulkResponse> that can be awaited`
`143`	`143`	`}`
`144`	`144`
`145`		`- /**`
	`145`	`+ /**[[bulkall-observable]]`
`146`	`146`	* ==== Multiple documents with `BulkAllObservable` helper
`147`	`147`	`*`
`148`	`148`	* Using the `BulkAllObservable` helper allows you to focus on the overall objective of indexing, without having to