Skip to content

Commit 26bd4b0

Browse files
Mpdreamzrusscam
authored and committed
add support for explain on analyze API
Closes #2684
1 parent f79e0fc commit 26bd4b0

File tree

5 files changed

+177
-10
lines changed

5 files changed

+177
-10
lines changed

src/Nest/Indices/Analyze/AnalyzeRequest.cs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,19 @@ public partial interface IAnalyzeRequest
3131

3232
///<summary>The text on which the analysis should be performed (when request body is not used)</summary>
3333
[JsonProperty("text")]
34-
string[] Text { get; set; }
34+
IEnumerable<string> Text { get; set; }
3535

3636
///<summary>The name of the tokenizer to use for the analysis</summary>
3737
[JsonProperty("tokenizer")]
3838
Union<string, ITokenizer> Tokenizer { get; set; }
39+
40+
///<summary>Return more details, and output the analyzer chain per step in the process</summary>
41+
[JsonProperty("explain")]
42+
bool? Explain { get; set; }
43+
44+
///<summary>Filter only certain token attributes to be returned</summary>
45+
[JsonProperty("attributes")]
46+
IEnumerable<string> Attributes { get; set; }
3947
}
4048

4149
public partial class AnalyzeRequest
@@ -49,6 +57,12 @@ public AnalyzeRequest(IndexName indices, string textToAnalyze)
4957
/// <inheritdoc />
5058
public Union<string, ITokenizer> Tokenizer { get; set; }
5159

60+
/// <inheritdoc />
61+
public bool? Explain { get; set; }
62+
63+
/// <inheritdoc />
64+
public IEnumerable<string> Attributes { get; set; }
65+
5266
/// <inheritdoc />
5367
public Union<string, IAnalyzer> Analyzer { get; set; }
5468

@@ -65,7 +79,7 @@ public AnalyzeRequest(IndexName indices, string textToAnalyze)
6579
public Field Field { get; set; }
6680

6781
/// <inheritdoc />
68-
public string[] Text { get; set; }
82+
public IEnumerable<string> Text { get; set; }
6983

7084
}
7185

@@ -78,7 +92,9 @@ public partial class AnalyzeDescriptor
7892
string IAnalyzeRequest.Normalizer { get; set; }
7993
AnalyzeTokenFilters IAnalyzeRequest.Filter { get; set; }
8094
Field IAnalyzeRequest.Field { get; set; }
81-
string[] IAnalyzeRequest.Text { get; set; }
95+
IEnumerable<string> IAnalyzeRequest.Text { get; set; }
96+
bool? IAnalyzeRequest.Explain { get; set; }
97+
IEnumerable<string> IAnalyzeRequest.Attributes { get; set; }
8298

8399
///<summary>The name of the tokenizer to use for the analysis</summary>
84100
public AnalyzeDescriptor Tokenizer(string tokenizer) => Assign(a => a.Tokenizer = tokenizer);
@@ -135,7 +151,15 @@ public AnalyzeDescriptor Filter(Func<AnalyzeTokenFiltersDescriptor, IPromise<Ana
135151
public AnalyzeDescriptor Text(params string[] text) => Assign(a => a.Text = text);
136152

137153
///<summary>The text on which the analysis should be performed</summary>
138-
public AnalyzeDescriptor Text(IEnumerable<string> text) => Assign(a => a.Text = text.ToArray());
154+
public AnalyzeDescriptor Text(IEnumerable<string> text) => Assign(a => a.Text = text);
155+
156+
/// <inheritdoc cref="IAnalyzeRequest.Explain" />
157+
public AnalyzeDescriptor Explain(bool? explain = true) => Assign(a => a.Explain = explain);
158+
159+
/// <inheritdoc cref="IAnalyzeRequest.Attributes" />
160+
public AnalyzeDescriptor Attributes(params string[] attributes) => Assign(a => a.Attributes = attributes);
139161

162+
/// <inheritdoc cref="IAnalyzeRequest.Attributes" />
163+
public AnalyzeDescriptor Attributes(IEnumerable<string> attributes) => Assign(a => a.Attributes = attributes);
140164
}
141165
}

src/Nest/Indices/Analyze/AnalyzeResponse.cs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,95 @@ namespace Nest
55
{
66
public interface IAnalyzeResponse : IResponse
77
{
8+
/// <summary>
9+
/// When <see cref="IAnalyzeRequest.Explain" /> is not true this will hold the analyzed tokens.
10+
/// </summary>
811
IReadOnlyCollection<AnalyzeToken> Tokens { get; }
12+
13+
/// <summary>
14+
/// When <see cref="IAnalyzeRequest.Explain" /> is set to true this will hold the detailed view of the analyzed tokens.
15+
/// </summary>
16+
AnalyzeDetail Detail { get; }
917
}
1018

1119
[JsonObject]
1220
public class AnalyzeResponse : ResponseBase, IAnalyzeResponse
1321
{
1422
[JsonProperty("tokens")]
1523
public IReadOnlyCollection<AnalyzeToken> Tokens { get; internal set; } = EmptyReadOnly<AnalyzeToken>.Collection;
24+
25+
[JsonProperty("detail")]
26+
public AnalyzeDetail Detail { get; internal set; }
27+
}
28+
29+
30+
[JsonObject]
31+
public class AnalyzeDetail
32+
{
33+
[JsonProperty("custom_analyzer")]
34+
public bool CustomAnalyzer { get; internal set; }
35+
36+
[JsonProperty("charfilters")]
37+
public IReadOnlyCollection<CharFilterDetail> CharFilters { get; internal set; } = EmptyReadOnly<CharFilterDetail>.Collection;
38+
39+
[JsonProperty("tokenfilters")]
40+
public IReadOnlyCollection<TokenDetail> Filters { get; internal set; } = EmptyReadOnly<TokenDetail>.Collection;
41+
42+
[JsonProperty("tokenizer")]
43+
public TokenDetail Tokenizer { get; internal set; }
44+
}
45+
46+
[JsonObject]
47+
public class CharFilterDetail
48+
{
49+
[JsonProperty("name")]
50+
public string Name { get; internal set; }
51+
52+
[JsonProperty("filtered_text")]
53+
public IReadOnlyCollection<string> FilteredText { get; internal set; } = EmptyReadOnly<string>.Collection;
54+
55+
}
56+
57+
[JsonObject]
58+
public class TokenDetail
59+
{
60+
[JsonProperty("name")]
61+
public string Name { get; internal set; }
62+
63+
[JsonProperty("tokens")]
64+
public IReadOnlyCollection<ExplainAnalyzeToken> Tokens { get; internal set; } = EmptyReadOnly<ExplainAnalyzeToken>.Collection;
65+
}
66+
67+
//TODO create an issue on the main repo: the explain output of this API uses camelCase property names (positionLength, termFrequency)
68+
//this mismatch with AnalyzeToken's snake_case names (e.g. position_length) is why we are unable to subclass from AnalyzeToken directly
69+
[JsonObject]
70+
public class ExplainAnalyzeToken
71+
{
72+
[JsonProperty("token")]
73+
public string Token { get; internal set; }
74+
75+
[JsonProperty("type")]
76+
public string Type { get; internal set; }
77+
78+
[JsonProperty("start_offset")]
79+
public long StartOffset { get; internal set; }
80+
81+
[JsonProperty("end_offset")]
82+
public long EndOffset { get; internal set; }
83+
84+
[JsonProperty("position")]
85+
public long Position { get; internal set; }
86+
87+
[JsonProperty("positionLength")]
88+
public long? PositionLength { get; internal set; }
89+
90+
[JsonProperty("termFrequency")]
91+
public long? TermFrequency { get; internal set; }
92+
93+
[JsonProperty("keyword")]
94+
public bool? Keyword { get; internal set; }
95+
96+
[JsonProperty("bytes")]
97+
public string Bytes { get; internal set; }
1698
}
1799
}

src/Nest/Indices/Analyze/AnalyzeToken.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,14 @@ public class AnalyzeToken
1212
[JsonProperty("type")]
1313
public string Type { get; internal set; }
1414

15-
//TODO change to long in 6.0... RC: (this is int in Elasticsearch codebase)
1615
[JsonProperty("start_offset")]
17-
public int StartOffset { get; internal set; }
16+
public long StartOffset { get; internal set; }
1817

1918
[JsonProperty("end_offset")]
20-
public int EndOffset { get; internal set; }
19+
public long EndOffset { get; internal set; }
2120

2221
[JsonProperty("position")]
23-
public int Position { get; internal set; }
22+
public long Position { get; internal set; }
2423

2524
[JsonProperty("position_length")]
2625
public long? PositionLength { get; internal set; }

src/Tests/Indices/Analyze/AnalyzeApiTests.cs

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ protected override LazyResponses ClientUsage() => Calls(
5050

5151
public class AnalyzeInlineApiTests : ApiIntegrationTestBase<ReadOnlyCluster, IAnalyzeResponse, IAnalyzeRequest, AnalyzeDescriptor, AnalyzeRequest>
5252
{
53-
private const string TextToAnalyze = "F# is <b>THE SUPERIOR</b> language :) :gandalf: ";
53+
protected const string TextToAnalyze = "F# is <b>THE SUPERIOR</b> language :) :gandalf: ";
5454

5555
public AnalyzeInlineApiTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { }
5656
protected override LazyResponses ClientUsage() => Calls(
@@ -117,4 +117,66 @@ protected override void ExpectResponse(IAnalyzeResponse response)
117117
tokens.Should().Contain("fsharp", "gandalf");
118118
}
119119
}
120+
121+
public class AnalyzeExplainApiTests : AnalyzeInlineApiTests
122+
{
123+
public AnalyzeExplainApiTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { }
124+
125+
protected override object ExpectJson => new
126+
{
127+
text = new[] { TextToAnalyze },
128+
tokenizer = new { max_token_length = 7, type = "standard" },
129+
char_filter = new object[]
130+
{
131+
"html_strip",
132+
new { type = "mapping", mappings = new[] { "F# => fsharp" } }
133+
},
134+
filter = new object[]
135+
{
136+
"lowercase",
137+
new { type = "stop", stopwords = new[] { "_english_", "the" } }
138+
},
139+
explain = true
140+
};
141+
142+
protected override Func<AnalyzeDescriptor, IAnalyzeRequest> Fluent => d => base.Fluent(d.Explain());
143+
144+
protected override AnalyzeRequest Initializer
145+
{
146+
get
147+
{
148+
var r = base.Initializer;
149+
r.Explain = true;
150+
return r;
151+
}
152+
}
153+
154+
protected override void ExpectResponse(IAnalyzeResponse response)
155+
{
156+
response.Tokens.Should().HaveCount(0);
157+
response.Detail.Should().NotBeNull("details should not be null because explain was specified");
158+
response.Detail.CustomAnalyzer.Should().BeTrue();
159+
response.Detail.CharFilters.Should().NotBeEmpty();
160+
foreach (var c in response.Detail.CharFilters)
161+
{
162+
c.Name.Should().NotBeNullOrWhiteSpace();
163+
c.FilteredText.Should().NotBeEmpty();
164+
}
165+
response.Detail.Filters.Should().NotBeEmpty();
166+
foreach (var c in response.Detail.Filters)
167+
AssertTokenDetail(c);
168+
169+
response.Detail.Tokenizer.Should().NotBeNull();
170+
AssertTokenDetail(response.Detail.Tokenizer);
171+
}
172+
173+
private static void AssertTokenDetail(TokenDetail c)
174+
{
175+
c.Name.Should().NotBeNullOrWhiteSpace();
176+
foreach (var t in c.Tokens)
177+
{
178+
t.Token.Should().NotBeNullOrWhiteSpace();
179+
}
180+
}
181+
}
120182
}

src/Tests/tests.default.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# tracked by git).
66

77
# mode either u (unit test), i (integration test) or m (mixed mode)
8-
mode: u
8+
mode: m
99
# the elasticsearch version that should be started
1010
# Can be a snapshot version of sonatype or "latest" to get the latest snapshot of sonatype
1111
elasticsearch_version: 6.0.0

0 commit comments

Comments
 (0)