Skip to content

Commit bf78582

Browse files
committed
Include similarity in index settings (#2924)
Non-binary-breaking back port of 648c3cf. Similarity settings are nested under settings > index. Add ClassicSimilarity. Add DFISimilarity. Add similarity settings to create index integration tests. Add XML comments for each similarity. Refactor SimilarityJsonConverter to not look up the NEST ISimilarity type with Type.GetType() but to infer it from the type property value directly. Add Similarity to the updatable index settings. Closes #2890.
1 parent a180e2b commit bf78582

File tree

22 files changed

+498
-92
lines changed

22 files changed

+498
-92
lines changed

src/Nest/CommonAbstractions/DictionaryLike/IsADictionaryBase.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ public abstract class IsADictionaryBase<TKey, TValue> : IIsADictionary<TKey, TVa
1010
protected Dictionary<TKey, TValue> BackingDictionary { get; }
1111
private ICollection<KeyValuePair<TKey, TValue>> Self => BackingDictionary;
1212

13-
protected IsADictionaryBase() { this.BackingDictionary = new Dictionary<TKey, TValue>(); }
13+
protected IsADictionaryBase() => this.BackingDictionary = new Dictionary<TKey, TValue>();
1414

1515
protected IsADictionaryBase(IDictionary<TKey, TValue> backingDictionary)
1616
{
@@ -52,14 +52,14 @@ void ICollection<KeyValuePair<TKey, TValue>>.Add(KeyValuePair<TKey, TValue> item
5252

5353
TValue IDictionary<TKey, TValue>.this[TKey key]
5454
{
55-
get { return this.BackingDictionary[key]; }
56-
set { this.BackingDictionary[ValidateKey(key)] = value; }
55+
get => this.BackingDictionary[key];
56+
set => this.BackingDictionary[ValidateKey(key)] = value;
5757
}
5858

5959
public TValue this[TKey key]
6060
{
61-
get { return this.BackingDictionary[key]; }
62-
set { this.BackingDictionary[ValidateKey(key)] = value; }
61+
get => this.BackingDictionary[key];
62+
set => this.BackingDictionary[ValidateKey(key)] = value;
6363
}
6464
}
6565
}

src/Nest/IndexModules/IndexSettings/IndexState.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using Newtonsoft.Json;
1+
using System;
2+
using Newtonsoft.Json;
23

34
namespace Nest
45
{
@@ -14,7 +15,8 @@ public interface IIndexState
1415
[JsonProperty("mappings")]
1516
IMappings Mappings { get; set; }
1617

17-
[JsonProperty("similarity")]
18+
[JsonIgnore]
19+
[Obsolete("Use Similarity within Settings. Removed in NEST 6.x")]
1820
ISimilarities Similarity { get; set; }
1921
}
2022

@@ -26,6 +28,7 @@ public class IndexState : IIndexState
2628

2729
public IAliases Aliases { get; set; }
2830

31+
[Obsolete("Use Similarity within Settings. Removed in NEST 6.x")]
2932
public ISimilarities Similarity { get; set; }
3033
}
3134
}

src/Nest/IndexModules/IndexSettings/Settings/DynamicIndexSettings.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ public interface IDynamicIndexSettings : IIsADictionary<string, object>
9191
/// Configure analysis
9292
/// </summary>
9393
IAnalysis Analysis { get; set; }
94+
95+
/// <summary>
96+
/// Configure similarity
97+
/// </summary>
98+
ISimilarities Similarity { get; set; }
9499
}
95100

96101
public class DynamicIndexSettings : IsADictionaryBase<string, object>, IDynamicIndexSettings
@@ -147,6 +152,9 @@ public DynamicIndexSettings(IDictionary<string, object> container) : base(contai
147152
/// <inheritdoc/>
148153
public IAnalysis Analysis { get; set; }
149154

155+
/// <inheritdoc/>
156+
public ISimilarities Similarity { get; set; }
157+
150158
/// <summary>
151159
/// Add any setting to the index
152160
/// </summary>
@@ -227,7 +235,12 @@ public TDescriptor Translog(Func<TranslogSettingsDescriptor, ITranslogSettings>
227235
public TDescriptor UnassignedNodeLeftDelayedTimeout(Time time) =>
228236
Assign(a => a.UnassignedNodeLeftDelayedTimeout = time);
229237

238+
/// <inheritdoc/>
230239
public TDescriptor Analysis(Func<AnalysisDescriptor, IAnalysis> selector) =>
231240
Assign(a => a.Analysis = selector?.Invoke(new AnalysisDescriptor()));
241+
242+
/// <inheritdoc/>
243+
public TDescriptor Similarity(Func<SimilaritiesDescriptor, IPromise<ISimilarities>> selector) =>
244+
Assign(a => a.Similarity = selector?.Invoke(new SimilaritiesDescriptor())?.Value);
232245
}
233246
}

src/Nest/IndexModules/IndexSettings/Settings/IndexSettingsConverter.cs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,40 +75,42 @@ public override void WriteJson(JsonWriter writer, object value, JsonSerializer s
7575
d[UpdatableIndexSettings.SlowlogIndexingSource] = indexing?.Source;
7676

7777
d[UpdatableIndexSettings.Analysis] = ds.Analysis;
78+
d[UpdatableIndexSettings.Similarity] = ds.Similarity;
7879

7980
var indexSettings = value as IIndexSettings;
80-
d[FixedIndexSettings.NumberOfShards] = indexSettings?.NumberOfShards;
81+
d[FixedIndexSettings.NumberOfShards] = indexSettings?.NumberOfShards;
8182
d[FixedIndexSettings.RoutingPartitionSize] = indexSettings?.RoutingPartitionSize;
8283
d[UpdatableIndexSettings.StoreType] = indexSettings?.FileSystemStorageImplementation;
8384
d[UpdatableIndexSettings.QueriesCacheEnabled] = indexSettings?.Queries?.Cache?.Enabled;
8485

8586
base.WriteJson(writer, d, serializer);
8687
}
8788

89+
public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
90+
{
91+
var s = new IndexSettings();
92+
SetKnownIndexSettings(reader, serializer, s);
93+
if (!typeof (IUpdateIndexSettingsRequest).IsAssignableFrom(objectType)) return s;
8894

89-
public JObject Flatten(JObject original, string prefix = "", JObject newObject = null)
95+
var request = new UpdateIndexSettingsRequest() { IndexSettings = s};
96+
return request;
97+
}
98+
99+
private static JObject Flatten(JObject original, string prefix = "", JObject newObject = null)
90100
{
91101
newObject = newObject ?? new JObject();
92102
foreach (var property in original.Properties())
93103
{
94-
if (property.Value is JObject && property.Name != UpdatableIndexSettings.Analysis)
104+
if (property.Value is JObject &&
105+
property.Name != UpdatableIndexSettings.Analysis &&
106+
property.Name != UpdatableIndexSettings.Similarity)
95107
Flatten(property.Value.Value<JObject>(), prefix + property.Name + ".", newObject);
96108
else newObject.Add(prefix + property.Name, property.Value);
97109
}
98110
return newObject;
99111
}
100112

101-
public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
102-
{
103-
var s = new IndexSettings();
104-
SetKnownIndexSettings(reader, serializer, s);
105-
if (!typeof (IUpdateIndexSettingsRequest).IsAssignableFrom(objectType)) return s;
106-
107-
var request = new UpdateIndexSettingsRequest() { IndexSettings = s};
108-
return request;
109-
}
110-
111-
private void SetKnownIndexSettings(JsonReader reader, JsonSerializer serializer, IIndexSettings s)
113+
private static void SetKnownIndexSettings(JsonReader reader, JsonSerializer serializer, IIndexSettings s)
112114
{
113115
var settings = Flatten(JObject.Load(reader)).Properties().ToDictionary(kv => kv.Name);
114116

@@ -194,6 +196,8 @@ private void SetKnownIndexSettings(JsonReader reader, JsonSerializer serializer,
194196
var setting = kv.Value;
195197
if (kv.Key == UpdatableIndexSettings.Analysis || kv.Key == "index.analysis")
196198
s.Analysis = setting.Value.Value<JObject>().ToObject<Analysis>(serializer);
199+
if (kv.Key == UpdatableIndexSettings.Similarity || kv.Key == "index.similarity")
200+
s.Similarity = setting.Value.Value<JObject>().ToObject<Similarities>(serializer);
197201
else
198202
{
199203
dict?.Add(kv.Key, serializer.Deserialize(kv.Value.Value.CreateReader()));

src/Nest/IndexModules/IndexSettings/Settings/UpdatableIndexSettings.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ public static class UpdatableIndexSettings
5050
public const string MergeSchedulerMaxThreadCount = "index.merge.scheduler.max_thread_count";
5151
public const string MergeSchedulerAutoThrottle = "index.merge.scheduler.auto_throttle";
5252

53+
public const string Similarity = "similarity";
54+
5355
public const string SlowlogSearchThresholdQueryWarn = "index.search.slowlog.threshold.query.warn";
5456
public const string SlowlogSearchThresholdQueryInfo = "index.search.slowlog.threshold.query.info";
5557
public const string SlowlogSearchThresholdQueryDebug = "index.search.slowlog.threshold.query.debug";

src/Nest/IndexModules/Similarity/BM25Similarity.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
namespace Nest
44
{
55
/// <summary>
6-
/// BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker, Susan Jones, Micheline Hancock-Beaulieu,
7-
/// and Mike Gatford. Okapi at TREC-3. In Proceedings of the Third Text REtrieval Conference (TREC 1994). Gaithersburg, USA, November 1994.
6+
/// BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker, Susan Jones, Micheline Hancock-Beaulieu,
7+
/// and Mike Gatford. Okapi at TREC-3. In Proceedings of the Third Text Retrieval Conference (TREC 1994). Gaithersburg, USA, November 1994.
88
/// </summary>
99
public interface IBM25Similarity : ISimilarity
1010
{
@@ -41,7 +41,7 @@ public class BM25Similarity : IBM25Similarity
4141
public bool? DiscountOverlaps { get; set; }
4242
}
4343
/// <inheritdoc/>
44-
public class BM25SimilarityDescriptor
44+
public class BM25SimilarityDescriptor
4545
: DescriptorBase<BM25SimilarityDescriptor, IBM25Similarity>, IBM25Similarity
4646
{
4747
string ISimilarity.Type => "BM25";
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
using Newtonsoft.Json;
2+
3+
namespace Nest
4+
{
5+
/// <summary>
6+
/// The classic similarity that is based on the TF/IDF model.
7+
/// </summary>
8+
public interface IClassicSimilarity : ISimilarity
9+
{
10+
/// <summary>
11+
/// Determines whether overlap tokens (tokens with 0 position increment) are ignored when computing norm.
12+
/// By default this is <c>true</c>, meaning overlap tokens do not count when computing norms.
13+
/// </summary>
14+
[JsonProperty("discount_overlaps")]
15+
bool? DiscountOverlaps { get; set; }
16+
}
17+
18+
/// <inheritdoc />
19+
public class ClassicSimilarity : IClassicSimilarity
20+
{
21+
public string Type => "classic";
22+
23+
/// <inheritdoc />
24+
public bool? DiscountOverlaps { get; set; }
25+
}
26+
27+
/// <inheritdoc />
28+
public class ClassicSimilarityDescriptor
29+
: DescriptorBase<ClassicSimilarityDescriptor, IClassicSimilarity>, IClassicSimilarity
30+
{
31+
string ISimilarity.Type => "classic";
32+
bool? IClassicSimilarity.DiscountOverlaps { get; set; }
33+
34+
/// <inheritdoc />
35+
public ClassicSimilarityDescriptor DiscountOverlaps(bool discount = true) => Assign(a => a.DiscountOverlaps = discount);
36+
}
37+
}

src/Nest/IndexModules/Similarity/CustomSimilarity.cs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,41 @@
44

55
namespace Nest
66
{
7+
/// <summary>
8+
/// A custom similarity
9+
/// </summary>
710
public interface ICustomSimilarity : ISimilarity, IIsADictionary<string, object> { }
811

912
/// <inheritdoc/>
1013
public class CustomSimilarity : IsADictionaryBase<string, object>, ICustomSimilarity
1114
{
12-
public string Type { get { return this["type"] as string; } set { this.Add("type", value); } }
15+
public string Type
16+
{
17+
get => this["type"] as string;
18+
set => this.Add("type", value);
19+
}
1320

1421
public CustomSimilarity(string type)
1522
{
1623
if (!string.IsNullOrEmpty(type)) this.Type = type;
1724
}
1825

1926
internal CustomSimilarity(IDictionary<string, object> container) : base(container) { }
27+
2028
internal CustomSimilarity(Dictionary<string, object> container)
21-
: base(container.Select(kv => kv).ToDictionary(kv => kv.Key, kv => kv.Value))
22-
{}
29+
: base(container.Select(kv => kv).ToDictionary(kv => kv.Key, kv => kv.Value)) { }
2330

2431
public void Add(string key, object value) => BackingDictionary.Add(key, value);
2532
}
33+
2634
/// <inheritdoc/>
2735
public class CustomSimilarityDescriptor
2836
: IsADictionaryDescriptorBase<CustomSimilarityDescriptor, ICustomSimilarity, string, object>
2937
{
30-
3138
public CustomSimilarityDescriptor() : base(new CustomSimilarity(string.Empty)) { }
3239

3340
internal CustomSimilarityDescriptor Type(string type) => Assign("type", type);
41+
3442
public CustomSimilarityDescriptor Add(string key, object value) => Assign(key, value);
3543
}
3644

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
using System.Runtime.Serialization;
2+
using Newtonsoft.Json;
3+
using Newtonsoft.Json.Converters;
4+
5+
namespace Nest
6+
{
7+
/// <summary>
8+
/// <see cref="IDFISimilarity"/> independence measure
9+
/// </summary>
10+
[JsonConverter(typeof(StringEnumConverter))]
11+
public enum DFIIndependenceMeasure
12+
{
13+
[EnumMember(Value = "standardized")]
14+
Standardized,
15+
16+
[EnumMember(Value = "saturated")]
17+
Saturated,
18+
19+
[EnumMember(Value = "chisquared")]
20+
ChiSquared
21+
}
22+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
using Newtonsoft.Json;
2+
3+
namespace Nest
4+
{
5+
/// <summary>
6+
/// Similarity that implements the divergence from independence model
7+
/// </summary>
8+
public interface IDFISimilarity : ISimilarity
9+
{
10+
/// <summary>
11+
/// The independence measure
12+
/// </summary>
13+
[JsonProperty("independence_measure")]
14+
DFIIndependenceMeasure? IndependenceMeasure { get; set; }
15+
}
16+
17+
/// <inheritdoc/>
18+
public class DFISimilarity : IDFISimilarity
19+
{
20+
public string Type => "DFI";
21+
22+
/// <inheritdoc/>
23+
public DFIIndependenceMeasure? IndependenceMeasure { get; set; }
24+
}
25+
26+
/// <inheritdoc/>
27+
public class DFISimilarityDescriptor
28+
: DescriptorBase<DFISimilarityDescriptor, IDFISimilarity>, IDFISimilarity
29+
{
30+
string ISimilarity.Type => "DFI";
31+
DFIIndependenceMeasure? IDFISimilarity.IndependenceMeasure { get; set; }
32+
33+
/// <inheritdoc/>
34+
public DFISimilarityDescriptor IndependenceMeasure(DFIIndependenceMeasure independenceMeasure) =>
35+
Assign(a => a.IndependenceMeasure = independenceMeasure);
36+
}
37+
}

0 commit comments

Comments
 (0)