
Commit 76bebfc

zipf distribution (#28)
* zipf distribution
* reformat
* fix format
1 parent 36ef126 commit 76bebfc

7 files changed: +198 −3 lines changed

BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj

Lines changed: 5 additions & 0 deletions
@@ -5,8 +5,13 @@
     <TargetFramework>netcoreapp3.1</TargetFramework>
   </PropertyGroup>
 
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+    <NoWarn>1701;1702;CS8002</NoWarn>
+  </PropertyGroup>
+
   <ItemGroup>
     <PackageReference Include="BenchmarkDotNet" Version="0.12.1" />
+    <PackageReference Include="MathNet.Numerics" Version="4.11.0" />
     <PackageReference Include="System.Runtime.Caching" Version="4.7.0" />
   </ItemGroup>
 

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using BenchmarkDotNet.Attributes;
+using BitFaster.Caching.Lru;
+using MathNet.Numerics.Distributions;
+using MathNet.Numerics.Random;
+
+namespace BitFaster.Caching.Benchmarks.Lru
+{
+    public class ZipDistribution
+    {
+        const double s = 0.86;
+        const int n = 500;
+        const int sampleCount = 1000;
+        private static int[] samples;
+
+        const int concurrencyLevel = 1;
+        const int cacheSize = 50; // 10% cache size
+
+        private static readonly ClassicLru<int, int> classicLru = new ClassicLru<int, int>(concurrencyLevel, cacheSize, EqualityComparer<int>.Default);
+        private static readonly ConcurrentLru<int, int> concurrentLru = new ConcurrentLru<int, int>(concurrencyLevel, cacheSize, EqualityComparer<int>.Default);
+        private static readonly ConcurrentTLru<int, int> concurrentTlru = new ConcurrentTLru<int, int>(concurrencyLevel, cacheSize, EqualityComparer<int>.Default, TimeSpan.FromMinutes(10));
+        private static readonly FastConcurrentLru<int, int> fastConcurrentLru = new FastConcurrentLru<int, int>(concurrencyLevel, cacheSize, EqualityComparer<int>.Default);
+        private static readonly FastConcurrentTLru<int, int> fastConcurrentTLru = new FastConcurrentTLru<int, int>(concurrencyLevel, cacheSize, EqualityComparer<int>.Default, TimeSpan.FromMinutes(1));
+
+        [GlobalSetup]
+        public void GlobalSetup()
+        {
+            samples = new int[sampleCount];
+            Zipf.Samples(samples, s, n);
+        }
+
+        [Benchmark(Baseline = true, OperationsPerInvoke = sampleCount)]
+        public void ClassicLru()
+        {
+            Func<int, int> func = x => x;
+
+            for (int i = 0; i < sampleCount; i++)
+            {
+                classicLru.GetOrAdd(samples[i], func);
+            }
+        }
+
+        [Benchmark(OperationsPerInvoke = sampleCount)]
+        public void FastConcurrentLru()
+        {
+            Func<int, int> func = x => x;
+
+            for (int i = 0; i < sampleCount; i++)
+            {
+                fastConcurrentLru.GetOrAdd(samples[i], func);
+            }
+        }
+
+        [Benchmark(OperationsPerInvoke = sampleCount)]
+        public void ConcurrentLru()
+        {
+            Func<int, int> func = x => x;
+
+            for (int i = 0; i < sampleCount; i++)
+            {
+                concurrentLru.GetOrAdd(samples[i], func);
+            }
+        }
+
+        [Benchmark(OperationsPerInvoke = sampleCount)]
+        public void FastConcurrentTLru()
+        {
+            Func<int, int> func = x => x;
+
+            for (int i = 0; i < sampleCount; i++)
+            {
+                fastConcurrentTLru.GetOrAdd(samples[i], func);
+            }
+        }
+
+        [Benchmark(OperationsPerInvoke = sampleCount)]
+        public void ConcurrentTLru()
+        {
+            Func<int, int> func = x => x;
+
+            for (int i = 0; i < sampleCount; i++)
+            {
+                concurrentTlru.GetOrAdd(samples[i], func);
+            }
+        }
+    }
+}
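
The benchmark above replays 1,000 Zipf-distributed keys (s = 0.86, n = 500) against caches that hold 50 items. As a rough sanity check of that setup, here is a small console sketch, not part of the commit (the class name ZipfSkewCheck is invented for illustration), that counts how many of the generated lookups target the 50 most popular keys — roughly what a cache that simply pinned the hottest 50 keys would hit.

~~~csharp
// Illustrative only: measure how skewed the generated key stream is.
// Zipf.Samples returns keys in 1..n, with smaller keys more probable, so
// counting samples <= cacheSize estimates the hit ratio of a cache that
// statically held the 50 most popular keys.
using System;
using System.Linq;
using MathNet.Numerics.Distributions;

class ZipfSkewCheck
{
    static void Main()
    {
        const double s = 0.86;
        const int n = 500;
        const int sampleCount = 1000;
        const int cacheSize = 50;

        var samples = new int[sampleCount];
        Zipf.Samples(samples, s, n);

        // Fraction of requests that fall inside the 50 hottest keys.
        double share = samples.Count(k => k <= cacheSize) / (double)sampleCount;
        Console.WriteLine($"{share:P1} of lookups target the top {cacheSize} keys");
    }
}
~~~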

BitFaster.Caching.Benchmarks/Program.cs

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ class Program
         static void Main(string[] args)
         {
             var summary = BenchmarkRunner
-                .Run<MissHitHitRemove>(ManualConfig.Create(DefaultConfig.Instance)
+                .Run<ZipDistribution>(ManualConfig.Create(DefaultConfig.Instance)
                 .AddJob(Job.RyuJitX64));
         }
     }
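
This change hard-wires the runner to the new ZipDistribution benchmark. If switching between benchmark classes without editing Program.cs is preferable, BenchmarkDotNet's BenchmarkSwitcher is a common alternative; the sketch below is illustrative only and not part of this commit.

~~~csharp
// Alternative Main (illustrative only): let the benchmark class be picked
// from the command line instead of recompiling Program.cs for each run.
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;

class Program
{
    static void Main(string[] args)
    {
        var config = ManualConfig.Create(DefaultConfig.Instance)
            .AddJob(Job.RyuJitX64);

        // Scans the assembly for benchmark classes and filters by args,
        // e.g. dotnet run -c Release -- --filter *ZipDistribution*
        BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config);
    }
}
~~~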

BitFaster.Sampling/BitFaster.Sampling.csproj

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+    <NoWarn>1701;1702;CS8002</NoWarn>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="MathNet.Numerics" Version="4.11.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\BitFaster.Caching\BitFaster.Caching.csproj" />
+  </ItemGroup>
+
+</Project>

BitFaster.Sampling/Program.cs

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+using System;
+using System.Collections.Generic;
+using BitFaster.Caching.Lru;
+using MathNet.Numerics.Distributions;
+
+namespace BitFaster.Sampling
+{
+    class Program
+    {
+        // Test methodology from the 2Q paper:
+        // http://www.vldb.org/conf/1994/P439.PDF
+
+        // s = 0.5 and s = 0.86.
+        // If there are N items, the probability of accessing an item numbered i or less is (i / N)^s.
+        // A setting of s = 0.86 gives an 80 / 20 distribution, while a setting of s = 0.5 gives a less
+        // skewed distribution (about 45 / 20).
+        const double s = 0.86;
+        // const double s = 0.5;
+
+        // The 2Q paper took 1 million samples.
+        const int sampleCount = 20000;
+
+        // We simulated a database of 50,000 pages and
+        // buffer sizes ranging from 2,500 (5%) items to 20,000
+        // (40%) items.
+        const int n = 50000;
+
+        const double cacheSizeRatio = 0.05;
+
+        const int cacheSize = (int)(n * cacheSizeRatio);
+
+        static void Main(string[] args)
+        {
+            Console.WriteLine($"Generating Zipfian distribution with {sampleCount} samples, s = {s}, N = {n}");
+
+            var samples = new int[sampleCount];
+            Zipf.Samples(samples, s, n);
+
+            var concurrentLru = new ConcurrentLru<int, int>(1, cacheSize, EqualityComparer<int>.Default);
+            var classicLru = new ClassicLru<int, int>(1, cacheSize, EqualityComparer<int>.Default);
+
+            Func<int, int> func = x => x;
+            Console.WriteLine($"Running {sampleCount} iterations");
+
+            for (int i = 0; i < sampleCount; i++)
+            {
+                concurrentLru.GetOrAdd(samples[i], func);
+                classicLru.GetOrAdd(samples[i], func);
+            }
+
+            Console.WriteLine($"ConcurrentLru hit ratio {concurrentLru.HitRatio * 100.0}%");
+            Console.WriteLine($"ClassicLru hit ratio {classicLru.HitRatio * 100.0}%");
+
+            Console.ReadLine();
+        }
+    }
+}
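
The sampling program prints measured hit ratios but gives no yardstick to compare them against. Assuming the samples are independent draws from the Zipf distribution, the best any cache of the same capacity can do in expectation is the combined probability mass of the cacheSize most popular keys. The sketch below is not part of the commit (the class name IdealHitRatio is invented) and computes that ceiling.

~~~csharp
// Illustrative only: theoretical best-case hit ratio for an ideal cache that
// pins the `cacheSize` most popular keys, assuming independent draws from a
// Zipf(s, n) distribution where P(key = k) is proportional to 1 / k^s.
using System;

class IdealHitRatio
{
    static void Main()
    {
        const double s = 0.86;
        const int n = 50000;
        const double cacheSizeRatio = 0.05;
        const int cacheSize = (int)(n * cacheSizeRatio); // 2,500 items, as in the sampler

        double total = 0.0, top = 0.0;
        for (int k = 1; k <= n; k++)
        {
            double weight = 1.0 / Math.Pow(k, s);
            total += weight;
            if (k <= cacheSize)
            {
                top += weight;
            }
        }

        // For independent draws, no replacement policy with the same capacity
        // can beat this in expectation.
        Console.WriteLine($"Ideal hit ratio for top {cacheSize} keys: {top / total:P1}");
    }
}
~~~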

BitFaster.sln

Lines changed: 6 additions & 0 deletions
@@ -14,6 +14,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.UnitTests
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.Benchmarks", "BitFaster.Caching.Benchmarks\BitFaster.Caching.Benchmarks.csproj", "{8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitFaster.Sampling", "BitFaster.Sampling\BitFaster.Sampling.csproj", "{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -32,6 +34,10 @@ Global
 		{8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{8CDE3FA5-B08A-4375-9EF0-F1F044B841C4}.Release|Any CPU.Build.0 = Release|Any CPU
+		{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{EAAE8DD3-EA1C-4BDF-920B-A0C858E853CB}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

README.md

Lines changed: 20 additions & 2 deletions
@@ -77,9 +77,27 @@ Intel Core i7-5600U CPU 2.60GHz (Broadwell), 1 CPU, 4 logical and 2 physical cor
 Job=RyuJitX64 Jit=RyuJit Platform=X64
 ~~~
 
-### Lookup speed
+### Lookup keys with a Zipf distribution
 
-Cache contains 6 items which are fetched repeatedly, no items are evicted. Representative of high hit rate scenario, when there are a low number of hot items.
+Take 1000 samples of a [Zipfian distribution](https://en.wikipedia.org/wiki/Zipf%27s_law) over a set of keys of size *N* and use the keys to look up values in the cache. If there are *N* items, the probability of accessing an item numbered *i* or less is (*i* / *N*)^*s*.
+
+*s* = 0.86 (yields approx. an 80/20 distribution)<br>
+*N* = 500
+
+Cache size = *N* / 10 (so we can cache 10% of the total set). ConcurrentLru has approximately the same performance as ClassicLru in this single-threaded test.
+
+
+| Method             | Mean     | Error   | StdDev  | Ratio | RatioSD |
+|------------------- |---------:|--------:|--------:|------:|--------:|
+| ClassicLru         | 176.1 ns | 2.74 ns | 2.56 ns |  1.00 |    0.00 |
+| FastConcurrentLru  | 178.0 ns | 2.76 ns | 2.45 ns |  1.01 |    0.02 |
+| ConcurrentLru      | 185.2 ns | 1.87 ns | 1.56 ns |  1.06 |    0.01 |
+| FastConcurrentTLru | 435.7 ns | 2.88 ns | 2.41 ns |  2.48 |    0.03 |
+| ConcurrentTLru     | 425.1 ns | 8.46 ns | 7.91 ns |  2.41 |    0.07 |
+
+### Raw Lookup speed
+
+In this test the same items are fetched repeatedly and no items are evicted. This is representative of a high hit rate scenario with a low number of hot items.
 
 - ConcurrentLru family does not move items in the queues, it is just marking as accessed for pure cache hits.
 - ClassicLru must maintain item order, and is internally splicing the fetched item to the head of the linked list.
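
For reference, and not part of the README change, the access model described in the Zipf section above can be transcribed directly into code: the cumulative probability of touching a key numbered *i* or less is (*i* / *N*)^*s*, and the cache holds *N* / 10 keys. The class and method names below are invented for illustration.

~~~csharp
// Illustrative sketch of the access model stated in the README, not benchmark code.
using System;

class ZipfAccessModel
{
    const double s = 0.86;        // skew used in the benchmark
    const int N = 500;            // number of distinct keys
    const int CacheSize = N / 10; // 50 cached items

    // Probability of accessing a key numbered i or less, per the README formula.
    static double CumulativeAccessProbability(int i) => Math.Pow((double)i / N, s);

    static void Main()
    {
        foreach (int i in new[] { CacheSize, N / 2, N })
        {
            Console.WriteLine($"P(key <= {i}) = {CumulativeAccessProbability(i):F2}");
        }
    }
}
~~~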
