@@ -8,10 +8,7 @@ package ycsb
88
99import (
1010 "context"
11- "encoding/binary"
1211 "fmt"
13- "hash"
14- "hash/fnv"
1512 "math"
1613 "math/rand/v2"
1714 "strings"
@@ -26,6 +23,7 @@ import (
2623 "github.com/cockroachdb/cockroach/pkg/workload"
2724 "github.com/cockroachdb/cockroach/pkg/workload/histogram"
2825 "github.com/cockroachdb/cockroach/pkg/workload/workloadimpl"
26+ "github.com/cockroachdb/crlib/crrand"
2927 "github.com/cockroachdb/errors"
3028 "github.com/jackc/pgx/v5"
3129 "github.com/jackc/pgx/v5/pgconn"
@@ -89,17 +87,17 @@ type ycsb struct {
8987 flags workload.Flags
9088 connFlags * workload.ConnFlags
9189
92- timeString bool
93- insertHash bool
94- zeroPadding int
95- insertStart int
96- insertCount int
97- recordCount int
98- json bool
99- families bool
100- rmwInTxn bool
101- sfu bool
102- splits int
90+ timeString bool
91+ insertRandom bool
92+ zeroPadding int
93+ insertStart int
94+ insertCount int
95+ recordCount int
96+ json bool
97+ families bool
98+ rmwInTxn bool
99+ sfu bool
100+ splits int
103101
104102 workload string
105103 requestDistribution string
@@ -131,9 +129,15 @@ var ycsbMeta = workload.Meta{
131129 `scan-freq` : {RuntimeOnly : true },
132130 `read-modify-write-freq` : {RuntimeOnly : true },
133131 }
132+ g .flags .SetNormalizeFunc (func (flags * pflag.FlagSet , name string ) pflag.NormalizedName {
133+ if name == `insert-hash` {
134+ name = `insert-random`
135+ }
136+ return pflag .NormalizedName (name )
137+ })
134138 g .flags .BoolVar (& g .timeString , `time-string` , false , `Prepend field[0-9] data with current time in microsecond precision.` )
135- g .flags .BoolVar (& g .insertHash , `insert-hash ` , true , `Key to be hashed or ordered.` )
136- g .flags .IntVar (& g .zeroPadding , `zero-padding` , 1 , `Key using "insert-hash=false" has zeros padded to left to make this length of digits.` )
139+ g .flags .BoolVar (& g .insertRandom , `insert-random ` , true , `Key to be pseudorandom or ordered.` )
140+ g .flags .IntVar (& g .zeroPadding , `zero-padding` , 1 , `Key has zeros padded to left to make this length of digits.` )
137141 g .flags .IntVar (& g .insertStart , `insert-start` , 0 , `Key to start initial sequential insertions from. (default 0)` )
138142 g .flags .IntVar (& g .insertCount , `insert-count` , 10000 , `Number of rows to sequentially insert before beginning workload.` )
139143 g .flags .IntVar (& g .recordCount , `record-count` , 0 , `Key to start workload insertions from. Must be >= insert-start + insert-count. (Default: insert-start + insert-count)` )
@@ -328,9 +332,13 @@ func (g *ycsb) Tables() []workload.Table {
328332 Splits : workload .Tuples (
329333 g .splits ,
330334 func (splitIdx int ) []interface {} {
335+ w := ycsbWorker {
336+ config : g ,
337+ prngPerm : crrand .MakePerm64 (RandomSeed .Seed ()),
338+ }
331339 step := math .MaxUint64 / uint64 (g .splits + 1 )
332340 return []interface {}{
333- keyNameFromHash (step * uint64 (splitIdx + 1 )),
341+ w . buildKeyName (step * uint64 (splitIdx + 1 )),
334342 }
335343 },
336344 ),
@@ -342,7 +350,7 @@ func (g *ycsb) Tables() []workload.Table {
342350 func (rowIdx int ) []interface {} {
343351 w := ycsbWorker {
344352 config : g ,
345- hashFunc : fnv . New64 ( ),
353+ prngPerm : crrand . MakePerm64 ( RandomSeed . Seed () ),
346354 }
347355 key := w .buildKeyName (uint64 (g .insertStart + rowIdx ))
348356 // TODO(peter): Need to fill in FIELD here, rather than an empty JSONB
@@ -359,12 +367,6 @@ func (g *ycsb) Tables() []workload.Table {
359367 const batchSize = 1000
360368 usertable .InitialRows = workload.BatchedTuples {
361369 NumBatches : (g .insertCount + batchSize - 1 ) / batchSize ,
362- // If the key sequence is hashed, duplicates are possible. Hash
363- // collisions are inevitable at large insert counts (they're at
364- // least inevitable at ~1b rows). Marking that the keys may contain
365- // duplicates will cause the data loader to use INSERT ... ON
366- // CONFLICT DO NOTHING statements.
367- MayContainDuplicates : ! g .insertHash ,
368370 FillBatch : func (batchIdx int , cb coldata.Batch , _ * bufalloc.ByteAllocator ) {
369371 rowBegin , rowEnd := batchIdx * batchSize , (batchIdx + 1 )* batchSize
370372 if rowEnd > g .insertCount {
@@ -385,7 +387,7 @@ func (g *ycsb) Tables() []workload.Table {
385387
386388 w := ycsbWorker {
387389 config : g ,
388- hashFunc : fnv . New64 ( ),
390+ prngPerm : crrand . MakePerm64 ( RandomSeed . Seed () ),
389391 }
390392 rng := rand .NewPCG (RandomSeed .Seed (), uint64 (batchIdx ))
391393
@@ -577,7 +579,7 @@ func (g *ycsb) Ops(
577579 requestGen : requestGen ,
578580 scanLengthGen : scanLengthGen ,
579581 rng : rng ,
580- hashFunc : fnv . New64 ( ),
582+ prngPerm : crrand . MakePerm64 ( RandomSeed . Seed () ),
581583 }
582584 ql .WorkerFns = append (ql .WorkerFns , w .run )
583585 }
@@ -622,8 +624,7 @@ type ycsbWorker struct {
622624 requestGen randGenerator // used to generate random keys for requests
623625 scanLengthGen randGenerator // used to generate length of scan operations
624626 rng * rand.Rand // used to generate random strings for the values
625- hashFunc hash.Hash64
626- hashBuf [binary .MaxVarintLen64 ]byte
627+ prngPerm crrand.Perm64 // used to map the key index to a pseudorandom key
627628}
628629
629630func (yw * ycsbWorker ) run (ctx context.Context ) error {
@@ -691,29 +692,12 @@ const (
691692 readModifyWriteOp operation = `readModifyWrite`
692693)
693694
694- func (yw * ycsbWorker ) hashKey (key uint64 ) uint64 {
695- yw .hashBuf = [binary .MaxVarintLen64 ]byte {} // clear hashBuf
696- binary .PutUvarint (yw .hashBuf [:], key )
697- yw .hashFunc .Reset ()
698- if _ , err := yw .hashFunc .Write (yw .hashBuf [:]); err != nil {
699- panic (err )
700- }
701- return yw .hashFunc .Sum64 ()
702- }
703-
704695func (yw * ycsbWorker ) buildKeyName (keynum uint64 ) string {
705- if yw .config .insertHash {
706- return keyNameFromHash (yw .hashKey (keynum ))
696+ if yw .config .insertRandom {
697+ // Use prngPerm to map the key index to a pseudorandom key.
698+ keynum = yw .prngPerm .At (keynum )
707699 }
708- return keyNameFromOrder (keynum , yw .config .zeroPadding )
709- }
710-
711- func keyNameFromHash (hashedKey uint64 ) string {
712- return fmt .Sprintf ("user%d" , hashedKey )
713- }
714-
715- func keyNameFromOrder (keynum uint64 , zeroPadding int ) string {
716- return fmt .Sprintf ("user%0*d" , zeroPadding , keynum )
700+ return fmt .Sprintf ("user%0*d" , yw .config .zeroPadding , keynum )
717701}
718702
719703// Keys are chosen by first drawing from a Zipf distribution, hashing the drawn
0 commit comments