Skip to content

Commit 30be13b

Browse files
authored
Use VersionedCollapsingMergeTree and insert based deletes (#149)
### TL;DR Migrated database tables from ReplacingMergeTree to VersionedCollapsingMergeTree engine and updated column names for better consistency. ### What changed? - Replaced lightweight deletes with inserts for blocks, logs, transactions and traces - Changed database engine from ReplacingMergeTree to VersionedCollapsingMergeTree - Added `sign` and `insert_timestamp` columns for versioning support - Renamed block table columns for consistency: - `number` -> `block_number` - `timestamp` -> `block_timestamp` - Updated block timestamp type from UInt64 to DateTime - Optimized index granularity settings - Adjusted table ordering to use block_timestamp - Removed nullable constraints from certain columns - Updated corresponding Go structs to match new schema - Added option to force consistent data (uses FINAL with clickhouse) ### How to test? 1. Drop existing tables 2. Run new table creation scripts 3. Verify data insertion works with new schema 4. Test data retrieval using updated column names 5. Verify versioning works by inserting duplicate records 6. Confirm proper handling of deleted records ### Why make this change? - Better support for data versioning and deletion tracking - Improved data consistency with standardized column naming - Enhanced query performance through optimized indexing - More efficient handling of data updates and deletions - Better type handling for timestamp-related operations
2 parents ae78b9d + 672a462 commit 30be13b

17 files changed

+638
-441
lines changed

api/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ type QueryParams struct {
4444

4545
// @Description Flag to enable abi decoding of tx data
4646
Decode bool `schema:"decode"`
47+
// @Description Flag to force consistent data at the expense of query speed
48+
ForceConsistentData bool `schema:"force_consistent_data"`
4749
}
4850

4951
// Meta represents metadata for a query response

configs/config.go

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package config
22

33
import (
4+
"encoding/json"
45
"fmt"
6+
"os"
57
"strings"
68

79
"github.com/rs/zerolog/log"
@@ -63,15 +65,23 @@ type StorageConnectionConfig struct {
6365
Redis *RedisConfig `mapstructure:"redis"`
6466
}
6567

68+
type TableConfig struct {
69+
DefaultSelectFields []string `mapstructure:"defaultSelectFields"`
70+
TableName string `mapstructure:"tableName"`
71+
}
72+
73+
type TableOverrideConfig map[string]TableConfig
74+
6675
type ClickhouseConfig struct {
67-
Host string `mapstructure:"host"`
68-
Port int `mapstructure:"port"`
69-
Username string `mapstructure:"username"`
70-
Password string `mapstructure:"password"`
71-
Database string `mapstructure:"database"`
72-
DisableTLS bool `mapstructure:"disableTLS"`
73-
AsyncInsert bool `mapstructure:"asyncInsert"`
74-
MaxRowsPerInsert int `mapstructure:"maxRowsPerInsert"`
76+
Host string `mapstructure:"host"`
77+
Port int `mapstructure:"port"`
78+
Username string `mapstructure:"username"`
79+
Password string `mapstructure:"password"`
80+
Database string `mapstructure:"database"`
81+
DisableTLS bool `mapstructure:"disableTLS"`
82+
AsyncInsert bool `mapstructure:"asyncInsert"`
83+
MaxRowsPerInsert int `mapstructure:"maxRowsPerInsert"`
84+
ChainBasedConfig map[string]TableOverrideConfig `mapstructure:"chainBasedConfig"`
7585
}
7686

7787
type MemoryConfig struct {
@@ -160,5 +170,48 @@ func LoadConfig(cfgFile string) error {
160170
return fmt.Errorf("error unmarshalling config: %v", err)
161171
}
162172

173+
err = setCustomJSONConfigs()
174+
if err != nil {
175+
return fmt.Errorf("error setting custom JSON configs: %v", err)
176+
}
177+
178+
// Add debug logging
179+
if clickhouse := Cfg.Storage.Main.Clickhouse; clickhouse != nil {
180+
log.Debug().
181+
Interface("chainConfig", clickhouse.ChainBasedConfig).
182+
Msgf("Loaded chain config %v", clickhouse.ChainBasedConfig)
183+
}
184+
185+
return nil
186+
}
187+
188+
func setCustomJSONConfigs() error {
189+
if chainConfigJSON := os.Getenv("STORAGE_MAIN_CLICKHOUSE_CHAINBASEDCONFIG"); chainConfigJSON != "" {
190+
var mainChainConfig map[string]TableOverrideConfig
191+
if err := json.Unmarshal([]byte(chainConfigJSON), &mainChainConfig); err != nil {
192+
return fmt.Errorf("error parsing main chainBasedConfig JSON: %v", err)
193+
}
194+
if Cfg.Storage.Main.Clickhouse != nil {
195+
Cfg.Storage.Main.Clickhouse.ChainBasedConfig = mainChainConfig
196+
}
197+
}
198+
if chainConfigJSON := os.Getenv("STORAGE_STAGING_CLICKHOUSE_CHAINBASEDCONFIG"); chainConfigJSON != "" {
199+
var stagingChainConfig map[string]TableOverrideConfig
200+
if err := json.Unmarshal([]byte(chainConfigJSON), &stagingChainConfig); err != nil {
201+
return fmt.Errorf("error parsing staging chainBasedConfig JSON: %v", err)
202+
}
203+
if Cfg.Storage.Staging.Clickhouse != nil {
204+
Cfg.Storage.Staging.Clickhouse.ChainBasedConfig = stagingChainConfig
205+
}
206+
}
207+
if chainConfigJSON := os.Getenv("STORAGE_ORCHESTRATOR_CLICKHOUSE_CHAINBASEDCONFIG"); chainConfigJSON != "" {
208+
var orchestratorChainConfig map[string]TableOverrideConfig
209+
if err := json.Unmarshal([]byte(chainConfigJSON), &orchestratorChainConfig); err != nil {
210+
return fmt.Errorf("error parsing orchestrator chainBasedConfig JSON: %v", err)
211+
}
212+
if Cfg.Storage.Main.Clickhouse != nil {
213+
Cfg.Storage.Main.Clickhouse.ChainBasedConfig = orchestratorChainConfig
214+
}
215+
}
163216
return nil
164217
}

internal/common/block.go

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,34 @@ package common
22

33
import (
44
"math/big"
5+
"time"
56
)
67

78
type Block struct {
8-
ChainId *big.Int `json:"chain_id" ch:"chain_id"`
9-
Number *big.Int `json:"number" ch:"number"`
10-
Hash string `json:"hash" ch:"hash"`
11-
ParentHash string `json:"parent_hash" ch:"parent_hash"`
12-
Timestamp uint64 `json:"timestamp" ch:"timestamp"`
13-
Nonce string `json:"nonce" ch:"nonce"`
14-
Sha3Uncles string `json:"sha3_uncles" ch:"sha3_uncles"`
15-
MixHash string `json:"mix_hash" ch:"mix_hash"`
16-
Miner string `json:"miner" ch:"miner"`
17-
StateRoot string `json:"state_root" ch:"state_root"`
18-
TransactionsRoot string `json:"transactions_root" ch:"transactions_root"`
19-
ReceiptsRoot string `json:"receipts_root" ch:"receipts_root"`
20-
LogsBloom string `json:"logs_bloom" ch:"logs_bloom"`
21-
Size uint64 `json:"size" ch:"size"`
22-
ExtraData string `json:"extra_data" ch:"extra_data"`
23-
Difficulty *big.Int `json:"difficulty" ch:"difficulty"`
24-
TotalDifficulty *big.Int `json:"total_difficulty" ch:"total_difficulty"`
25-
TransactionCount uint64 `json:"transaction_count" ch:"transaction_count"`
26-
GasLimit *big.Int `json:"gas_limit" ch:"gas_limit"`
27-
GasUsed *big.Int `json:"gas_used" ch:"gas_used"`
28-
WithdrawalsRoot string `json:"withdrawals_root" ch:"withdrawals_root"`
29-
BaseFeePerGas uint64 `json:"base_fee_per_gas" ch:"base_fee_per_gas"`
9+
ChainId *big.Int `json:"chain_id" ch:"chain_id"`
10+
Number *big.Int `json:"block_number" ch:"block_number"`
11+
Hash string `json:"hash" ch:"hash"`
12+
ParentHash string `json:"parent_hash" ch:"parent_hash"`
13+
Timestamp time.Time `json:"block_timestamp" ch:"block_timestamp"`
14+
Nonce string `json:"nonce" ch:"nonce"`
15+
Sha3Uncles string `json:"sha3_uncles" ch:"sha3_uncles"`
16+
MixHash string `json:"mix_hash" ch:"mix_hash"`
17+
Miner string `json:"miner" ch:"miner"`
18+
StateRoot string `json:"state_root" ch:"state_root"`
19+
TransactionsRoot string `json:"transactions_root" ch:"transactions_root"`
20+
ReceiptsRoot string `json:"receipts_root" ch:"receipts_root"`
21+
LogsBloom string `json:"logs_bloom" ch:"logs_bloom"`
22+
Size uint64 `json:"size" ch:"size"`
23+
ExtraData string `json:"extra_data" ch:"extra_data"`
24+
Difficulty *big.Int `json:"difficulty" ch:"difficulty"`
25+
TotalDifficulty *big.Int `json:"total_difficulty" ch:"total_difficulty"`
26+
TransactionCount uint64 `json:"transaction_count" ch:"transaction_count"`
27+
GasLimit *big.Int `json:"gas_limit" ch:"gas_limit"`
28+
GasUsed *big.Int `json:"gas_used" ch:"gas_used"`
29+
WithdrawalsRoot string `json:"withdrawals_root" ch:"withdrawals_root"`
30+
BaseFeePerGas uint64 `json:"base_fee_per_gas" ch:"base_fee_per_gas"`
31+
Sign int8 `json:"sign" ch:"sign"`
32+
InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"`
3033
}
3134

3235
type BlockData struct {

internal/common/log.go

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,29 @@ import (
55
"fmt"
66
"math/big"
77
"sync"
8+
"time"
89

910
"github.com/ethereum/go-ethereum/accounts/abi"
1011
gethCommon "github.com/ethereum/go-ethereum/common"
1112
"github.com/rs/zerolog/log"
1213
)
1314

1415
type Log struct {
15-
ChainId *big.Int `json:"chain_id" ch:"chain_id" swaggertype:"string"`
16-
BlockNumber *big.Int `json:"block_number" ch:"block_number" swaggertype:"string"`
17-
BlockHash string `json:"block_hash" ch:"block_hash"`
18-
BlockTimestamp uint64 `json:"block_timestamp" ch:"block_timestamp"`
19-
TransactionHash string `json:"transaction_hash" ch:"transaction_hash"`
20-
TransactionIndex uint64 `json:"transaction_index" ch:"transaction_index"`
21-
LogIndex uint64 `json:"log_index" ch:"log_index"`
22-
Address string `json:"address" ch:"address"`
23-
Data string `json:"data" ch:"data"`
24-
Topic0 string `json:"topic_0" ch:"topic_0"`
25-
Topic1 string `json:"topic_1" ch:"topic_1"`
26-
Topic2 string `json:"topic_2" ch:"topic_2"`
27-
Topic3 string `json:"topic_3" ch:"topic_3"`
16+
ChainId *big.Int `json:"chain_id" ch:"chain_id" swaggertype:"string"`
17+
BlockNumber *big.Int `json:"block_number" ch:"block_number" swaggertype:"string"`
18+
BlockHash string `json:"block_hash" ch:"block_hash"`
19+
BlockTimestamp time.Time `json:"block_timestamp" ch:"block_timestamp"`
20+
TransactionHash string `json:"transaction_hash" ch:"transaction_hash"`
21+
TransactionIndex uint64 `json:"transaction_index" ch:"transaction_index"`
22+
LogIndex uint64 `json:"log_index" ch:"log_index"`
23+
Address string `json:"address" ch:"address"`
24+
Data string `json:"data" ch:"data"`
25+
Topic0 string `json:"topic_0" ch:"topic_0"`
26+
Topic1 string `json:"topic_1" ch:"topic_1"`
27+
Topic2 string `json:"topic_2" ch:"topic_2"`
28+
Topic3 string `json:"topic_3" ch:"topic_3"`
29+
Sign int8 `json:"sign" ch:"sign"`
30+
InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"`
2831
}
2932

3033
func (l *Log) GetTopic(index int) (string, error) {

internal/common/trace.go

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,33 @@ package common
22

33
import (
44
"math/big"
5+
"time"
56
)
67

78
type Trace struct {
8-
ChainID *big.Int `json:"chain_id" ch:"chain_id"`
9-
BlockNumber *big.Int `json:"block_number" ch:"block_number"`
10-
BlockHash string `json:"block_hash" ch:"block_hash"`
11-
BlockTimestamp uint64 `json:"block_timestamp" ch:"block_timestamp"`
12-
TransactionHash string `json:"transaction_hash" ch:"transaction_hash"`
13-
TransactionIndex uint64 `json:"transaction_index" ch:"transaction_index"`
14-
Subtraces int64 `json:"subtraces" ch:"subtraces"`
15-
TraceAddress []uint64 `json:"trace_address" ch:"trace_address"`
16-
TraceType string `json:"trace_type" ch:"type"`
17-
CallType string `json:"call_type" ch:"call_type"`
18-
Error string `json:"error" ch:"error"`
19-
FromAddress string `json:"from_address" ch:"from_address"`
20-
ToAddress string `json:"to_address" ch:"to_address"`
21-
Gas *big.Int `json:"gas" ch:"gas"`
22-
GasUsed *big.Int `json:"gas_used" ch:"gas_used"`
23-
Input string `json:"input" ch:"input"`
24-
Output string `json:"output" ch:"output"`
25-
Value *big.Int `json:"value" ch:"value"`
26-
Author string `json:"author" ch:"author"`
27-
RewardType string `json:"reward_type" ch:"reward_type"`
28-
RefundAddress string `json:"refund_address" ch:"refund_address"`
9+
ChainID *big.Int `json:"chain_id" ch:"chain_id"`
10+
BlockNumber *big.Int `json:"block_number" ch:"block_number"`
11+
BlockHash string `json:"block_hash" ch:"block_hash"`
12+
BlockTimestamp time.Time `json:"block_timestamp" ch:"block_timestamp"`
13+
TransactionHash string `json:"transaction_hash" ch:"transaction_hash"`
14+
TransactionIndex uint64 `json:"transaction_index" ch:"transaction_index"`
15+
Subtraces int64 `json:"subtraces" ch:"subtraces"`
16+
TraceAddress []uint64 `json:"trace_address" ch:"trace_address"`
17+
TraceType string `json:"trace_type" ch:"type"`
18+
CallType string `json:"call_type" ch:"call_type"`
19+
Error string `json:"error" ch:"error"`
20+
FromAddress string `json:"from_address" ch:"from_address"`
21+
ToAddress string `json:"to_address" ch:"to_address"`
22+
Gas *big.Int `json:"gas" ch:"gas"`
23+
GasUsed *big.Int `json:"gas_used" ch:"gas_used"`
24+
Input string `json:"input" ch:"input"`
25+
Output string `json:"output" ch:"output"`
26+
Value *big.Int `json:"value" ch:"value"`
27+
Author string `json:"author" ch:"author"`
28+
RewardType string `json:"reward_type" ch:"reward_type"`
29+
RefundAddress string `json:"refund_address" ch:"refund_address"`
30+
Sign int8 `json:"sign" ch:"sign"`
31+
InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"`
2932
}
3033

3134
type RawTraces = []map[string]interface{}

internal/common/transaction.go

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,41 +5,44 @@ import (
55
"math/big"
66
"strings"
77
"sync"
8+
"time"
89

910
"github.com/ethereum/go-ethereum/accounts/abi"
1011
"github.com/rs/zerolog/log"
1112
)
1213

1314
type Transaction struct {
14-
ChainId *big.Int `json:"chain_id" ch:"chain_id" swaggertype:"string"`
15-
Hash string `json:"hash" ch:"hash"`
16-
Nonce uint64 `json:"nonce" ch:"nonce"`
17-
BlockHash string `json:"block_hash" ch:"block_hash"`
18-
BlockNumber *big.Int `json:"block_number" ch:"block_number" swaggertype:"string"`
19-
BlockTimestamp uint64 `json:"block_timestamp" ch:"block_timestamp"`
20-
TransactionIndex uint64 `json:"transaction_index" ch:"transaction_index"`
21-
FromAddress string `json:"from_address" ch:"from_address"`
22-
ToAddress string `json:"to_address" ch:"to_address"`
23-
Value *big.Int `json:"value" ch:"value" swaggertype:"string"`
24-
Gas uint64 `json:"gas" ch:"gas"`
25-
GasPrice *big.Int `json:"gas_price" ch:"gas_price" swaggertype:"string"`
26-
Data string `json:"data" ch:"data"`
27-
FunctionSelector string `json:"function_selector" ch:"function_selector"`
28-
MaxFeePerGas *big.Int `json:"max_fee_per_gas" ch:"max_fee_per_gas" swaggertype:"string"`
29-
MaxPriorityFeePerGas *big.Int `json:"max_priority_fee_per_gas" ch:"max_priority_fee_per_gas" swaggertype:"string"`
30-
TransactionType uint8 `json:"transaction_type" ch:"transaction_type"`
31-
R *big.Int `json:"r" ch:"r" swaggertype:"string"`
32-
S *big.Int `json:"s" ch:"s" swaggertype:"string"`
33-
V *big.Int `json:"v" ch:"v" swaggertype:"string"`
34-
AccessListJson *string `json:"access_list_json" ch:"access_list"`
35-
ContractAddress *string `json:"contract_address" ch:"contract_address"`
36-
GasUsed *uint64 `json:"gas_used" ch:"gas_used"`
37-
CumulativeGasUsed *uint64 `json:"cumulative_gas_used" ch:"cumulative_gas_used"`
38-
EffectiveGasPrice *big.Int `json:"effective_gas_price" ch:"effective_gas_price" swaggertype:"string"`
39-
BlobGasUsed *uint64 `json:"blob_gas_used" ch:"blob_gas_used"`
40-
BlobGasPrice *big.Int `json:"blob_gas_price" ch:"blob_gas_price" swaggertype:"string"`
41-
LogsBloom *string `json:"logs_bloom" ch:"logs_bloom"`
42-
Status *uint64 `json:"status" ch:"status"`
15+
ChainId *big.Int `json:"chain_id" ch:"chain_id" swaggertype:"string"`
16+
Hash string `json:"hash" ch:"hash"`
17+
Nonce uint64 `json:"nonce" ch:"nonce"`
18+
BlockHash string `json:"block_hash" ch:"block_hash"`
19+
BlockNumber *big.Int `json:"block_number" ch:"block_number" swaggertype:"string"`
20+
BlockTimestamp time.Time `json:"block_timestamp" ch:"block_timestamp"`
21+
TransactionIndex uint64 `json:"transaction_index" ch:"transaction_index"`
22+
FromAddress string `json:"from_address" ch:"from_address"`
23+
ToAddress string `json:"to_address" ch:"to_address"`
24+
Value *big.Int `json:"value" ch:"value" swaggertype:"string"`
25+
Gas uint64 `json:"gas" ch:"gas"`
26+
GasPrice *big.Int `json:"gas_price" ch:"gas_price" swaggertype:"string"`
27+
Data string `json:"data" ch:"data"`
28+
FunctionSelector string `json:"function_selector" ch:"function_selector"`
29+
MaxFeePerGas *big.Int `json:"max_fee_per_gas" ch:"max_fee_per_gas" swaggertype:"string"`
30+
MaxPriorityFeePerGas *big.Int `json:"max_priority_fee_per_gas" ch:"max_priority_fee_per_gas" swaggertype:"string"`
31+
TransactionType uint8 `json:"transaction_type" ch:"transaction_type"`
32+
R *big.Int `json:"r" ch:"r" swaggertype:"string"`
33+
S *big.Int `json:"s" ch:"s" swaggertype:"string"`
34+
V *big.Int `json:"v" ch:"v" swaggertype:"string"`
35+
AccessListJson *string `json:"access_list_json" ch:"access_list"`
36+
ContractAddress *string `json:"contract_address" ch:"contract_address"`
37+
GasUsed *uint64 `json:"gas_used" ch:"gas_used"`
38+
CumulativeGasUsed *uint64 `json:"cumulative_gas_used" ch:"cumulative_gas_used"`
39+
EffectiveGasPrice *big.Int `json:"effective_gas_price" ch:"effective_gas_price" swaggertype:"string"`
40+
BlobGasUsed *uint64 `json:"blob_gas_used" ch:"blob_gas_used"`
41+
BlobGasPrice *big.Int `json:"blob_gas_price" ch:"blob_gas_price" swaggertype:"string"`
42+
LogsBloom *string `json:"logs_bloom" ch:"logs_bloom"`
43+
Status *uint64 `json:"status" ch:"status"`
44+
Sign int8 `json:"sign" ch:"sign"`
45+
InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"`
4346
}
4447

4548
type DecodedTransactionData struct {

internal/common/utils.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -163,20 +163,21 @@ func isType(word string) bool {
163163
var allowedFunctions = map[string]struct{}{
164164
"sum": {},
165165
"count": {},
166-
"reinterpretAsUInt256": {},
166+
"reinterpretasuint256": {},
167167
"reverse": {},
168168
"unhex": {},
169169
"substring": {},
170170
"length": {},
171-
"toUInt256": {},
171+
"touint256": {},
172172
"if": {},
173-
"toStartOfMonth": {},
174-
"toStartOfDay": {},
175-
"toStartOfHour": {},
176-
"toStartOfMinute": {},
177-
"toDate": {},
178-
"toDateTime": {},
173+
"tostartofmonth": {},
174+
"tostartofday": {},
175+
"tostartofhour": {},
176+
"tostartofminute": {},
177+
"todate": {},
178+
"todatetime": {},
179179
"concat": {},
180+
"in": {},
180181
}
181182

182183
var disallowedPatterns = []string{
@@ -207,7 +208,7 @@ func ValidateQuery(query string) error {
207208
matches := functionPattern.FindAllStringSubmatch(query, -1)
208209
for _, match := range matches {
209210
funcName := match[1]
210-
if _, ok := allowedFunctions[funcName]; !ok {
211+
if _, ok := allowedFunctions[strings.ToLower(funcName)]; !ok {
211212
return fmt.Errorf("function '%s' is not allowed", funcName)
212213
}
213214
}

0 commit comments

Comments
 (0)