|
| 1 | +package ingest |
| 2 | + |
import (
	"context"
	"fmt"
	"log/slog"
	"mime"
	"strings"
	"time"

	"github.com/bsv-blockchain/go-wallet-toolbox/pkg/defs"
	"github.com/bsv-blockchain/go-wallet-toolbox/pkg/internal/logging"
	"github.com/bsv-blockchain/go-wallet-toolbox/pkg/services/chaintracks/models"
	"github.com/go-softwarelab/common/pkg/must"
	"github.com/go-softwarelab/common/pkg/to"
)
| 16 | + |
| 17 | +// BulkIngestorWOC provides logic to ingest and synchronize block headers from WhatsOnChain bulk endpoints. |
| 18 | +// Utilizes a wocClient to fetch block headers and block height resources from the WhatsOnChain API service. |
| 19 | +// Maintains a logger for structured logging and a chain identifier for selecting network-specific resources. |
| 20 | +// Designed for efficient bulk fetching of header file metadata and incremental synchronization of chain state. |
| 21 | +type BulkIngestorWOC struct { |
| 22 | + logger *slog.Logger |
| 23 | + chain defs.BSVNetwork |
| 24 | + wocClient *wocClient |
| 25 | +} |
| 26 | + |
| 27 | +// NewBulkIngestorWOC creates a new BulkIngestorWOC for a given logger, network, and optional configuration options. |
| 28 | +// It sets up a dedicated WhatsOnChain bulk client for the specified BSV network and uses the provided logger. |
| 29 | +// Optional configuration options allow customization such as API key or overriding the default HTTP client factory. |
| 30 | +// Returns a pointer to the BulkIngestorWOC which can efficiently ingest and synchronize block header files. |
| 31 | +func NewBulkIngestorWOC(logger *slog.Logger, chain defs.BSVNetwork, opts ...func(options *BulkIngestorWocOptions)) *BulkIngestorWOC { |
| 32 | + logger = logging.Child(logger, "bulk_ingestor_woc") |
| 33 | + |
| 34 | + options := to.OptionsWithDefault(DefaultBulkIngestorWocOptions(), opts...) |
| 35 | + |
| 36 | + return &BulkIngestorWOC{ |
| 37 | + logger: logger, |
| 38 | + chain: chain, |
| 39 | + wocClient: newWocClient(logger, chain, options.APIKey, options.RestyClientFactory.New()), |
| 40 | + } |
| 41 | +} |
| 42 | + |
| 43 | +// Synchronize fetches available bulk header files and selects those overlapping the specified height range. |
| 44 | +// Synchronize returns metadata for the required files and a downloader for retrieving their data from WhatsOnChain. |
| 45 | +// Synchronize returns an error if fetching or parsing file metadata fails, or if no appropriate files are found. |
| 46 | +func (b *BulkIngestorWOC) Synchronize(ctx context.Context, presentHeight uint, rangeToFetch models.HeightRange) ([]BulkHeaderFileInfo, BulkFileDownloader, error) { |
| 47 | + allFiles, err := b.fetchBulkHeaderFilesInfo(ctx) |
| 48 | + if err != nil { |
| 49 | + return nil, nil, fmt.Errorf("failed to fetch bulk header files info: %w", err) |
| 50 | + } |
| 51 | + |
| 52 | + if len(allFiles) == 0 { |
| 53 | + return nil, nil, fmt.Errorf("no bulk header files available from WhatsOnChain") |
| 54 | + } |
| 55 | + |
| 56 | + neededFiles := make([]wocBulkFileInfo, 0) |
| 57 | + for _, file := range allFiles { |
| 58 | + if file.heightRange.Overlaps(rangeToFetch) { |
| 59 | + neededFiles = append(neededFiles, file) |
| 60 | + } |
| 61 | + } |
| 62 | + |
| 63 | + result := make([]BulkHeaderFileInfo, 0, len(neededFiles)) |
| 64 | + for _, file := range neededFiles { |
| 65 | + bulkFileInfo, err := b.toBulkHeaderFileInfo(ctx, &file) |
| 66 | + if err != nil { |
| 67 | + return nil, nil, fmt.Errorf("failed to convert to BulkHeaderFileInfo for file %s: %w", file.filename, err) |
| 68 | + } |
| 69 | + |
| 70 | + result = append(result, *bulkFileInfo) |
| 71 | + } |
| 72 | + |
| 73 | + return result, b.bulkFileDownloader(), nil |
| 74 | + |
| 75 | +} |
| 76 | + |
| 77 | +func (b *BulkIngestorWOC) toBulkHeaderFileInfo(ctx context.Context, file *wocBulkFileInfo) (*BulkHeaderFileInfo, error) { |
| 78 | + prevChainWork := prevChainWorkForGenesis |
| 79 | + prevHash := genesisAsPrevBlockHash |
| 80 | + if file.heightRange.MinHeight > 0 { |
| 81 | + prevBlock, err := b.wocClient.GetBlockByHeight(ctx, file.heightRange.MinHeight-1) |
| 82 | + if err != nil { |
| 83 | + return nil, fmt.Errorf("failed to get previous block at height %d: %w", file.heightRange.MinHeight-1, err) |
| 84 | + } |
| 85 | + |
| 86 | + prevChainWork = prevBlock.Chainwork |
| 87 | + prevHash = prevBlock.Hash |
| 88 | + } |
| 89 | + |
| 90 | + lastBlock, err := b.wocClient.GetBlockByHeight(ctx, file.heightRange.MaxHeight) |
| 91 | + if err != nil { |
| 92 | + return nil, fmt.Errorf("failed to get last block at height %d: %w", file.heightRange.MaxHeight, err) |
| 93 | + } |
| 94 | + |
| 95 | + return &BulkHeaderFileInfo{ |
| 96 | + FileName: fmt.Sprintf("%d_%d_headers.bin", file.heightRange.MinHeight, file.heightRange.MaxHeight), |
| 97 | + FirstHeight: file.heightRange.MinHeight, |
| 98 | + Count: must.ConvertToIntFromUnsigned(file.heightRange.MaxHeight) - must.ConvertToIntFromUnsigned(file.heightRange.MinHeight) + 1, |
| 99 | + Chain: b.chain, |
| 100 | + SourceURL: to.Ptr(file.url), |
| 101 | + |
| 102 | + PrevChainWork: prevChainWork, |
| 103 | + PrevHash: prevHash, |
| 104 | + |
| 105 | + LastChainWork: lastBlock.Chainwork, |
| 106 | + LastHash: &lastBlock.Hash, |
| 107 | + |
| 108 | + // Not supported, we don't download the file at this point and WoC doesn't provide it in metadata |
| 109 | + FileHash: nil, |
| 110 | + }, nil |
| 111 | +} |
| 112 | + |
| 113 | +func (b *BulkIngestorWOC) bulkFileDownloader() BulkFileDownloader { |
| 114 | + return func(ctx context.Context, fileInfo BulkHeaderFileInfo) (BulkFileData, error) { |
| 115 | + if fileInfo.SourceURL == nil { |
| 116 | + panic("SourceURL is nil in bulk file downloader") |
| 117 | + } |
| 118 | + |
| 119 | + b.logger.Info("Downloading bulk header file", slog.String("file_name", fileInfo.FileName)) |
| 120 | + |
| 121 | + content, err := b.wocClient.DownloadHeaderFile(ctx, *fileInfo.SourceURL) |
| 122 | + if err != nil { |
| 123 | + return BulkFileData{}, fmt.Errorf("failed to download bulk header file %s: %w", fileInfo.FileName, err) |
| 124 | + } |
| 125 | + |
| 126 | + return BulkFileData{ |
| 127 | + Info: fileInfo, |
| 128 | + Data: content, |
| 129 | + AccessedAt: time.Now(), |
| 130 | + }, nil |
| 131 | + } |
| 132 | +} |
| 133 | + |
| 134 | +type wocBulkFileInfo struct { |
| 135 | + heightRange models.HeightRange |
| 136 | + url string |
| 137 | + filename string |
| 138 | +} |
| 139 | + |
| 140 | +func (b *BulkIngestorWOC) fetchBulkHeaderFilesInfo(ctx context.Context) ([]wocBulkFileInfo, error) { |
| 141 | + response, err := b.wocClient.GetHeadersResourceList(ctx) |
| 142 | + if err != nil { |
| 143 | + return nil, fmt.Errorf("failed to get headers resource list from WhatsOnChain: %w", err) |
| 144 | + } |
| 145 | + |
| 146 | + result := make([]wocBulkFileInfo, 0, len(response.Files)) |
| 147 | + for _, fileURL := range response.Files { |
| 148 | + filename, heightRange, err := b.parseURL(ctx, fileURL) |
| 149 | + if err != nil { |
| 150 | + return nil, fmt.Errorf("failed to parse height range from URL %s: %w", fileURL, err) |
| 151 | + } |
| 152 | + |
| 153 | + result = append(result, wocBulkFileInfo{ |
| 154 | + heightRange: heightRange, |
| 155 | + url: fileURL, |
| 156 | + filename: filename, |
| 157 | + }) |
| 158 | + } |
| 159 | + |
| 160 | + return result, nil |
| 161 | +} |
| 162 | + |
| 163 | +// parseURL parses the height range from the given WhatsOnChain bulk header file URL. |
| 164 | +// "https://api.whatsonchain.com/v1/bsv/main/block/headers/0_10000_headers.bin", |
| 165 | +// "https://api.whatsonchain.com/v1/bsv/main/block/headers/10001_20000_headers.bin", |
| 166 | +// (...) |
| 167 | +// "https://api.whatsonchain.com/v1/bsv/main/block/headers/latest" |
| 168 | +// The latest endpoint - we don't know the max height by URL alone; the min height is previous max + 1 |
| 169 | +// So we need to get the Content-Disposition header from the HEAD request to get the actual filename |
| 170 | +func (b *BulkIngestorWOC) parseURL(ctx context.Context, url string) (filename string, heightRange models.HeightRange, err error) { |
| 171 | + parts := strings.Split(url, "/block/headers/") |
| 172 | + if len(parts) != 2 { |
| 173 | + err = fmt.Errorf("invalid URL format: %s", url) |
| 174 | + return |
| 175 | + } |
| 176 | + filename = parts[1] |
| 177 | + |
| 178 | + if filename == "latest" { |
| 179 | + filename, err = b.getLatestHeightRange(ctx, url) |
| 180 | + if err != nil { |
| 181 | + err = fmt.Errorf("failed to get latest height range from URL %s: %w", url, err) |
| 182 | + return |
| 183 | + } |
| 184 | + } |
| 185 | + |
| 186 | + _, err = fmt.Sscanf(filename, "%d_%d_headers.bin", &heightRange.MinHeight, &heightRange.MaxHeight) |
| 187 | + if err != nil { |
| 188 | + err = fmt.Errorf("failed to parse height range from filename %s: %w", filename, err) |
| 189 | + return |
| 190 | + } |
| 191 | + |
| 192 | + return |
| 193 | +} |
| 194 | + |
| 195 | +// getLatestHeightRange performs a HEAD request to the given latest URL to retrieve the Content-Disposition header. |
| 196 | +// It extracts the filename from the header to determine the actual height range of the latest bulk header |
| 197 | +func (b *BulkIngestorWOC) getLatestHeightRange(ctx context.Context, latestURL string) (string, error) { |
| 198 | + contentHeader, err := b.wocClient.GetContentDispositionFilename(ctx, latestURL) |
| 199 | + if err != nil { |
| 200 | + return "", fmt.Errorf("failed to get Content-Disposition header from WhatsOnChain: %w", err) |
| 201 | + } |
| 202 | + |
| 203 | + // example: Content-Disposition: attachment; filename=922001_923532_headers.bin |
| 204 | + var filename string |
| 205 | + if _, err = fmt.Sscanf(contentHeader, "attachment; filename=%s", &filename); err != nil { |
| 206 | + return "", fmt.Errorf("failed to parse filename from Content-Disposition header: %w", err) |
| 207 | + } |
| 208 | + |
| 209 | + return filename, nil |
| 210 | +} |
0 commit comments