|
5 | 5 |
|
6 | 6 | import { SQLiteCloudRowset } from '../src' |
7 | 7 | import { SQLiteCloudConnection } from '../src/drivers/connection' |
8 | | -import { EXTRA_LONG_TIMEOUT, LONG_TIMEOUT, getChinookTlsConnection, getTestingDatabaseName, sendCommandsAsync } from './shared' |
| 8 | +import { getChinookTlsConnection, getTestingDatabaseName, sendCommandsAsync } from './shared' |
9 | 9 | import * as util from 'util' |
10 | 10 |
|
11 | 11 | const fs = require('fs') |
12 | 12 | const path = require('path') |
13 | 13 |
|
14 | 14 | const BRC_UNIQUE_STATIONS = 41343 |
| 15 | +const BRC_INSERT_CHUNKS = 200_000 // we insert this many rows per request |
15 | 16 |
|
16 | 17 | async function createDatabaseAsync(numberOfRows: number): Promise<{ connection: SQLiteCloudConnection; database: string }> { |
17 | 18 | const connection = getChinookTlsConnection() |
@@ -45,6 +46,13 @@ describe('1 billion row challenge', () => { |
45 | 46 | it('should run 500_000 row challenge', async () => { |
46 | 47 | await testChallenge(500_000) |
47 | 48 | }) |
| 49 | + |
| 50 | + it('should create 10_000_000 measurements', async () => { |
| 51 | + await createMeasurements(10_000_000) |
| 52 | + }) |
| 53 | + it('should run 10_000_000 row challenge', async () => { |
| 54 | + await testChallenge(10_000_000) |
| 55 | + }) |
48 | 56 | }) |
49 | 57 |
|
50 | 58 | // |
@@ -106,49 +114,57 @@ async function createMeasurements(numberOfRows: number = 1000000) { |
/**
 * Runs a scaled-down "1 billion row challenge" against a freshly created database.
 *
 * The run has three phases, each timed and logged on its own:
 *  1. read `assets/1brc/1brc_<numberOfRows>_rows.csv` and parse its `city;temp` lines,
 *  2. create the `measurements` table and insert the rows, BRC_INSERT_CHUNKS rows per request,
 *  3. select MIN/AVG/MAX temperature grouped by city and check the number of groups.
 *
 * The connection is always closed when the run ends; the database itself is left in place.
 *
 * @param numberOfRows Number of rows the csv file must contain; also selects which csv file is read.
 * @throws Rethrows any error raised during the run (failed expectations included) after logging it.
 */
async function testChallenge(numberOfRows: number) {
  const startedOn = Date.now()

  const { connection, database } = await createDatabaseAsync(numberOfRows)
  try {
    const parseOn = Date.now()
    // parse csv into array of city/temperature
    const csvPathname = path.resolve(__dirname, 'assets/1brc', `1brc_${numberOfRows}_rows.csv`)
    const csvText = fs.readFileSync(csvPathname, 'utf8')
    const lines: string[] = csvText.trim().split('\n') // one measurement per line
    expect(lines.length).toBe(numberOfRows)

    // collect the station names while parsing so no intermediate array of names is needed
    const data: { city: string; temp: number }[] = []
    const uniqueStations = new Set<string>()
    for (const line of lines) {
      const [city, temp] = line.split(';') // city and temperature are separated by a semicolon
      uniqueStations.add(city)
      data.push({ city, temp: parseFloat(temp) })
    }
    expect(uniqueStations.size).toBe(BRC_UNIQUE_STATIONS)
    console.debug(`Parsed ${numberOfRows} rows .csv file in ${Date.now() - parseOn}ms`)

    // create table
    const createResult = await sendCommandsAsync(connection, `CREATE TABLE measurements(city VARCHAR(26), temp FLOAT);`)
    expect(createResult).toBe('OK')

    const insertOn = Date.now()
    for (let startRow = 0; startRow < numberOfRows; startRow += BRC_INSERT_CHUNKS) {
      // insert chunk of rows into sqlite database; slice() clamps the end index on the last, shorter chunk
      const dataChunk = data.slice(startRow, startRow + BRC_INSERT_CHUNKS)
      // single quotes in city names are doubled so they stay inside the SQL string literal
      const values = dataChunk.map(({ city, temp }) => `('${city.replaceAll("'", "''")}', ${temp})`).join(',\n')
      const insertSql = `INSERT INTO measurements (city, temp) VALUES \n${values};`

      // uncomment to dump the generated statement for debugging
      // const sqlPathname = path.resolve(__dirname, 'assets/1brc', `1brc_${numberOfRows}_rows_${startRow}.sql`)
      // fs.writeFileSync(sqlPathname, insertSql)

      // insert values into database
      const insertResult = (await sendCommandsAsync(connection, insertSql)) as Array<number>
      expect(Array.isArray(insertResult)).toBeTruthy()
      // NOTE(review): index 3 must be the per-statement change count for this check to hold on every
      // chunk (it was previously labelled "totalChanges") - confirm against the driver's result layout
      expect(insertResult[3] as number).toBe(dataChunk.length)
    }
    console.debug(`Inserted ${numberOfRows} rows in ${Date.now() - insertOn}ms`)

    // calculate averages, etc
    const selectOn = Date.now()
    const selectSql = 'SELECT city, MIN(temp), AVG(temp), MAX(temp) FROM measurements GROUP BY city'
    const selectResult = (await sendCommandsAsync(connection, selectSql)) as SQLiteCloudRowset
    expect(selectResult).toBeTruthy()
    expect(selectResult.length).toBe(BRC_UNIQUE_STATIONS) // one aggregate row per station
    console.debug(`Selected ${numberOfRows} rows with aggregates in ${Date.now() - selectOn}ms`)

    console.log(`Ran ${numberOfRows} challenge in ${Date.now() - startedOn}ms`)
  } catch (error) {
    console.error(`An error occoured while running 1brc, error: ${error}`)
    throw error
  } finally {
    // await destroyDatabaseAsync(connection, database)
    connection?.close()
  }
}
0 commit comments