Skip to content

Commit 79dc95a

Browse files
Writing in chunks
1 parent 46b4c20 · commit 79dc95a

File tree

1 file changed

+31
-15
lines changed

1 file changed

+31
-15
lines changed

test/1brc.test.ts

Lines changed: 31 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,14 @@
55

66
import { SQLiteCloudRowset } from '../src'
77
import { SQLiteCloudConnection } from '../src/drivers/connection'
8-
import { EXTRA_LONG_TIMEOUT, LONG_TIMEOUT, getChinookTlsConnection, getTestingDatabaseName, sendCommandsAsync } from './shared'
8+
import { getChinookTlsConnection, getTestingDatabaseName, sendCommandsAsync } from './shared'
99
import * as util from 'util'
1010

1111
const fs = require('fs')
1212
const path = require('path')
1313

1414
const BRC_UNIQUE_STATIONS = 41343
15+
const BRC_INSERT_CHUNKS = 200_000 // we insert this many rows per request
1516

1617
async function createDatabaseAsync(numberOfRows: number): Promise<{ connection: SQLiteCloudConnection; database: string }> {
1718
const connection = getChinookTlsConnection()
@@ -45,6 +46,13 @@ describe('1 billion row challenge', () => {
4546
it('should run 500_000 row challenge', async () => {
4647
await testChallenge(500_000)
4748
})
49+
50+
it('should create 10_000_000 measurements', async () => {
51+
await createMeasurements(10_000_000)
52+
})
53+
it('should run 10_000_000 row challenge', async () => {
54+
await testChallenge(10_000_000)
55+
})
4856
})
4957

5058
//
@@ -106,49 +114,57 @@ async function createMeasurements(numberOfRows: number = 1000000) {
106114
async function testChallenge(numberOfRows: number) {
107115
const startedOn = Date.now()
108116

117+
const { connection, database } = await createDatabaseAsync(numberOfRows)
109118
try {
119+
const parseOn = Date.now()
120+
// parse csv into array of city/temperature
110121
const csvPathname = path.resolve(__dirname, 'assets/1brc', `1brc_${numberOfRows}_rows.csv`)
111122
const csvText = fs.readFileSync(csvPathname, 'utf8')
112-
113-
// parse into array of city/temperature
114123
const lines = csvText.trim().split('\n') // Split the CSV text by newline
115124
const data: { city: string; temp: number }[] = lines.map((line: string) => {
116125
const [city, temp] = line.split(';') // Split each line by semicolon
117126
return { city, temp: parseFloat(temp) } // Parse the temperature as a number
118127
})
119128
expect(lines.length).toBe(numberOfRows)
120-
121129
const uniqueStations = new Set(data.map(item => item.city))
122130
expect(uniqueStations.size).toBe(BRC_UNIQUE_STATIONS)
131+
console.debug(`Parsed ${numberOfRows} rows .csv file in ${Date.now() - parseOn}ms`)
123132

124133
// create database and table
125-
const { connection, database } = await createDatabaseAsync(lines.length)
126134
const createResult = await sendCommandsAsync(connection, `CREATE TABLE measurements(city VARCHAR(26), temp FLOAT);`)
127135
expect(createResult).toBe('OK')
128136

129-
// insert into sqlite database
130-
const values = data.map(({ city, temp }) => `('${city.replaceAll("'", "''")}', ${temp})`).join(',\n')
131-
const insertSql = `INSERT INTO measurements (city, temp) VALUES \n${values};`
132-
const sqlPathname = path.resolve(__dirname, 'assets/1brc', `1brc_${numberOfRows}_rows.sql`)
133-
fs.writeFileSync(sqlPathname, insertSql)
137+
const insertOn = Date.now()
138+
for (let chunk = 0, startRow = 0; startRow < numberOfRows; chunk++, startRow += BRC_INSERT_CHUNKS) {
139+
// insert chunk of rows into sqlite database
140+
const dataChunk = data.slice(startRow, Math.min(numberOfRows, startRow + BRC_INSERT_CHUNKS))
141+
const values = dataChunk.map(({ city, temp }) => `('${city.replaceAll("'", "''")}', ${temp})`).join(',\n')
142+
const insertSql = `INSERT INTO measurements (city, temp) VALUES \n${values};`
143+
144+
// const sqlPathname = path.resolve(__dirname, 'assets/1brc', `1brc_${numberOfRows}_rows_${chunk}.sql`)
145+
// fs.writeFileSync(sqlPathname, insertSql)
134146

135-
// insert values into database
136-
const insertResult = (await sendCommandsAsync(connection, insertSql)) as Array<number>
137-
expect(Array.isArray(insertResult)).toBeTruthy()
138-
expect(insertResult[2] as number).toBe(numberOfRows)
147+
// insert values into database
148+
const insertResult = (await sendCommandsAsync(connection, insertSql)) as Array<number>
149+
expect(Array.isArray(insertResult)).toBeTruthy()
150+
expect(insertResult[3] as number).toBe(dataChunk.length) // totalChanges
151+
}
152+
console.debug(`Inserted ${numberOfRows} rows in ${Date.now() - insertOn}ms`)
139153

140154
// calculate averages, etc
155+
const selectOn = Date.now()
141156
const selectSql = 'SELECT city, MIN(temp), AVG(temp), MAX(temp) FROM measurements GROUP BY city'
142157
const selectResult = (await sendCommandsAsync(connection, selectSql)) as SQLiteCloudRowset
143158
expect(selectResult).toBeTruthy()
144159
expect(selectResult.length).toBe(BRC_UNIQUE_STATIONS)
160+
console.debug(`Selected ${numberOfRows} rows with aggregates in ${Date.now() - selectOn}ms`)
145161

146162
console.log(`Ran ${numberOfRows} challenge in ${Date.now() - startedOn}ms`)
147-
debugger
148163
} catch (error) {
149164
console.error(`An error occoured while running 1brc, error: ${error}`)
150165
throw error
151166
} finally {
152167
// await destroyDatabaseAsync(connection, database)
168+
connection?.close()
153169
}
154170
}

0 commit comments

Comments
 (0)