cypress-io · colebrumley · Nov 15, 2023 · Nov 15, 2023 · Nov 15, 2023 · Nov 15, 2023
diff --git a/.gitignore b/.gitignore
@@ -21,3 +21,5 @@ node_modules
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
+
+chromium_downloads.db
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-chromium-downloads
+Chromium Downloads
 ==========
 
 [![Build Status](https://travis-ci.org/flotwig/chromium-downloads.svg?branch=master)](https://travis-ci.org/flotwig/chromium-downloads)
@@ -7,14 +7,75 @@ Live at: https://chromium.cypress.io
 
 ![image](https://user-images.githubusercontent.com/1151760/52878049-c29d0000-3129-11e9-8c71-e9497fc7a253.png)
 
+This application is a tool for tracking and downloading Chromium builds. It scrapes build data from the Chromium project and provides a user-friendly interface for viewing and downloading the builds. The application is split into a backend and a frontend, both written in JavaScript.
+
+The backend is an Express server that handles data scraping, storage, and retrieval. It uses SQLite for data storage and AWS S3 for database file synchronization. The frontend is a React application that provides a user interface for viewing and downloading the builds.
+
+## SQLite+S3
+
+The application's database was recently switched from PostgreSQL to SQLite. The SQLite database file is stored in the backend's local file system and is synchronized with an S3 bucket. This change was made to simplify the application's architecture and deployment.
+
+The SQLite database is managed by the `better-sqlite3` library. The database file is named `chromium_downloads.db` and is located in the backend's root directory. The database schema is defined in `backend/db.js` and `backend/s3.js`.
+
+The S3 integration is handled by the `@aws-sdk/client-s3` and `@aws-sdk/lib-storage` libraries. The S3 bucket name is set by the `S3_BUCKET_NAME` environment variable. The S3 integration can be skipped by setting the `SKIP_S3_INTEGRATION` environment variable to true. This is useful for local development when you don't want to connect to S3.
+
+The database file is downloaded from S3 when the server starts and is uploaded to S3 when the server shuts down. This is handled in backend/index.js.
+
+The database file is also ignored by Git to prevent it from being committed to the repository.
+
 ### Installing dependencies
 
 ```
 yarn
 ```
 
-### Starting the dev server
+### Running
 
+
+To run the application, install the dependencies with `yarn` and then start the server with `yarn start`. For local development, you can run the server without S3 integration by setting the `SKIP_S3_INTEGRATION` environment variable to `true`:
+
+```shell
+SKIP_S3_INTEGRATION=true yarn start
 ```
+
+The application uses several environment variables to configure its behavior:
+
+- `REACT_APP_API_URL`: This variable is used to set the API URL for the frontend. It is defined in frontend/src/index.js. If not set, it defaults to 'http://localhost:3001'.
+
+- `PORT`: This variable is used to set the port on which the backend server listens. It is defined in backend/index.js. If not set, it defaults to 3001.
+
+- `AWS_REGION`: This variable is used to set the AWS region for the S3 client. It is defined in backend/s3.js. If not set, it defaults to 'us-east-1'.
+
+- `S3_BUCKET_NAME`: This variable is used to set the name of the S3 bucket where the database file is stored. It is defined in backend/s3.js. There is no default value, so it must be set.
+
+- `SKIP_S3_INTEGRATION`: This variable is used to skip the integration with S3 for downloading and uploading the database file. It is read in backend/s3.js. Only the exact value `true` enables it; if it is unset or set to anything else, S3 integration stays on.
+
+Please note that the `SKIP_S3_INTEGRATION` environment variable is used in the yarn start command above to run the application without S3 integration. This is useful for local development when you don't want to connect to S3.
+
+
+#### Running in Production
+
+To run the application in a production environment, you need to set the following environment variables:
+
+```shell
+REACT_APP_API_URL=<your_api_url>
+PORT=<your_port>
+AWS_REGION=<your_aws_region>
+S3_BUCKET_NAME=<your_s3_bucket_name>
+```
+
+Replace `<your_api_url>`, `<your_port>`, `<your_aws_region>`, and `<your_s3_bucket_name>` with your actual values.
+
+Then, you can start the server with:
+
+```shell
 yarn start
 ```
+
+Please note that in a production environment, you should not skip the S3 integration. The `SKIP_S3_INTEGRATION` environment variable should be left unset or set to false.
+
+The application leverages the [default AWS profile search paths](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-authentication.html#cli-chap-authentication-precedence), so an IAM role, a named profile, hard-coded credentials, etc. are all respected.
+
+## Contributing
+
+Contributions are welcome. Please make sure to follow the coding style and add tests for any new features or changes.
diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -0,0 +1,7 @@
+FROM --platform=linux/x86_64 node:20-slim
+WORKDIR /backend
+COPY . .
+EXPOSE 3001
+RUN yarn
+ENTRYPOINT [ "yarn" ]
+CMD [ "start" ]
diff --git a/backend/db.js b/backend/db.js
@@ -1,47 +1,17 @@
-const Sequelize = require("sequelize");
+const Database = require('better-sqlite3');
+const db = new Database('./chromium_downloads.db');
 
-let DATABASE_URL = process.env.DATABASE_URL;
+// Schema bootstrap, executed once when this module is first required.
+// IF NOT EXISTS keeps it a no-op when the builds table is already present.
+db.prepare(`CREATE TABLE IF NOT EXISTS builds (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  version TEXT,
+  os TEXT,
+  channel TEXT,
+  timestamp TEXT,
+  baseRevision TEXT,
+  artifactsRevision TEXT,
+  downloads TEXT
+)`).run();
 
-if (process.env.NODE_ENV === "production") {
-  DATABASE_URL += "?ssl=true";
-}
-
-const sequelize = new Sequelize(DATABASE_URL);
-
-class Build extends Sequelize.Model {}
-Build.init(
-  {
-    version: Sequelize.STRING,
-    os: Sequelize.STRING,
-    channel: Sequelize.STRING,
-    timestamp: Sequelize.DATE,
-    baseRevision: Sequelize.STRING,
-    artifactsRevision: Sequelize.STRING,
-    downloads: Sequelize.JSONB,
-  },
-  {
-    sequelize,
-    modelName: "builds",
-    timestamps: false,
-    indexes: [
-      {
-        unique: true,
-        fields: ["version", "os", "channel", "timestamp"],
-      },
-    ],
-    pool: {
-      max: 4,
-      min: 1,
-    },
-  }
-);
-
-async function initialize() {
-  console.log(sequelize);
-  sequelize.sync();
-}
-
-module.exports = {
-  initialize,
-  Build,
-};
+module.exports = db;
diff --git a/backend/index.js b/backend/index.js
@@ -1,55 +1,67 @@
-const db = require("./db");
 const express = require("express");
+const db = require("./db");
 const scraper = require("./scraper");
+const { downloadDbFromS3, uploadDbToS3 } = require('./s3');
 
 const PORT = Number(process.env.PORT) || 3001;
-
 const app = express();
 
+// CORS middleware: every response carries "access-control-allow-origin: *", so any origin may call the API.
 app.use((req, res, next) => {
   res.setHeader("access-control-allow-origin", "*");
-
   next();
 });
 
-app.get("/builds", (req, res) => {
-  db.Build.findAll({
-    attributes: ["version", "os", "channel", "timestamp"],
-    order: [["timestamp", "DESC"]],
-  }).then((builds) => {
-    res.json(builds);
-  });
-});
+// Route definitions
+// `db` is the better-sqlite3 connection exported by ./db; its query API is synchronous.
 
-app.get("/builds/:version/:channel/:os", (req, res) => {
-  db.Build.findAll({
-    where: {
-      channel: req.params.channel,
-      os: req.params.os,
-      version: req.params.version,
-    },
-  }).then((builds) => {
-    if (!builds.length) {
-      return res.sendStatus(404);
-    }
-
-    res.json(builds[0]);
-  });
+// List all builds, newest first. Kept synchronous so Express 4 turns a thrown DB error into a 500.
+app.get("/builds", (req, res) => {
+  const builds = db.prepare("SELECT version, os, channel, timestamp FROM builds ORDER BY timestamp DESC").all();
+  res.json(builds);
 });
 
-console.log("Initializing");
-
-console.log(db.initialize);
+// Return one build (404 if none). `downloads` is stored as JSON text, so decode it before responding.
+app.get("/builds/:version/:channel/:os", (req, res) => {
+  const { version, channel, os } = req.params;
+  const build = db.prepare("SELECT * FROM builds WHERE channel = ? AND os = ? AND version = ?").get(channel, os, version);
+  if (!build) {
+    return res.sendStatus(404);
+  }
+  res.json({ ...build, downloads: JSON.parse(build.downloads) });
+});
 
-db.initialize()
-  .then(() => {
-    console.log("Starting scraping");
+// Startup: pull the database from S3, start the scraper, then listen on PORT; a thrown error is logged and exits with code 1.
+async function startServer() {
+  try {
+    await downloadDbFromS3();
+    console.log("Database downloaded from S3");
     scraper.start();
-
+    console.log("Scraper started");
     app.listen(PORT, () => {
       console.log(`Backend listening on ${PORT}.`);
     });
-  })
-  .catch((e) => {
-    console.error(e);
-  });
+  } catch (error) {
+    console.error(error);
+    process.exit(1);
+  }
+}
+
+// Shutdown: push the database file to S3, then exit with 0 on success or 1 if the upload failed.
+async function handleShutdown() {
+  try {
+    await uploadDbToS3();
+  } catch (uploadError) {
+    console.error(uploadError);
+    return process.exit(1);
+  }
+  console.log("Database uploaded to S3");
+  process.exit(0);
+}
+
+// SIGINT and SIGTERM both run handleShutdown (upload the database, then exit).
+process.on('SIGINT', handleShutdown);
+process.on('SIGTERM', handleShutdown);
+
+// Kick off startup; startServer catches its own errors, so the returned promise is not awaited.
+startServer();
diff --git a/backend/package.json b/backend/package.json
@@ -3,12 +3,13 @@
   "version": "0.0.0",
   "private": true,
   "dependencies": {
+    "@aws-sdk/client-s3": "^3.451.0",
+    "@aws-sdk/lib-storage": "^3.451.0",
     "bluebird": "^3.5.5",
     "express": "^4.17.1",
     "got": "^9.6.0",
     "nodemon": "^1.19.1",
-    "pg": "^8.8.0",
-    "sequelize": "^6.28.0"
+    "better-sqlite3": "^9.1.0"
   },
   "scripts": {
     "start": "node index.js",

diff --git a/backend/s3.js b/backend/s3.js
@@ -0,0 +1,74 @@
+// backend/s3.js: helpers that upload/download the SQLite file (chromium_downloads.db) to/from the S3_BUCKET_NAME bucket.
+const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');
+const { Upload } = require('@aws-sdk/lib-storage');
+const fs = require('fs');
+const Database = require('better-sqlite3');
+
+const s3Client = new S3Client({
+  region: process.env.AWS_REGION || 'us-east-1'
+});
+
+const BUCKET_NAME = process.env.S3_BUCKET_NAME;
+const DB_FILE_NAME = 'chromium_downloads.db';
+const SKIP_S3_INTEGRATION = process.env.SKIP_S3_INTEGRATION === 'true';
+
+// Upload the local database file to S3; does nothing when SKIP_S3_INTEGRATION is 'true'.
+async function uploadDbToS3() {
+  if (SKIP_S3_INTEGRATION) {
+    return;
+  }
+
+  const dbUpload = new Upload({
+    client: s3Client,
+    params: {
+      Bucket: BUCKET_NAME,
+      Key: DB_FILE_NAME,
+      Body: fs.createReadStream(DB_FILE_NAME),
+    },
+  });
+
+  // Wait for the upload to finish before returning to the caller.
+  await dbUpload.done();
+}
+
+// Fetch the database file from S3, or create an empty one with the builds table when the key is missing.
+async function downloadDbFromS3() {
+  if (SKIP_S3_INTEGRATION) {
+    return;
+  }
+
+  const { pipeline } = require('stream/promises');
+  const getObjectParams = {
+    Bucket: BUCKET_NAME,
+    Key: DB_FILE_NAME
+  };
+  try {
+    const { Body } = await s3Client.send(new GetObjectCommand(getObjectParams));
+    // pipeline() rejects when either stream fails; a bare pipe() ignores read-side
+    // errors, which left this promise pending forever on an interrupted download.
+    await pipeline(Body, fs.createWriteStream(DB_FILE_NAME));
+  } catch (err) {
+    if (err.name === 'NoSuchKey') {
+      // Create an empty file
+      fs.writeFileSync(DB_FILE_NAME, '');
+      // Initialize the database and create the builds table
+      const db = new Database(DB_FILE_NAME);
+      const stmt = db.prepare(`CREATE TABLE IF NOT EXISTS builds (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        version TEXT,
+        os TEXT,
+        channel TEXT,
+        timestamp TEXT,
+        baseRevision TEXT,
+        artifactsRevision TEXT,
+        downloads TEXT
+      )`);
+      stmt.run();
+      db.close();
+    } else {
+      throw err;
+    }
+  }
+}
+
+module.exports = { uploadDbToS3, downloadDbFromS3 };
diff --git a/backend/scraper.js b/backend/scraper.js
@@ -1,9 +1,25 @@
-const { Build } = require('./db')
 const { getBuilds } = require('./get-chromium-builds')
 const Promise = require('bluebird')
 
+const db = require('./db');
+
 function saveBuild(build) {
-  return Build.create(build)
+  // Insert `build` unless a row with the same version/os/channel already exists.
+  // better-sqlite3 runs synchronously, so the promise has to be resolved by hand.
+  const { version, os, channel, timestamp, baseRevision, artifactsRevision, downloads } = build;
+  return new Promise((resolve) => {
+    const stmt = db.prepare(`
+    INSERT INTO builds (version, os, channel, timestamp, baseRevision, artifactsRevision, downloads)
+    SELECT ?, ?, ?, ?, ?, ?, ?
+    WHERE NOT EXISTS (
+      SELECT 1 FROM builds WHERE version = ? AND os = ? AND channel = ?
+    )
+  `);
+    // The trailing version/os/channel bind the WHERE NOT EXISTS subquery.
+    const row = [version, os, channel, timestamp, baseRevision, artifactsRevision, JSON.stringify(downloads)];
+    // A throw from prepare()/run() rejects the promise; success resolves with run()'s result.
+    resolve(stmt.run([...row, version, os, channel]));
+  });
 }
 
 function scrape() {
@@ -21,9 +37,8 @@ function scrape() {
         return build
       })
       .then(saveBuild)
-      .catch(() => {
-        console.error(`Had an error storing downloads for Chromium ${build.version} ${build.channel} on ${build.os}`)
-        return
+      .catch((error) => {
+        console.error(`Had an error storing downloads for Chromium ${build.version} ${build.channel} on ${build.os}:`, error.message);
       })
     })
   })