From fa818be94caa9f58342728d5f259ab928557b6b8 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Mon, 12 May 2025 18:37:33 +0100 Subject: [PATCH 01/13] feat: add gdpr erasure script for tinybird --- .../config/custom-environment-variables.json | 3 + .../src/bin/erase-members-data-tinybird.ts | 64 +++++++++++++++++++ .../apps/data_sink_worker/src/conf/index.ts | 12 ++++ 3 files changed, 79 insertions(+) create mode 100644 services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts diff --git a/services/apps/data_sink_worker/config/custom-environment-variables.json b/services/apps/data_sink_worker/config/custom-environment-variables.json index 07554b54d9..6666602960 100644 --- a/services/apps/data_sink_worker/config/custom-environment-variables.json +++ b/services/apps/data_sink_worker/config/custom-environment-variables.json @@ -33,5 +33,8 @@ }, "github": { "isSnowflakeEnabled": "CROWD_GITHUB_IS_SNOWFLAKE_ENABLED" + }, + "tinybird": { + "token": "CROWD_TINYBIRD_TOKEN" } } diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts new file mode 100644 index 0000000000..3a48e71086 --- /dev/null +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -0,0 +1,64 @@ +import { TINYBIRD_CONFIG } from '../conf' + +const TINYBIRD_API_URL = 'https://api.us-west-2.aws.tinybird.co/v0/datasources' +const DATA_SOURCES = ['activityRelations', 'members'] +const TOKEN = TINYBIRD_CONFIG().token + +/** + * This uses "Delete Data Selectively" from Tinybird: + * https://www.tinybird.co/docs/classic/get-data-in/data-operations/replace-and-delete-data#delete-data-selectively + * + * It deletes member data for GDPR compliance from the `members` and `activityRelations` datasources. + * All datasources created from pipes based on these tables will reflect the deletions within one hour, + * as the relevant copy pipes are scheduled to run hourly.
+ */ + +const args = process.argv.slice(2) + +if (args.length !== 1) { + console.error('Usage: deleteMemberTinybird.ts ') + process.exit(1) +} + +const memberId = args[0] + +async function deleteFromDataSource(tableName: string, memberId: string) { + const url = `${TINYBIRD_API_URL}/${tableName}/delete` + const body = new URLSearchParams({ + delete_condition: `memberId = '${memberId}'`, + }) + + const response = await fetch(url, { + method: 'POST', + headers: { + Authorization: `Bearer ${TOKEN}`, + 'Content-Type': 'application/x-www-form-urlencoded', + }, + body, + }) + + const text = await response.text() + if (!response.ok) { + console.error(`Failed to delete from ${tableName}:`, text) + throw new Error(`Delete failed for table ${tableName}`) + } + + console.log(`Deleted from ${tableName}:`, text) +} + +async function main() { + if (!TOKEN) { + console.error('TINYBIRD_TOKEN environment variable not set!') + process.exit(1) + } + + for (const table of DATA_SOURCES) { + try { + await deleteFromDataSource(table, memberId) + } catch (err) { + console.error(err) + } + } +} + +main() diff --git a/services/apps/data_sink_worker/src/conf/index.ts b/services/apps/data_sink_worker/src/conf/index.ts index ec71246f7b..2bcd514af6 100644 --- a/services/apps/data_sink_worker/src/conf/index.ts +++ b/services/apps/data_sink_worker/src/conf/index.ts @@ -20,6 +20,10 @@ export interface IWorkerConfig { queuePriorityLevel: QueuePriorityLevel } +export interface ITinybirdConfig { + token: string +} + let workerSettings: IWorkerConfig export const WORKER_SETTINGS = (): IWorkerConfig => { if (workerSettings) return workerSettings @@ -80,3 +84,11 @@ export const GITHUB_CONFIG = (): IGithubConfig => { githubConfig = config.get('github') return githubConfig } + +let tinybirdConfig: ITinybirdConfig +export const TINYBIRD_CONFIG = (): ITinybirdConfig => { + if (tinybirdConfig) return tinybirdConfig + + tinybirdConfig = config.get('tinybird') + return tinybirdConfig +} From 
0476689ec7b22da981c4d9c9584d82ab40769ff7 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Wed, 21 May 2025 16:25:04 +0100 Subject: [PATCH 02/13] chore: add memberIdentities table to data sources Signed-off-by: Joana Maia --- .../data_sink_worker/src/bin/erase-members-data-tinybird.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index 3a48e71086..7e16dbd96a 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -1,7 +1,7 @@ import { TINYBIRD_CONFIG } from '../conf' const TINYBIRD_API_URL = 'https://api.us-west-2.aws.tinybird.co/v0/datasources' -const DATA_SOURCES = ['activityRelations', 'members'] +const DATA_SOURCES = ['activityRelations', 'members', 'memberIdentities'] const TOKEN = TINYBIRD_CONFIG().token /** From 714e740d14dc19beb2089ee88809040bef152b07 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Wed, 10 Sep 2025 17:51:49 +0100 Subject: [PATCH 03/13] chore: tinybird gdpr --- services/apps/data_sink_worker/src/bin/erase-member.ts | 10 ++++++++-- .../src/bin/erase-members-data-tinybird.ts | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/services/apps/data_sink_worker/src/bin/erase-member.ts b/services/apps/data_sink_worker/src/bin/erase-member.ts index 9ee20649f4..b6cbb61b40 100644 --- a/services/apps/data_sink_worker/src/bin/erase-member.ts +++ b/services/apps/data_sink_worker/src/bin/erase-member.ts @@ -313,18 +313,24 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom ) } + // TODO: Understand what should be done with maintainersInternal + const tablesToDelete: Map = new Map([ ['activities', ['memberId']], + ['activityRelations', ['memberId']], ['memberNoMerge', ['memberId', 'noMergeId']], + ['memberOrganizationAffiliationOverrides', 
['memberId']], ['memberOrganizations', ['memberId']], - ['memberTags', ['memberId']], + ['memberSegmentAffiliations', ['memberId']], ['memberSegments', ['memberId']], ['memberSegmentsAgg', ['memberId']], ['memberEnrichmentCache', ['memberId']], + ['memberEnrichments', ['memberId']], ['memberIdentities', ['memberId']], - ['memberSegmentAffiliations', ['memberId']], ['memberToMerge', ['memberId', 'toMergeId']], ['memberToMergeRaw', ['memberId', 'toMergeId']], + ['memberBotSuggestions', ['memberId']], + ['memberNoBot', ['memberId']], ]) for (const table of Array.from(tablesToDelete.keys())) { diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index 7e16dbd96a..d884a74b92 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -1,7 +1,7 @@ import { TINYBIRD_CONFIG } from '../conf' const TINYBIRD_API_URL = 'https://api.us-west-2.aws.tinybird.co/v0/datasources' -const DATA_SOURCES = ['activityRelations', 'members', 'memberIdentities'] +const DATA_SOURCES = ['activities', 'activityRelations', 'members', 'memberIdentities'] const TOKEN = TINYBIRD_CONFIG().token /** From d61ef9603b9323f313ea62dc5338b9307efdb228 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 12:51:47 +0100 Subject: [PATCH 04/13] chore: consolidate erasure scripts Signed-off-by: Joana Maia --- .../data_sink_worker/src/bin/erase-member.ts | 476 ++++++++---------- .../src/bin/erase-members-data-tinybird.ts | 164 +++++- .../src/bin/erasure-members-data-questdb.ts | 216 ++++++++ 3 files changed, 579 insertions(+), 277 deletions(-) create mode 100644 services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts diff --git a/services/apps/data_sink_worker/src/bin/erase-member.ts b/services/apps/data_sink_worker/src/bin/erase-member.ts index 76935cb1a4..f8be6bd1ed 100644 --- 
a/services/apps/data_sink_worker/src/bin/erase-member.ts +++ b/services/apps/data_sink_worker/src/bin/erase-member.ts @@ -1,36 +1,85 @@ -import fs from 'fs' -import path from 'path' +import * as readline from 'readline' -import { generateUUIDv1 } from '@crowd/common' import { SearchSyncWorkerEmitter } from '@crowd/common_services' import { DbStore, getDbConnection } from '@crowd/data-access-layer/src/database' import { getServiceChildLogger } from '@crowd/logging' import { QueueFactory } from '@crowd/queue' -import { MemberIdentityType } from '@crowd/types' import { DB_CONFIG, QUEUE_CONFIG } from '../conf' +/** + * Member Data Erasure Script (Database) + * + * This script completely removes a member and all associated data from the database + * for GDPR compliance and data deletion requests. It performs a comprehensive cleanup + * across multiple related tables while respecting foreign key constraints. + * + * WHAT THIS SCRIPT DOES: + * 1. Shows a detailed summary of all data to be deleted/modified + * 2. Requests user confirmation before proceeding + * 3. Performs the following operations in order: + * - Deletes from maintainersInternal (respects FK constraint with memberIdentities) + * - Deletes from all member-related tables (relations, segments, etc.) 
+ * - Deletes the main member record + * - Triggers search index updates and organization re-sync + * + * FOREIGN KEY HANDLING: + * - maintainersInternal.identityId → memberIdentities.id + * Solution: Delete maintainersInternal records first before memberIdentities + * + * TABLES AFFECTED: + * - maintainersInternal (deleted by identityId from member's identities) + * - activityRelations, memberNoMerge, memberOrganizationAffiliationOverrides + * - memberOrganizations, memberSegmentAffiliations, memberSegments, memberSegmentsAgg + * - memberEnrichmentCache, memberEnrichments, memberIdentities + * - memberToMerge, memberToMergeRaw, memberBotSuggestions, memberNoBot + * - members (main record) + * + * SEARCH INDEX UPDATES: + * - Removes member from search indexes + * - Re-syncs any affected organizations + * + * USAGE: + * npm run script erase-member + * + * SAFETY FEATURES: + * - Shows detailed deletion summary before proceeding + * - Requires explicit user confirmation (Y/n) + * - Runs in a database transaction for atomicity + * - Comprehensive error handling and logging + */ + /* eslint-disable @typescript-eslint/no-explicit-any */ const log = getServiceChildLogger('erase-member') +/** + * Prompts the user for Y/n confirmation via command line input + */ +async function promptConfirmation(message: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + return new Promise((resolve) => { + rl.question(`${message} (Y/n): `, (answer) => { + rl.close() + resolve(answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes' || answer === '') + }) + }) +} + const processArguments = process.argv.slice(2) -if (processArguments.length === 0 || processArguments.length % 2 !== 0) { - log.error( - ` - Expected argument in pairs which can be any of the following: - - ids ", , ..." - - email john@doe.com - - name "John Doe" - - (e.g. 
lfid someusername) - `, - ) +if (processArguments.length !== 1) { + log.error('Expected exactly one argument: memberId') process.exit(1) } +const memberId = processArguments[0] + setImmediate(async () => { - const manualCheckFile = `manual_check_member_ids.txt` const dbConnection = await getDbConnection(DB_CONFIG()) const store = new DbStore(log, dbConnection) const queueClient = QueueFactory.createQueueService(QUEUE_CONFIG()) @@ -38,268 +87,164 @@ setImmediate(async () => { const searchSyncWorkerEmitter = new SearchSyncWorkerEmitter(queueClient, log) await searchSyncWorkerEmitter.init() - const pairs = [] - for (let i = 0; i < processArguments.length; i += 2) { - pairs.push({ - type: processArguments[i], - value: processArguments[i + 1], - }) - } - - log.info( - `Erasing member based on input data: [${pairs - .map((p) => `${p.type} "${p.value}"`) - .join(', ')}]`, - ) - - const idParams = pairs.filter((p) => p.type === 'ids') - const idsToDelete: string[] = [] - for (const param of idParams) { - idsToDelete.push(...param.value.split(',').map((id) => id.trim())) - } + log.info(`Erasing member with ID: ${memberId}`) const orgDataMap: Map = new Map() const memberDataMap: Map = new Map() - if (idsToDelete.length > 0) { - for (const memberId of idsToDelete) { - try { - await store.transactionally(async (t) => { - // get organization id for a member to sync later - let orgResults: any[] - if (orgDataMap.has(memberId)) { - orgResults = orgDataMap.get(memberId) - } else { - orgResults = await store - .connection() - .any( - `select distinct "organizationId" from "activityRelations" where "memberId" = $(memberId)`, - { - memberId, - }, - ) - orgDataMap.set(memberId, orgResults) - } - - let memberData: any - if (memberDataMap.has(memberId)) { - memberData = memberDataMap.get(memberId) - } else { - memberData = await store - .connection() - .one(`select * from members where id = $(memberId)`, { - memberId, - }) - memberDataMap.set(memberId, memberData) - } + try { + // Show 
deletion summary and get confirmation + const summary = await getDeletionSummary(store, memberId) + console.log(summary) - log.info('CLEANUP ACTIVITIES...') + const proceed = await promptConfirmation('Do you want to proceed with the deletion?') - // delete the member and everything around it - await deleteMemberFromDb(t, memberId) + if (!proceed) { + log.info('Deletion cancelled by user') + process.exit(0) + } - await searchSyncWorkerEmitter.triggerRemoveMember(memberId, true) + await store.transactionally(async (t) => { + // get organization id for a member to sync later + let orgResults: any[] + if (orgDataMap.has(memberId)) { + orgResults = orgDataMap.get(memberId) + } else { + orgResults = await store + .connection() + .any( + `select distinct "organizationId" from "activityRelations" where "memberId" = $(memberId)`, + { + memberId, + }, + ) + orgDataMap.set(memberId, orgResults) + } - if (orgResults.length > 0) { - for (const orgResult of orgResults) { - if (orgResult.organizationId) { - await searchSyncWorkerEmitter.triggerOrganizationSync( - orgResult.organizationId, - true, - ) - } - } - } + let memberData: any + if (memberDataMap.has(memberId)) { + memberData = memberDataMap.get(memberId) + } else { + memberData = await store.connection().one(`select * from members where id = $(memberId)`, { + memberId, }) - } catch (err) { - log.error(err, { memberId }, 'Failed to erase member identity!') + memberDataMap.set(memberId, memberData) } - } - } else { - const nameIdentity = pairs.find((p) => p.type === 'name') - const otherIdentities = pairs.filter((p) => p.type !== 'name') - - if (otherIdentities.length > 0) { - const conditions: string[] = [] - const params: any = {} - let index = 0 - for (const pair of otherIdentities) { - params[`value_${index}`] = pair.value - if (pair.type === 'email') { - conditions.push( - `(type = '${MemberIdentityType.EMAIL}' and lower(value) = lower($(value_${index})))`, - ) - } else { - params[`platform_${index}`] = (pair.type as 
string).toLowerCase() - conditions.push( - `(platform = $(platform_${index}) and lower(value) = lower($(value_${index})))`, - ) - } - index++ - } + log.info('CLEANUP ACTIVITIES...') - const query = `select * from "memberIdentities" where ${conditions.join(' or ')}` - const existingIdentities = await store.connection().any(query, params) - - if (existingIdentities.length > 0) { - log.info(`Found ${existingIdentities.length} existing identities.`) - - const deletedMemberIds = [] - - for (const eIdentity of existingIdentities) { - try { - await store.transactionally(async (t) => { - // get organization id for a member to sync later - let orgResults: any[] - if (orgDataMap.has(eIdentity.memberId)) { - orgResults = orgDataMap.get(eIdentity.memberId) - } else { - orgResults = await store - .connection() - .any( - `select distinct "organizationId" from "activityRelations" where "memberId" = $(memberId)`, - { - memberId: eIdentity.memberId, - }, - ) - orgDataMap.set(eIdentity.memberId, orgResults) - } - - let memberData: any - if (memberDataMap.has(eIdentity.memberId)) { - memberData = memberDataMap.get(eIdentity.memberId) - } else { - memberData = await store - .connection() - .one(`select * from members where id = $(memberId)`, { - memberId: eIdentity.memberId, - }) - memberDataMap.set(eIdentity.memberId, memberData) - } - - // mark identity for erasure - await markIdentityForErasure(t, eIdentity.platform, eIdentity.type, eIdentity.value) - - if (!deletedMemberIds.includes(eIdentity.memberId)) { - if (eIdentity.verified) { - log.info({ tenantId: memberData.tenantId }, 'CLEANUP ACTIVITIES...') - - // delete the member and everything around it - await deleteMemberFromDb(t, eIdentity.memberId) - - // track so we don't delete the same member twice - deletedMemberIds.push(eIdentity.memberId) - - await searchSyncWorkerEmitter.triggerRemoveMember(eIdentity.memberId, true) - } else { - // just delete the identity - await deleteMemberIdentity( - t, - eIdentity.memberId, - 
eIdentity.platform, - eIdentity.type, - eIdentity.value, - ) - await searchSyncWorkerEmitter.triggerMemberSync(eIdentity.memberId, true) - } - - if (orgResults.length > 0) { - for (const orgResult of orgResults) { - if (orgResult.organizationId) { - await searchSyncWorkerEmitter.triggerOrganizationSync( - orgResult.organizationId, - true, - ) - } - } - } - } - }) - } catch (err) { - log.error(err, { eIdentity }, 'Failed to erase member identity!') - } - } - } - } + // delete the member and everything around it + await deleteMemberFromDb(t, memberId) - if (nameIdentity) { - const results = await store - .connection() - .any(`select id from members where lower("displayName") = lower($(name))`, { - name: nameIdentity.value.trim(), - }) + await searchSyncWorkerEmitter.triggerRemoveMember(memberId, true) - if (results.length > 0) { - addLinesToFile(manualCheckFile, [ - `name: ${nameIdentity.value}, member ids: [${results.map((r) => r.id).join(', ')}]`, - ]) - log.warn( - `Found ${results.length} members with name: ${ - nameIdentity.value - }! 
Manual check required for member ids: [${results.map((r) => r.id).join(', ')}]!`, - ) + if (orgResults.length > 0) { + for (const orgResult of orgResults) { + if (orgResult.organizationId) { + await searchSyncWorkerEmitter.triggerOrganizationSync(orgResult.organizationId, true) + } + } } - } + }) + } catch (err) { + log.error(err, { memberId }, 'Failed to erase member!') } process.exit(0) }) -async function markIdentityForErasure( - store: DbStore, - platform: string, - type: string, - value: string, -): Promise { - await store.connection().none( - ` - insert into "requestedForErasureMemberIdentities" (id, platform, type, value) - values ($(id), $(platform), $(type), $(value)) - `, - { - id: generateUUIDv1(), - platform, - type, - value, - }, - ) -} - -async function deleteMemberIdentity( - store: DbStore, - memberId: string, - platform: string, - type: string, - value: string, -): Promise { - const result = await store.connection().result( - `delete from "memberIdentities" where - "memberId" = $(memberId) and - platform = $(platform) and - type = $(type) and - value = $(value)`, - { +/** + * Generates a comprehensive summary of all data that will be deleted or modified + * for the specified member. Queries each table to provide exact record counts. 
+ * + * @param store - Database store instance + * @param memberId - The member ID to analyze + * @returns Formatted summary string showing what will be affected + */ +async function getDeletionSummary(store: DbStore, memberId: string): Promise { + let summary = `\n=== DELETION SUMMARY FOR MEMBER ${memberId} ===\n` + + // Count activities that will be updated (objectMemberId set to null) + const activityRelationsUpdate = await store + .connection() + .one(`select count(*) as count from "activityRelations" where "objectMemberId" = $(memberId)`, { memberId, - platform, - type, - value, - }, + }) + if (parseInt(activityRelationsUpdate.count) > 0) { + summary += `- ${activityRelationsUpdate.count} activityRelations will have objectMemberId/objectMemberUsername cleared\n` + } + + // Count maintainersInternal records to be deleted + const maintainersCount = await store.connection().one( + `select count(*) as count from "maintainersInternal" where "identityId" in ( + select id from "memberIdentities" where "memberId" = $(memberId) + )`, + { memberId }, ) + if (parseInt(maintainersCount.count) > 0) { + summary += `- ${maintainersCount.count} maintainersInternal records will be deleted\n` + } - if (result.rowCount === 0) { - throw new Error( - `Failed to delete member identity - memberId ${memberId}, platform: ${platform}, type: ${type}, value: ${value}!`, - ) + // Count records in each table to be deleted + const tablesToDelete: Map = new Map([ + ['activityRelations', ['memberId']], + ['memberNoMerge', ['memberId', 'noMergeId']], + ['memberOrganizationAffiliationOverrides', ['memberId']], + ['memberOrganizations', ['memberId']], + ['memberSegmentAffiliations', ['memberId']], + ['memberSegments', ['memberId']], + ['memberSegmentsAgg', ['memberId']], + ['memberEnrichmentCache', ['memberId']], + ['memberEnrichments', ['memberId']], + ['memberIdentities', ['memberId']], + ['memberToMerge', ['memberId', 'toMergeId']], + ['memberToMergeRaw', ['memberId', 'toMergeId']], + 
['memberBotSuggestions', ['memberId']], + ['memberNoBot', ['memberId']], + ]) + + for (const [table, memberIdColumns] of tablesToDelete) { + const condition = memberIdColumns.map((c) => `"${c}" = $(memberId)`).join(' or ') + const result = await store + .connection() + .one(`select count(*) as count from "${table}" where ${condition}`, { memberId }) + if (parseInt(result.count) > 0) { + summary += `- ${result.count} records from ${table}\n` + } } + + // Count main member record + const memberExists = await store + .connection() + .one(`select count(*) as count from members where id = $(memberId)`, { memberId }) + if (parseInt(memberExists.count) > 0) { + summary += `- 1 member record\n` + } + + summary += `\n` + return summary } +/** + * Performs the actual deletion of a member and all associated data from the database. + * This function handles the complex deletion order required by foreign key constraints. + * + * DELETION ORDER: + * 1. Clear activityRelations.objectMemberId references (update, not delete) + * 2. Delete maintainersInternal records (by identityId from memberIdentities) + * 3. Delete from all member-related tables + * 4. 
Delete the main member record + * + * @param store - Database store instance (should be within a transaction) + * @param memberId - The member ID to delete + */ export async function deleteMemberFromDb(store: DbStore, memberId: string): Promise { let result = await store.connection().result( ` - update activities set + update "activityRelations" set "objectMemberId" = null, - "objectMemberUsername" = null + "objectMemberUsername" = null, + "updatedAt" = now() where "objectMemberId" is not null and "objectMemberId" = $(memberId) `, { @@ -309,11 +254,23 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom if (result.rowCount > 0) { log.info( - `Cleared ${result.rowCount} activities."objectMemberId" and activities."objectMemberUsername" for memberId ${memberId}!`, + `Cleared ${result.rowCount} activityRelations."objectMemberId" and activityRelations."objectMemberUsername" for memberId ${memberId}!`, ) } - // TODO: Understand what should be done with maintainersInternal + // Delete from maintainersInternal first (foreign key constraint with memberIdentities.id) + result = await store.connection().result( + `delete from "maintainersInternal" where "identityId" in ( + select id from "memberIdentities" where "memberId" = $(memberId) + )`, + { memberId }, + ) + + if (result.rowCount > 0) { + log.info( + `Deleted ${result.rowCount} rows from table maintainersInternal for member ${memberId}!`, + ) + } const tablesToDelete: Map = new Map([ ['activities', ['memberId']], @@ -358,24 +315,3 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom throw new Error(`Failed to delete member - memberId ${memberId}!`) } } - -function addLinesToFile(filePath: string, lines: string[]) { - try { - // Ensure the directory exists - fs.mkdirSync(path.dirname(filePath), { recursive: true }) - - // Check if the file exists - try { - fs.accessSync(filePath) - - // File exists, append lines - fs.appendFileSync(filePath, lines.join('\n') + 
'\n') - } catch (error) { - // File doesn't exist, create it and write lines - fs.writeFileSync(filePath, lines.join('\n') + '\n') - } - } catch (err) { - log.error(err, { filePath }, 'Error while writing to file!') - throw err - } -} diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index d884a74b92..1ad77ac583 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -1,16 +1,54 @@ +import * as readline from 'readline' + import { TINYBIRD_CONFIG } from '../conf' const TINYBIRD_API_URL = 'https://api.us-west-2.aws.tinybird.co/v0/datasources' -const DATA_SOURCES = ['activities', 'activityRelations', 'members', 'memberIdentities'] +const DATA_SOURCES = ['activityRelations', 'members', 'maintainersInternal', 'memberIdentities'] const TOKEN = TINYBIRD_CONFIG().token /** - * This uses "Delete Data Selectively" from Tinybird: + * Member Data Erasure Script (Tinybird Analytics Platform) + * + * This script removes member data from Tinybird datasources for GDPR compliance + * and data deletion requests. It complements the database deletion script by + * cleaning up analytical data stored in Tinybird. + * + * WHAT THIS SCRIPT DOES: + * 1. Shows a detailed summary of records to be deleted from each Tinybird datasource + * 2. Requests user confirmation before proceeding + * 3. Deletes data from Tinybird datasources in the correct order to respect dependencies + * 4. Handles special cases like maintainersInternal which requires identityId-based deletion + * + * TINYBIRD INTEGRATION: + * Uses "Delete Data Selectively" API from Tinybird: * https://www.tinybird.co/docs/classic/get-data-in/data-operations/replace-and-delete-data#delete-data-selectively * - * It deletes member data for GDPR compliance from the `members` and `activityRelations` datasources. 
- * All datasources created from pipes based on these tables will reflect the deletions within one hour, - * as the relevant copy pipes are scheduled to run hourly. + * DATASOURCES AFFECTED (in deletion order): + * 1. activityRelations - Activity relationship records (deleted by memberId) + * 2. members - Member profile data (deleted by memberId) + * 3. maintainersInternal - Repository maintainer records (deleted by identityId from member's identities) + * 4. memberIdentities - Member identity records (deleted by memberId) + * + * FOREIGN KEY HANDLING: + * - maintainersInternal.identityId → memberIdentities.id + * Solution: Use subquery in delete condition - 'identityId IN (SELECT id FROM memberIdentities WHERE memberId = ?)' + * + * DOWNSTREAM EFFECTS: + * All datasources created from pipes based on these tables will reflect the deletions + * after the relevant copy pipes run (typically scheduled hourly). + * + * USAGE: + * npm run script erase-members-data-tinybird <memberId> + * + * REQUIREMENTS: + * - CROWD_TINYBIRD_TOKEN environment variable must be set (mapped to tinybird.token in custom-environment-variables.json) + * - Token must have delete permissions on the specified datasources + * + * SAFETY FEATURES: + * - Shows detailed deletion summary with record counts before proceeding + * - Requires explicit user confirmation (Y/n) + * - Graceful error handling for API failures + * - Special validation for maintainersInternal dependencies */ const args = process.argv.slice(2) @@ -22,10 +60,108 @@ if (args.length !== 1) { const memberId = args[0] +/** + * Prompts the user for Y/n confirmation via command line input + */ +async function promptConfirmation(message: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }) + + return new Promise((resolve) => { + rl.question(`${message} (Y/n): `, (answer) => { + rl.close() + resolve(answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes' || answer === '') + }) + }) +} + +/** + * Queries Tinybird to get the count of records matching a
condition in a specific datasource + * + * @param tableName - The Tinybird datasource name + * @param condition - SQL WHERE condition to count matching records + * @returns Number of matching records, or 0 if query fails + */ +async function getRecordCount(tableName: string, condition: string): Promise { + const query = `SELECT count() as count FROM ${tableName} WHERE ${condition}` + const url = `https://api.us-west-2.aws.tinybird.co/v0/sql` + + const params = new URLSearchParams({ + q: query, + }) + + const response = await fetch(`${url}?${params}`, { + headers: { + Authorization: `Bearer ${TOKEN}`, + }, + }) + + if (!response.ok) { + console.warn(`Failed to get count for ${tableName}: ${response.statusText}`) + return 0 + } + + const data = (await response.json()) as { data?: Array<{ count: number }> } + return data.data?.[0]?.count || 0 +} + +/** + * Generates a comprehensive summary of all data that will be deleted from Tinybird + * datasources for the specified member. Queries each datasource to provide exact record counts. + * + * Handles special logic for maintainersInternal using a subquery to count records + * by identityId from the member's identities. 
+ * + * @param memberId - The member ID to analyze + * @returns Formatted summary string showing what will be deleted from each datasource + */ +async function getTinybirdDeletionSummary(memberId: string): Promise { + let summary = `\n=== TINYBIRD DELETION SUMMARY FOR MEMBER ${memberId} ===\n` + + for (const table of DATA_SOURCES) { + let condition: string + + if (table === 'maintainersInternal') { + // Use subquery to count maintainersInternal records by identityId + condition = `identityId IN (SELECT id FROM memberIdentities WHERE memberId = '${memberId}')` + } else { + condition = `memberId = '${memberId}'` + } + + const count = await getRecordCount(table, condition) + if (count > 0) { + summary += `- ${count} records from ${table}\n` + } + } + + summary += `\n` + return summary +} + +/** + * Deletes member data from a specific Tinybird datasource using the appropriate condition. + * + * For most datasources, deletes by memberId directly. + * For maintainersInternal, uses a subquery to delete by identityId from the member's identities. 
+ * + * @param tableName - The Tinybird datasource name + * @param memberId - The member ID to delete data for + */ async function deleteFromDataSource(tableName: string, memberId: string) { const url = `${TINYBIRD_API_URL}/${tableName}/delete` + let deleteCondition: string + + if (tableName === 'maintainersInternal') { + // Delete maintainersInternal using subquery to get identityIds from memberIdentities + deleteCondition = `identityId IN (SELECT id FROM memberIdentities WHERE memberId = '${memberId}')` + } else { + deleteCondition = `memberId = '${memberId}'` + } + const body = new URLSearchParams({ - delete_condition: `memberId = '${memberId}'`, + delete_condition: deleteCondition, }) const response = await fetch(url, { @@ -52,7 +188,21 @@ async function main() { process.exit(1) } - for (const table of DATA_SOURCES) { + // Show deletion summary and get confirmation + const summary = await getTinybirdDeletionSummary(memberId) + console.log(summary) + + const proceed = await promptConfirmation('Do you want to proceed with the Tinybird deletion?') + + if (!proceed) { + console.log('Deletion cancelled by user') + process.exit(0) + } + + // Process in order to respect foreign key constraints - maintainersInternal before memberIdentities + const orderedTables = ['activityRelations', 'members', 'maintainersInternal', 'memberIdentities'] + + for (const table of orderedTables) { try { await deleteFromDataSource(table, memberId) } catch (err) { diff --git a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts new file mode 100644 index 0000000000..6f3ba31426 --- /dev/null +++ b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts @@ -0,0 +1,216 @@ +import * as readline from 'readline' + +import { DbStore, getDbConnection } from '@crowd/data-access-layer/src/database' +import { getServiceChildLogger } from '@crowd/logging' + +import { DB_CONFIG } from '../conf' + 
/**
 * Member Data Soft Deletion Script (QuestDB Analytics)
 *
 * This script performs soft deletion of member data in QuestDB for GDPR compliance
 * and data deletion requests. Unlike hard deletion scripts, this marks records as
 * deleted and clears sensitive references while preserving analytical structure.
 *
 * WHAT THIS SCRIPT DOES:
 * 1. Shows a summary (record counts) of the data to be soft-deleted/modified in QuestDB
 * 2. Requests user confirmation before proceeding
 * 3. Performs the following operations, in this order:
 *    - Clears objectMemberId and objectMemberUsername on activities that reference the member
 *    - Sets deletedAt on activities whose memberId matches
 *    - Updates updatedAt on every modified row
 *
 * SOFT DELETION APPROACH:
 * - Records are not physically deleted, but marked with a deletedAt timestamp
 * - Queries that filter on deletedAt will exclude these records
 * - Soft-deleted rows stay in the table; cleared objectMember references are overwritten
 *   with NULL and cannot be restored from this table
 *
 * TABLES AFFECTED:
 * - activities: Records where memberId matches are marked as deleted
 * - activities: Records where objectMemberId matches have references cleared
 *
 * QUESTDB CONSIDERATIONS:
 * - UPDATE statements are used to modify existing records
 * - deletedAt is set to the current timestamp (NOW())
 * - updatedAt is also set to NOW() to reflect the modification time
 *
 * USAGE:
 *   npm run script erasure-members-data-questdb <memberId>
 *
 * REQUIREMENTS:
 * - Database connection configured in DB_CONFIG
 * - Permission to UPDATE the activities table
 *
 * SAFETY FEATURES:
 * - Shows a summary with record counts before proceeding
 * - Requires explicit user confirmation (Y/n)
 *
 * RELATIONSHIP TO OTHER SCRIPTS:
 * - Complements erase-member.ts (PostgreSQL hard deletion)
 * - Complements erase-members-data-tinybird.ts (Tinybird hard deletion)
 */

/* eslint-disable @typescript-eslint/no-explicit-any */

const log = getServiceChildLogger('erasure-members-data-questdb')

/**
 * Prompts the user for Y/n confirmation via command line input.
 *
 * @param message - Question shown to the user; ` (Y/n): ` is appended.
 * @returns `true` for "y", "yes" (case-insensitive) or an empty answer (Enter accepts
 *          the default), `false` for anything else.
 */
async function promptConfirmation(message: string): Promise<boolean> {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  })

  return new Promise<boolean>((resolve) => {
    rl.question(`${message} (Y/n): `, (answer) => {
      // Release stdin before resolving so the process is not kept alive by the prompt.
      rl.close()
      const normalized = answer.toLowerCase()
      resolve(normalized === 'y' || normalized === 'yes' || answer === '')
    })
  })
}

/**
 * Counts rows in `activities` that satisfy the given WHERE clause.
 *
 * @param store - Database store instance
 * @param whereClause - SQL predicate; may reference the `$(memberId)` named parameter
 * @param memberId - Value bound to `$(memberId)`
 * @returns The row count as a number
 */
async function countActivities(
  store: DbStore,
  whereClause: string,
  memberId: string,
): Promise<number> {
  const row = await store
    .connection()
    .one(`SELECT count(*) as count FROM activities WHERE ${whereClause}`, { memberId })

  // The count may come back as a string; always parse with an explicit radix.
  return Number.parseInt(String(row.count), 10)
}

/**
 * Generates a summary of all data that will be soft-deleted or modified in QuestDB
 * for the specified member. Queries the activities table to provide exact counts;
 * only non-zero counts produce a line in the summary.
 *
 * @param store - Database store instance
 * @param memberId - The member ID to analyze
 * @returns Formatted summary string showing what will be affected
 */
async function getQuestDbDeletionSummary(store: DbStore, memberId: string): Promise<string> {
  let summary = `\n=== QUESTDB SOFT DELETION SUMMARY FOR MEMBER ${memberId} ===\n`

  // Activities that will be marked as deleted (where memberId matches)
  const toDelete = await countActivities(
    store,
    `"memberId" = $(memberId) AND "deletedAt" IS NULL`,
    memberId,
  )
  if (toDelete > 0) {
    summary += `- ${toDelete} activities will be marked as deleted (deletedAt set)\n`
  }

  // Activities that will have object member references cleared
  const toClear = await countActivities(
    store,
    `"objectMemberId" = $(memberId) AND "deletedAt" IS NULL`,
    memberId,
  )
  if (toClear > 0) {
    summary += `- ${toClear} activities will have objectMemberId/objectMemberUsername cleared\n`
  }

  // Overlap: rows where the member is both the actor and the object
  const overlapping = await countActivities(
    store,
    `"memberId" = $(memberId) AND "objectMemberId" = $(memberId) AND "deletedAt" IS NULL`,
    memberId,
  )
  if (overlapping > 0) {
    summary += `- ${overlapping} activities will have both operations applied (marked deleted AND references cleared)\n`
  }

  summary += `\n`
  return summary
}

/**
 * Performs the actual soft deletion of member data in the QuestDB activities table.
 *
 * OPERATIONS PERFORMED (order matters):
 * 1. Clear objectMemberId and objectMemberUsername where they reference the member
 * 2. Mark activities as deleted where memberId matches
 *
 * Both statements filter on `"deletedAt" IS NULL`. The reference-clearing statement
 * therefore has to run first: if rows were marked deleted first, any activity where
 * the member is both actor and object would no longer match the second statement and
 * would keep its objectMember references.
 *
 * NOTE(review): rows that were already soft-deleted before this run are not touched by
 * either statement and keep their member references — confirm whether erasure
 * requests must cover those rows as well.
 *
 * @param store - Database store instance (should be within a transaction)
 * @param memberId - The member ID to soft delete
 */
async function softDeleteMemberFromQuestDb(store: DbStore, memberId: string): Promise<void> {
  // 1) Clear objectMemberId and objectMemberUsername references
  const cleared = await store.connection().result(
    `
    UPDATE activities SET
      "objectMemberId" = NULL,
      "objectMemberUsername" = NULL,
      "updatedAt" = NOW()
    WHERE "objectMemberId" = $(memberId) AND "deletedAt" IS NULL
    `,
    { memberId },
  )

  if (cleared.rowCount > 0) {
    log.info(
      `Cleared objectMember references in ${cleared.rowCount} activities for memberId ${memberId}`,
    )
  }

  // 2) Mark activities as deleted where memberId matches
  const marked = await store.connection().result(
    `
    UPDATE activities SET
      "deletedAt" = NOW(),
      "updatedAt" = NOW()
    WHERE "memberId" = $(memberId) AND "deletedAt" IS NULL
    `,
    { memberId },
  )

  if (marked.rowCount > 0) {
    log.info(`Marked ${marked.rowCount} activities as deleted for memberId ${memberId}`)
  }
}
+const processArguments = process.argv.slice(2) + +if (processArguments.length !== 1) { + log.error('Expected exactly one argument: memberId') + process.exit(1) +} + +const memberId = processArguments[0] + +setImmediate(async () => { + const dbConnection = await getDbConnection(DB_CONFIG()) + const store = new DbStore(log, dbConnection) + + log.info(`Soft deleting member data from QuestDB for member ID: ${memberId}`) + + try { + // Show deletion summary and get confirmation + const summary = await getQuestDbDeletionSummary(store, memberId) + console.log(summary) + + const proceed = await promptConfirmation( + 'Do you want to proceed with the QuestDB soft deletion?', + ) + + if (!proceed) { + log.info('Soft deletion cancelled by user') + process.exit(0) + } + + await store.transactionally(async (t) => { + // Perform soft deletion operations + await softDeleteMemberFromQuestDb(t, memberId) + }) + + log.info('QuestDB soft deletion completed successfully') + } catch (err) { + log.error(err, { memberId }, 'Failed to soft delete member from QuestDB!') + } + + process.exit(0) +}) From 369140d3c0d9ecdf166758821188441bb8168026 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 13:50:00 +0100 Subject: [PATCH 05/13] chore: further cleanup Signed-off-by: Joana Maia --- .../config/custom-environment-variables.json | 3 -- .../data_sink_worker/src/bin/erase-member.ts | 44 ++++++++++++++++++- .../src/bin/erase-members-data-tinybird.ts | 17 +++---- .../apps/data_sink_worker/src/conf/index.ts | 12 ----- 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/services/apps/data_sink_worker/config/custom-environment-variables.json b/services/apps/data_sink_worker/config/custom-environment-variables.json index 6666602960..07554b54d9 100644 --- a/services/apps/data_sink_worker/config/custom-environment-variables.json +++ b/services/apps/data_sink_worker/config/custom-environment-variables.json @@ -33,8 +33,5 @@ }, "github": { "isSnowflakeEnabled": 
"CROWD_GITHUB_IS_SNOWFLAKE_ENABLED" - }, - "tinybird": { - "token": "CROWD_TINYBIRD_TOKEN" } } diff --git a/services/apps/data_sink_worker/src/bin/erase-member.ts b/services/apps/data_sink_worker/src/bin/erase-member.ts index f8be6bd1ed..7dfdc74c94 100644 --- a/services/apps/data_sink_worker/src/bin/erase-member.ts +++ b/services/apps/data_sink_worker/src/bin/erase-member.ts @@ -18,6 +18,7 @@ import { DB_CONFIG, QUEUE_CONFIG } from '../conf' * 1. Shows a detailed summary of all data to be deleted/modified * 2. Requests user confirmation before proceeding * 3. Performs the following operations in order: + * - Archives member identities to requestedForErasureMemberIdentities (separate step) * - Deletes from maintainersInternal (respects FK constraint with memberIdentities) * - Deletes from all member-related tables (relations, segments, etc.) * - Deletes the main member record @@ -29,6 +30,7 @@ import { DB_CONFIG, QUEUE_CONFIG } from '../conf' * * TABLES AFFECTED: * - maintainersInternal (deleted by identityId from member's identities) + * - requestedForErasureMemberIdentities (memberIdentities are inserted here before deletion) * - activityRelations, memberNoMerge, memberOrganizationAffiliationOverrides * - memberOrganizations, memberSegmentAffiliations, memberSegments, memberSegmentsAgg * - memberEnrichmentCache, memberEnrichments, memberIdentities @@ -133,6 +135,9 @@ setImmediate(async () => { log.info('CLEANUP ACTIVITIES...') + // Archive member identities before deletion + await archiveMemberIdentities(t, memberId) + // delete the member and everything around it await deleteMemberFromDb(t, memberId) @@ -209,7 +214,11 @@ async function getDeletionSummary(store: DbStore, memberId: string): Promise 0) { - summary += `- ${result.count} records from ${table}\n` + if (table === 'memberIdentities') { + summary += `- ${result.count} records from ${table} (will be inserted into requestedForErasureMemberIdentities first)\n` + } else { + summary += `- ${result.count} 
records from ${table}\n` + } } } @@ -225,6 +234,36 @@ async function getDeletionSummary(store: DbStore, memberId: string): Promise { + const insertResult = await store.connection().result( + ` + INSERT INTO "requestedForErasureMemberIdentities" ( + id, "memberId", platform, value, "sourceId", "integrationId", type, verified, "createdAt", "updatedAt" + ) + SELECT id, "memberId", platform, value, "sourceId", "integrationId", type, verified, "createdAt", NOW() + FROM "memberIdentities" + WHERE "memberId" = $(memberId) + `, + { memberId }, + ) + + if (insertResult.rowCount > 0) { + log.info( + `Archived ${insertResult.rowCount} memberIdentities to requestedForErasureMemberIdentities for member ${memberId}`, + ) + } + + return insertResult.rowCount +} + /** * Performs the actual deletion of a member and all associated data from the database. * This function handles the complex deletion order required by foreign key constraints. @@ -232,7 +271,7 @@ async function getDeletionSummary(store: DbStore, memberId: string): Promise `"${c}" = $(memberId)`).join(' or ') result = await store .connection() diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index 1ad77ac583..647bd5564f 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -1,10 +1,7 @@ import * as readline from 'readline' -import { TINYBIRD_CONFIG } from '../conf' - const TINYBIRD_API_URL = 'https://api.us-west-2.aws.tinybird.co/v0/datasources' const DATA_SOURCES = ['activityRelations', 'members', 'maintainersInternal', 'memberIdentities'] -const TOKEN = TINYBIRD_CONFIG().token /** * Member Data Erasure Script (Tinybird Analytics Platform) @@ -38,10 +35,10 @@ const TOKEN = TINYBIRD_CONFIG().token * after the relevant copy pipes run (typically scheduled hourly). 
* * USAGE: - * npm run script erase-members-data-tinybird + * npm run script erase-members-data-tinybird * * REQUIREMENTS: - * - TINYBIRD_TOKEN environment variable must be set + * - Tinybird token must be provided as command line argument * - Token must have delete permissions on the specified datasources * * SAFETY FEATURES: @@ -53,12 +50,13 @@ const TOKEN = TINYBIRD_CONFIG().token const args = process.argv.slice(2) -if (args.length !== 1) { - console.error('Usage: deleteMemberTinybird.ts ') +if (args.length !== 2) { + console.error('Usage: erase-members-data-tinybird.ts ') process.exit(1) } const memberId = args[0] +const TOKEN = args[1] /** * Prompts the user for Y/n confirmation via command line input @@ -183,11 +181,6 @@ async function deleteFromDataSource(tableName: string, memberId: string) { } async function main() { - if (!TOKEN) { - console.error('TINYBIRD_TOKEN environment variable not set!') - process.exit(1) - } - // Show deletion summary and get confirmation const summary = await getTinybirdDeletionSummary(memberId) console.log(summary) diff --git a/services/apps/data_sink_worker/src/conf/index.ts b/services/apps/data_sink_worker/src/conf/index.ts index 2bcd514af6..ec71246f7b 100644 --- a/services/apps/data_sink_worker/src/conf/index.ts +++ b/services/apps/data_sink_worker/src/conf/index.ts @@ -20,10 +20,6 @@ export interface IWorkerConfig { queuePriorityLevel: QueuePriorityLevel } -export interface ITinybirdConfig { - token: string -} - let workerSettings: IWorkerConfig export const WORKER_SETTINGS = (): IWorkerConfig => { if (workerSettings) return workerSettings @@ -84,11 +80,3 @@ export const GITHUB_CONFIG = (): IGithubConfig => { githubConfig = config.get('github') return githubConfig } - -let tinybirdConfig: ITinybirdConfig -export const TINYBIRD_CONFIG = (): ITinybirdConfig => { - if (tinybirdConfig) return tinybirdConfig - - tinybirdConfig = config.get('tinybird') - return tinybirdConfig -} From 21417123499b95b528424f28b0bf630f3a477833 Mon 
Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 14:34:23 +0100 Subject: [PATCH 06/13] chore: further cleanup Signed-off-by: Joana Maia --- .../src/bin/erasure-members-data-questdb.ts | 150 ++++++++++-------- 1 file changed, 80 insertions(+), 70 deletions(-) diff --git a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts index 6f3ba31426..376586fb10 100644 --- a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts +++ b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts @@ -1,57 +1,61 @@ import * as readline from 'readline' -import { DbStore, getDbConnection } from '@crowd/data-access-layer/src/database' +import { generateUUIDv1 } from '@crowd/common' +import { DbStore } from '@crowd/data-access-layer/src/database' import { getServiceChildLogger } from '@crowd/logging' - -import { DB_CONFIG } from '../conf' +import { getClientSQL } from '@crowd/questdb' /** - * Member Data Soft Deletion Script (QuestDB Analytics) + * Member Data Anonymization Script (QuestDB Analytics) * - * This script performs soft deletion of member data in QuestDB for GDPR compliance - * and data deletion requests. Unlike hard deletion scripts, this marks records as - * deleted and clears sensitive references while preserving analytical structure. + * This script performs anonymization and soft deletion of member data in QuestDB for GDPR compliance + * and data deletion requests. Unlike hard deletion scripts, this replaces identifying data with + * anonymous dummy values while preserving analytical structure and marking records as deleted. * * WHAT THIS SCRIPT DOES: - * 1. Shows a detailed summary of all data to be soft-deleted/modified in QuestDB + * 1. Shows a detailed summary of all data to be anonymized/soft-deleted in QuestDB * 2. Requests user confirmation before proceeding * 3. 
Performs the following operations: + * - Replaces memberId with random UUID and username with deleted-{UUID} + * - Replaces objectMemberId with random UUID and objectMemberUsername with deleted-{UUID} * - Sets deletedAt timestamp on activities records matching memberId - * - Clears objectMemberId and objectMemberUsername references in activities * - Updates updatedAt timestamp to reflect the changes * - * SOFT DELETION APPROACH: + * ANONYMIZATION APPROACH: * - Records are not physically deleted, but marked with deletedAt timestamp - * - This preserves referential integrity and analytical structure + * - Identifying data (memberId, username) is replaced with anonymous dummy values + * - Uses random UUIDs to ensure no collision with real data + * - Different UUIDs are used for member vs objectMember references to prevent correlation + * - Preserves referential integrity and analytical structure * - Queries with proper deletedAt filtering will exclude these records - * - Allows for potential data recovery if needed * * TABLES AFFECTED: - * - activities: Records where memberId matches are marked as deleted - * - activities: Records where objectMemberId matches have references cleared + * - activities: Records where memberId matches are anonymized and marked deleted + * - activities: Records where objectMemberId matches have object references anonymized * * QUESTDB CONSIDERATIONS: * - QuestDB uses update statements to modify existing records * - deletedAt is set to current timestamp (NOW()) * - updatedAt is also updated to reflect the modification time + * - Anonymized data uses format: memberId = random-uuid, username = deleted-{random-uuid} * * USAGE: * npm run script erasure-members-data-questdb * * REQUIREMENTS: - * - QuestDB database connection configured in DB_CONFIG + * - QuestDB database connection configured via CROWD_QUESTDB_SQL_* environment variables * - Proper permissions to UPDATE activities table * * SAFETY FEATURES: - * - Shows detailed summary with 
record counts before proceeding + * - Shows detailed anonymization summary with record counts before proceeding * - Requires explicit user confirmation (Y/n) * - Comprehensive error handling and logging - * - Preserves data for potential recovery + * - Data is anonymized, not destroyed, allowing for analytical continuity * * RELATIONSHIP TO OTHER SCRIPTS: * - Complements erase-member.ts (PostgreSQL hard deletion) * - Complements erase-members-data-tinybird.ts (Tinybird hard deletion) - * - Should be run before hard deletion scripts if data recovery might be needed + * - Can be run independently to anonymize QuestDB data while preserving analytics */ /* eslint-disable @typescript-eslint/no-explicit-any */ @@ -76,99 +80,107 @@ async function promptConfirmation(message: string): Promise { } /** - * Generates a comprehensive summary of all data that will be soft-deleted or modified + * Generates a comprehensive summary of all data that will be anonymized and soft-deleted * in QuestDB for the specified member. Queries the activities table to provide exact counts. 
* - * @param store - Database store instance + * @param qdbStore - QuestDB store instance * @param memberId - The member ID to analyze - * @returns Formatted summary string showing what will be affected + * @returns Formatted summary string showing what will be anonymized */ -async function getQuestDbDeletionSummary(store: DbStore, memberId: string): Promise { - let summary = `\n=== QUESTDB SOFT DELETION SUMMARY FOR MEMBER ${memberId} ===\n` +async function getQuestDbDeletionSummary(qdbStore: DbStore, memberId: string): Promise { + let summary = `\n=== QUESTDB ANONYMIZATION SUMMARY FOR MEMBER ${memberId} ===\n` - // Count activities that will be marked as deleted (where memberId matches) - const activitiesToDelete = await store + // Count activities that will be anonymized and marked as deleted (where memberId matches) + const activitiesToAnonymize = await qdbStore .connection() .one( `SELECT count(*) as count FROM activities WHERE "memberId" = $(memberId) AND "deletedAt" IS NULL`, - { - memberId, - }, + { memberId }, ) - if (parseInt(activitiesToDelete.count) > 0) { - summary += `- ${activitiesToDelete.count} activities will be marked as deleted (deletedAt set)\n` + if (parseInt(activitiesToAnonymize.count) > 0) { + summary += `- ${activitiesToAnonymize.count} activities will be anonymized (memberId → random UUID, username → deleted-{UUID}) and marked deleted\n` } - // Count activities that will have object member references cleared - const activitiesToClear = await store + // Count activities that will have object member references anonymized + const activitiesToAnonymizeObject = await qdbStore .connection() .one( `SELECT count(*) as count FROM activities WHERE "objectMemberId" = $(memberId) AND "deletedAt" IS NULL`, - { - memberId, - }, + { memberId }, ) - if (parseInt(activitiesToClear.count) > 0) { - summary += `- ${activitiesToClear.count} activities will have objectMemberId/objectMemberUsername cleared\n` + if (parseInt(activitiesToAnonymizeObject.count) > 
0) { + summary += `- ${activitiesToAnonymizeObject.count} activities will have objectMember references anonymized (objectMemberId → random UUID, objectMemberUsername → deleted-{UUID})\n` } // Check for overlap (records that will have both operations applied) - const overlappingRecords = await store + const overlappingRecords = await qdbStore .connection() .one( `SELECT count(*) as count FROM activities WHERE "memberId" = $(memberId) AND "objectMemberId" = $(memberId) AND "deletedAt" IS NULL`, { memberId }, ) if (parseInt(overlappingRecords.count) > 0) { - summary += `- ${overlappingRecords.count} activities will have both operations applied (marked deleted AND references cleared)\n` + summary += `- ${overlappingRecords.count} activities will have both member and objectMember data anonymized\n` } - summary += `\n` + summary += `\nNOTE: Different random UUIDs will be used for member vs objectMember references to prevent correlation.\n\n` return summary } /** - * Performs the actual soft deletion of member data in QuestDB activities table. - * This function handles both marking records as deleted and clearing object member references. + * Performs anonymization and soft deletion of member data in QuestDB activities table. + * This function replaces identifying data with dummy values and marks records as deleted. * * OPERATIONS PERFORMED: - * 1. Mark activities as deleted where memberId matches - * 2. Clear objectMemberId and objectMemberUsername where they reference the member + * 1. Replace memberId with random UUID and username with deleted-${uuid} where memberId matches + * 2. 
Replace objectMemberId with random UUID and objectMemberUsername with deleted-${uuid} where they reference the member * - * @param store - Database store instance (should be within a transaction) - * @param memberId - The member ID to soft delete + * @param qdbStore - QuestDB store instance + * @param memberId - The member ID to anonymize and soft delete */ -async function softDeleteMemberFromQuestDb(store: DbStore, memberId: string): Promise { - // Mark activities as deleted where memberId matches - let result = await store.connection().result( +async function softDeleteMemberFromQuestDb(qdbStore: DbStore, memberId: string): Promise { + // Generate random UUID for anonymization + const anonymousUuid = generateUUIDv1() + const anonymousUsername = `deleted-${anonymousUuid}` + + // Anonymize activities where memberId matches + let result = await qdbStore.connection().result( ` UPDATE activities SET + "memberId" = $(anonymousUuid), + "username" = $(anonymousUsername), "deletedAt" = NOW(), "updatedAt" = NOW() WHERE "memberId" = $(memberId) AND "deletedAt" IS NULL `, - { memberId }, + { memberId, anonymousUuid, anonymousUsername }, ) if (result.rowCount > 0) { - log.info(`Marked ${result.rowCount} activities as deleted for memberId ${memberId}`) + log.info( + `Anonymized and marked ${result.rowCount} activities as deleted for memberId ${memberId}`, + ) } - // Clear objectMemberId and objectMemberUsername references - result = await store.connection().result( + // Generate separate UUID for object member references to avoid correlation + const objectAnonymousUuid = generateUUIDv1() + const objectAnonymousUsername = `deleted-${objectAnonymousUuid}` + + // Anonymize objectMemberId and objectMemberUsername references + result = await qdbStore.connection().result( ` UPDATE activities SET - "objectMemberId" = NULL, - "objectMemberUsername" = NULL, + "objectMemberId" = $(objectAnonymousUuid), + "objectMemberUsername" = $(objectAnonymousUsername), "updatedAt" = NOW() WHERE 
"objectMemberId" = $(memberId) AND "deletedAt" IS NULL `, - { memberId }, + { memberId, objectAnonymousUuid, objectAnonymousUsername }, ) if (result.rowCount > 0) { log.info( - `Cleared objectMember references in ${result.rowCount} activities for memberId ${memberId}`, + `Anonymized objectMember references in ${result.rowCount} activities for memberId ${memberId}`, ) } } @@ -183,33 +195,31 @@ if (processArguments.length !== 1) { const memberId = processArguments[0] setImmediate(async () => { - const dbConnection = await getDbConnection(DB_CONFIG()) - const store = new DbStore(log, dbConnection) + const qdbConnection = await getClientSQL() + const qdbStore = new DbStore(log, qdbConnection) - log.info(`Soft deleting member data from QuestDB for member ID: ${memberId}`) + log.info(`Anonymizing member data in QuestDB for member ID: ${memberId}`) try { - // Show deletion summary and get confirmation - const summary = await getQuestDbDeletionSummary(store, memberId) + // Show anonymization summary and get confirmation + const summary = await getQuestDbDeletionSummary(qdbStore, memberId) console.log(summary) const proceed = await promptConfirmation( - 'Do you want to proceed with the QuestDB soft deletion?', + 'Do you want to proceed with the QuestDB anonymization?', ) if (!proceed) { - log.info('Soft deletion cancelled by user') + log.info('Anonymization cancelled by user') process.exit(0) } - await store.transactionally(async (t) => { - // Perform soft deletion operations - await softDeleteMemberFromQuestDb(t, memberId) - }) + // Perform anonymization operations + await softDeleteMemberFromQuestDb(qdbStore, memberId) - log.info('QuestDB soft deletion completed successfully') + log.info('QuestDB member anonymization completed successfully') } catch (err) { - log.error(err, { memberId }, 'Failed to soft delete member from QuestDB!') + log.error(err, { memberId }, 'Failed to anonymize member data in QuestDB!') } process.exit(0) From 4c2d7be15bbed728cf666fe21cca2282944c9a19 
Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 14:44:26 +0100 Subject: [PATCH 07/13] chore: skip summary for questdb --- .../src/bin/erasure-members-data-questdb.ts | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts index 376586fb10..fa54398321 100644 --- a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts +++ b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts @@ -40,7 +40,7 @@ import { getClientSQL } from '@crowd/questdb' * - Anonymized data uses format: memberId = random-uuid, username = deleted-{random-uuid} * * USAGE: - * npm run script erasure-members-data-questdb + * npm run script erasure-members-data-questdb [--no-summary] * * REQUIREMENTS: * - QuestDB database connection configured via CROWD_QUESTDB_SQL_* environment variables @@ -187,12 +187,13 @@ async function softDeleteMemberFromQuestDb(qdbStore: DbStore, memberId: string): const processArguments = process.argv.slice(2) -if (processArguments.length !== 1) { - log.error('Expected exactly one argument: memberId') +if (processArguments.length < 1 || processArguments.length > 2) { + log.error('Usage: erasure-members-data-questdb [--no-summary]') process.exit(1) } const memberId = processArguments[0] +const skipSummary = processArguments.includes('--no-summary') setImmediate(async () => { const qdbConnection = await getClientSQL() @@ -201,12 +202,14 @@ setImmediate(async () => { log.info(`Anonymizing member data in QuestDB for member ID: ${memberId}`) try { - // Show anonymization summary and get confirmation - const summary = await getQuestDbDeletionSummary(qdbStore, memberId) - console.log(summary) + if (!skipSummary) { + // Show anonymization summary and get confirmation + const summary = await getQuestDbDeletionSummary(qdbStore, memberId) + console.log(summary) + } 
const proceed = await promptConfirmation( - 'Do you want to proceed with the QuestDB anonymization?', + 'Do you want to proceed with the QuestDB anonymization and soft deletion?', ) if (!proceed) { From 41896d5e88a69bb524505bee52d6084e675c964d Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 15:05:49 +0100 Subject: [PATCH 08/13] fix: logging message --- .../src/bin/erasure-members-data-questdb.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts index fa54398321..4db9433984 100644 --- a/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts +++ b/services/apps/data_sink_worker/src/bin/erasure-members-data-questdb.ts @@ -156,10 +156,14 @@ async function softDeleteMemberFromQuestDb(qdbStore: DbStore, memberId: string): { memberId, anonymousUuid, anonymousUsername }, ) + // NOTE: QuestDB UPDATE operations may report inaccurate rowCount due to PostgreSQL compatibility issues + // Also, QuestDB updates are asynchronous and may not be immediately visible in subsequent queries if (result.rowCount > 0) { log.info( - `Anonymized and marked ${result.rowCount} activities as deleted for memberId ${memberId}`, + `QuestDB reported updating ${result.rowCount} activities (note: QuestDB rowCount may be inaccurate for UPDATE operations)`, ) + } else { + log.info(`No activities found to anonymize for memberId ${memberId}`) } // Generate separate UUID for object member references to avoid correlation @@ -180,8 +184,10 @@ async function softDeleteMemberFromQuestDb(qdbStore: DbStore, memberId: string): if (result.rowCount > 0) { log.info( - `Anonymized objectMember references in ${result.rowCount} activities for memberId ${memberId}`, + `QuestDB reported updating ${result.rowCount} objectMember activities (note: QuestDB rowCount may be inaccurate for UPDATE operations)`, ) + } else { + 
log.info(`No objectMember activities found to anonymize for memberId ${memberId}`) } } From 1fffaa0f11d439230bf665fcd815cdb832e22b00 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 15:50:28 +0100 Subject: [PATCH 09/13] fix: queries in TB --- .../data_sink_worker/src/bin/erase-members-data-tinybird.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index 647bd5564f..30edaa225c 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -124,6 +124,9 @@ async function getTinybirdDeletionSummary(memberId: string): Promise { if (table === 'maintainersInternal') { // Use subquery to count maintainersInternal records by identityId condition = `identityId IN (SELECT id FROM memberIdentities WHERE memberId = '${memberId}')` + } else if (table === 'members') { + // Members table uses 'id' as the primary key, not 'memberId' + condition = `id = '${memberId}'` } else { condition = `memberId = '${memberId}'` } @@ -154,6 +157,9 @@ async function deleteFromDataSource(tableName: string, memberId: string) { if (tableName === 'maintainersInternal') { // Delete maintainersInternal using subquery to get identityIds from memberIdentities deleteCondition = `identityId IN (SELECT id FROM memberIdentities WHERE memberId = '${memberId}')` + } else if (tableName === 'members') { + // Members table uses 'id' as the primary key, not 'memberId' + deleteCondition = `id = '${memberId}'` } else { deleteCondition = `memberId = '${memberId}'` } From 35ee7fbd95b3f8440b1f3210c738ca97230427cb Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 16:21:29 +0100 Subject: [PATCH 10/13] fix: sql query --- .../src/bin/erase-members-data-tinybird.ts | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git 
a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index 30edaa225c..c6be06e36d 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -83,21 +83,24 @@ async function promptConfirmation(message: string): Promise { * @returns Number of matching records, or 0 if query fails */ async function getRecordCount(tableName: string, condition: string): Promise { - const query = `SELECT count() as count FROM ${tableName} WHERE ${condition}` + const query = `SELECT count() as count FROM ${tableName} WHERE ${condition} FORMAT JSON` const url = `https://api.us-west-2.aws.tinybird.co/v0/sql` - const params = new URLSearchParams({ - q: query, - }) - - const response = await fetch(`${url}?${params}`, { + const response = await fetch(url, { + method: 'POST', headers: { Authorization: `Bearer ${TOKEN}`, + 'Content-Type': 'application/json', }, + body: JSON.stringify({ + q: query, + }), }) if (!response.ok) { - console.warn(`Failed to get count for ${tableName}: ${response.statusText}`) + const errorText = await response.text() + console.warn(`Failed to get count for ${tableName}: ${response.status} ${response.statusText}`) + console.warn(`Error response: ${errorText}`) return 0 } @@ -131,9 +134,14 @@ async function getTinybirdDeletionSummary(memberId: string): Promise { condition = `memberId = '${memberId}'` } + console.log(`Checking ${table} with condition: ${condition}`) const count = await getRecordCount(table, condition) + console.log(`${table}: ${count} records found (type: ${typeof count})`) + if (count > 0) { summary += `- ${count} records from ${table}\n` + } else { + console.log(`No records added to summary for ${table} - count was: ${count}`) } } From 88d8493fd53e20b2b684a58c60a432c9e20af5eb Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 16:35:40 +0100 
Subject: [PATCH 11/13] chore: add some failsafe mechanisms --- .../src/bin/erase-members-data-tinybird.ts | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index c6be06e36d..82b65c2224 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -172,10 +172,40 @@ async function deleteFromDataSource(tableName: string, memberId: string) { deleteCondition = `memberId = '${memberId}'` } + // Safety check: ensure delete condition is not empty and contains the memberId + if (!deleteCondition || !deleteCondition.includes(memberId)) { + throw new Error(`Invalid delete condition generated: ${deleteCondition}`) + } + const body = new URLSearchParams({ delete_condition: deleteCondition, }) + // Log the complete request details before execution + console.log(`\n=== ABOUT TO DELETE FROM ${tableName.toUpperCase()} ===`) + console.log(`URL: ${url}`) + console.log(`Method: POST`) + console.log(`Headers:`) + console.log( + ` Authorization: Bearer ${TOKEN.substring(0, 20)}...${TOKEN.substring(TOKEN.length - 10)}`, + ) + console.log(` Content-Type: application/x-www-form-urlencoded`) + console.log(`Body:`) + console.log(` delete_condition: ${deleteCondition}`) + console.log(`\nEquivalent curl command:`) + console.log(`curl -X POST \\`) + console.log(` -H "Authorization: Bearer ${TOKEN}" \\`) + console.log(` -H "Content-Type: application/x-www-form-urlencoded" \\`) + console.log(` --data-urlencode 'delete_condition=${deleteCondition}' \\`) + console.log(` "${url}"`) + + // Ask for final confirmation for this specific deletion + const proceed = await promptConfirmation(`\nProceed with deleting from ${tableName}?`) + if (!proceed) { + console.log(`Skipped deletion from ${tableName}`) + return + } + const response = await 
fetch(url, { method: 'POST', headers: { From a26d715015527e6955d5c609498f3e74bc698790 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 17:02:00 +0100 Subject: [PATCH 12/13] chore: add some failsafe mechanisms --- .../data_sink_worker/src/bin/erase-member.ts | 29 +++++++++++++++++++ .../src/bin/erase-members-data-tinybird.ts | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/services/apps/data_sink_worker/src/bin/erase-member.ts b/services/apps/data_sink_worker/src/bin/erase-member.ts index 7dfdc74c94..376280536d 100644 --- a/services/apps/data_sink_worker/src/bin/erase-member.ts +++ b/services/apps/data_sink_worker/src/bin/erase-member.ts @@ -298,6 +298,18 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom } // Delete from maintainersInternal first (foreign key constraint with memberIdentities.id) + const maintainersQuery = `delete from "maintainersInternal" where "identityId" in ( + select id from "memberIdentities" where "memberId" = '${memberId}' + )` + console.log(`\n=== ABOUT TO DELETE FROM MAINTAINERSINTERNAL ===`) + console.log(`Query: ${maintainersQuery}`) + const proceedMaintainers = await promptConfirmation( + 'Proceed with deleting from maintainersInternal?', + ) + if (!proceedMaintainers) { + throw new Error('User cancelled deletion from maintainersInternal') + } + result = await store.connection().result( `delete from "maintainersInternal" where "identityId" in ( select id from "memberIdentities" where "memberId" = $(memberId) @@ -338,6 +350,15 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom } const condition = memberIdColumns.map((c) => `"${c}" = $(memberId)`).join(' or ') + const deleteQuery = `delete from "${table}" where ${condition.replace('$(memberId)', `'${memberId}'`)}` + console.log(`\n=== ABOUT TO DELETE FROM ${table.toUpperCase()} ===`) + console.log(`Query: ${deleteQuery}`) + const proceedTable = await promptConfirmation(`Proceed with 
deleting from ${table}?`) + if (!proceedTable) { + log.info(`Skipped deletion from ${table}`) + continue + } + result = await store .connection() .result(`delete from "${table}" where ${condition}`, { memberId }) @@ -347,6 +368,14 @@ export async function deleteMemberFromDb(store: DbStore, memberId: string): Prom } } + const finalDeleteQuery = `delete from members where id = '${memberId}'` + console.log(`\n=== ABOUT TO DELETE MAIN MEMBER RECORD ===`) + console.log(`Query: ${finalDeleteQuery}`) + const proceedFinal = await promptConfirmation('Proceed with deleting the main member record?') + if (!proceedFinal) { + throw new Error('User cancelled deletion of main member record') + } + result = await store .connection() .result(`delete from members where id = $(memberId)`, { memberId }) diff --git a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts index 82b65c2224..b3cbb1f624 100644 --- a/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts +++ b/services/apps/data_sink_worker/src/bin/erase-members-data-tinybird.ts @@ -237,7 +237,7 @@ async function main() { } // Process in order to respect foreign key constraints - maintainersInternal before memberIdentities - const orderedTables = ['activityRelations', 'members', 'maintainersInternal', 'memberIdentities'] + const orderedTables = ['activityRelations', 'maintainersInternal', 'memberIdentities', 'members'] for (const table of orderedTables) { try { From 727174c73ccf8afcba53d5783e74adb1eb8e5e70 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Thu, 11 Sep 2025 17:09:02 +0100 Subject: [PATCH 13/13] fix: insert on requestedForErasureMemberIdentities --- services/apps/data_sink_worker/src/bin/erase-member.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/apps/data_sink_worker/src/bin/erase-member.ts b/services/apps/data_sink_worker/src/bin/erase-member.ts index 376280536d..c476a54bec 
100644 --- a/services/apps/data_sink_worker/src/bin/erase-member.ts +++ b/services/apps/data_sink_worker/src/bin/erase-member.ts @@ -246,9 +246,9 @@ export async function archiveMemberIdentities(store: DbStore, memberId: string): const insertResult = await store.connection().result( ` INSERT INTO "requestedForErasureMemberIdentities" ( - id, "memberId", platform, value, "sourceId", "integrationId", type, verified, "createdAt", "updatedAt" + id, platform, value, type ) - SELECT id, "memberId", platform, value, "sourceId", "integrationId", type, verified, "createdAt", NOW() + SELECT id, platform, value, type FROM "memberIdentities" WHERE "memberId" = $(memberId) `,