@@ -9,11 +9,12 @@ Code related to permanently deleting projects.
99
1010import getLogger from "@cocalc/backend/logger" ;
1111import getPool from "@cocalc/database/pool" ;
12- import { callback2 } from "@cocalc/util/async-utils" ;
13- import { PostgreSQL } from "./types" ;
14- import { minutes_ago } from "@cocalc/util/misc" ;
1512import { getServerSettings } from "@cocalc/database/settings" ;
13+ import { callback2 } from "@cocalc/util/async-utils" ;
1614import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults" ;
15+ import { minutes_ago } from "@cocalc/util/misc" ;
16+ import { bulk_delete } from "./bulk-delete" ;
17+ import { PostgreSQL } from "./types" ;
1718
1819const log = getLogger ( "db:delete-projects" ) ;
1920
@@ -59,8 +60,9 @@ async function get_account_id(
5960}
6061
6162/*
62- This deletes all projects older than the given number of days, from the perspective of a user.
63- Another task has to run to actually get rid of the data, etc.
63+ This removes all users from all projects older than the given number of days and marked as deleted.
64+ In particular, users are no longer able to access that project.
65+ The "cleanup_old_projects_data" function has to run to actually get rid of the data, etc.
6466*/
6567export async function unlink_old_deleted_projects (
6668 db : PostgreSQL ,
@@ -70,7 +72,7 @@ export async function unlink_old_deleted_projects(
7072 query : "UPDATE projects" ,
7173 set : { users : null } ,
7274 where : [
73- "deleted = true" ,
75+ "deleted = true" ,
7476 "users IS NOT NULL" ,
7577 `last_edited <= NOW() - '${ age_d } days'::INTERVAL` ,
7678 ] ,
@@ -83,27 +85,32 @@ FROM projects as p
8385 INNER JOIN syncstrings as s
8486 ON p.project_id = s.project_id
8587WHERE p.deleted = true
88+ AND users IS NULL
8689 AND p.state ->> 'state' != 'deleted'
90+ ORDER BY
91+ p.project_id, s.string_id
8792` ;
8893
8994/*
90- This is more thorough than the above. It issues actual delete operations on data of projects marked as deleted.
95+ This more thorough delete procedure comes after the above.
96+ It issues actual delete operations on data of projects marked as deleted.
9197 When done, it sets the state.state to "deleted".
9298
9399 The operation involves deleting all syncstrings of that project (and, associated with that, patches),
94- and only for on-prem setups, it also deletes all the data stored in the project on disk.
100+ and only for on-prem setups, it also deletes all the data stored in the project on disk and in various database tables.
95101
96- This function is called every couple of hours. Hence ensure it does not run longer than the given max_run_m time (minutes)
102+ This function is called every couple of hours. Hence it ensures it does not run longer than the given max_run_m time (minutes).
97103*/
98104export async function cleanup_old_projects_data (
99105 db : PostgreSQL ,
100- delay_ms = 50 ,
101106 max_run_m = 60 ,
102107) {
103108 const settings = await getServerSettings ( ) ;
104109 const on_prem = settings . kucalc === KUCALC_ON_PREMISES ;
110+ const L0 = log . extend ( "cleanup_old_projects_data" ) ;
111+ const L = L0 . debug ;
105112
106- log . debug ( "cleanup_old_projects_data" , { delay_ms , max_run_m, on_prem } ) ;
113+ log . debug ( "cleanup_old_projects_data" , { max_run_m, on_prem } ) ;
107114 const start_ts = new Date ( ) ;
108115
109116 const pool = getPool ( ) ;
@@ -115,34 +122,95 @@ export async function cleanup_old_projects_data(
115122 for ( const row of rows ) {
116123 const { project_id, string_id } = row ;
117124 if ( start_ts < minutes_ago ( max_run_m ) ) {
118- log . debug (
119- `cleanup_old_projects_data: too much time elapsed, breaking after ${ num } syncstrings` ,
120- ) ;
125+ L ( `too much time elapsed, breaking after ${ num } syncstrings` ) ;
121126 break ;
122127 }
123128
124- log . debug (
125- `cleanup_old_projects_data: deleting syncstring ${ project_id } /${ string_id } ` ,
126- ) ;
129+ L ( `deleting syncstring ${ project_id } /${ string_id } ` ) ;
127130 num += 1 ;
128131 await callback2 ( db . delete_syncstring , { string_id } ) ;
129132
130- // wait for the given amount of delay_ms millio seconds
131- await new Promise ( ( done ) => setTimeout ( done , delay_ms ) ) ;
133+ // wait a bit after deleting syncstrings, e.g. to let the standby db catch up
134+ await new Promise ( ( done ) => setTimeout ( done , 100 ) ) ;
132135
136+ // Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project-specific actions when the ID changes
133137 if ( pid != project_id ) {
134138 pid = project_id ;
139+ const L2 = L0 . extend ( project_id ) . debug ;
140+
135141 if ( on_prem ) {
136- log . debug (
137- `cleanup_old_projects_data: deleting project data in ${ project_id } ` ,
138- ) ;
142+ L2 ( `cleanup_old_projects_data for project_id=${ project_id } ` ) ;
139143 // TODO: this only works on-prem, and requires the project files to be mounted
140144
141- log . debug ( `deleting all shared files in project ${ project_id } ` ) ;
145+ L2 ( `deleting all shared files in project ${ project_id } ` ) ;
142146 // TODO: do it directly like above, and also get rid of all those shares in the database
147+
148+ const delPublicPaths = await bulk_delete ( {
149+ table : "public_paths" ,
150+ field : "project_id" ,
151+ value : project_id ,
152+ } ) ;
153+ L2 ( `deleted public_paths ${ delPublicPaths . rowsDeleted } entries` ) ;
154+
155+ const delProjectLog = await bulk_delete ( {
156+ table : "project_log" ,
157+ field : "project_id" ,
158+ value : project_id ,
159+ } ) ;
160+ L2 ( `deleted project_log ${ delProjectLog . rowsDeleted } entries` ) ;
161+
162+ const delFileUse = await bulk_delete ( {
163+ table : "file_use" ,
164+ field : "project_id" ,
165+ value : project_id ,
166+ } ) ;
167+ L2 ( `deleted file_use ${ delFileUse . rowsDeleted } entries` ) ;
168+
169+ const delAccessLog = await bulk_delete ( {
170+ table : "file_access_log" ,
171+ field : "project_id" ,
172+ value : project_id ,
173+ } ) ;
174+ L2 ( `deleted file_access_log ${ delAccessLog . rowsDeleted } entries` ) ;
175+
176+ const delJupyterApiLog = await bulk_delete ( {
177+ table : "jupyter_api_log" ,
178+ field : "project_id" ,
179+ value : project_id ,
180+ } ) ;
181+ L2 ( `deleted jupyter_api_log ${ delJupyterApiLog . rowsDeleted } entries` ) ;
182+
183+ for ( const field of [
184+ "target_project_id" ,
185+ "source_project_id" ,
186+ ] as const ) {
187+ const delCopyPaths = await bulk_delete ( {
188+ table : "copy_paths" ,
189+ field,
190+ value : project_id ,
191+ } ) ;
192+ L2 ( `deleted copy_paths/${ field } ${ delCopyPaths . rowsDeleted } entries` ) ;
193+ }
194+
195+ const delListings = await bulk_delete ( {
196+ table : "listings" ,
197+ field : "project_id" ,
198+ id : "project_id" , // TODO listings has a more complex ID, is this a problem?
199+ value : project_id ,
200+ } ) ;
201+ L2 ( `deleted ${ delListings . rowsDeleted } listings` ) ;
202+
203+ const delInviteTokens = await bulk_delete ( {
204+ table : "project_invite_tokens" ,
205+ field : "project_id" ,
206+ value : project_id ,
207+ id : "token" ,
208+ } ) ;
209+ L2 ( `deleted ${ delInviteTokens . rowsDeleted } entries` ) ;
143210 }
144211
145212 // now, that we're done with that project, mark it as state.state ->> 'deleted'
213+ // in addition to the flag "deleted = true"
146214 await callback2 ( db . set_project_state , {
147215 project_id,
148216 state : "deleted" ,
0 commit comments