Skip to content

Commit 4a32851

Browse files
authored
Merge pull request #292 from ISISComputingGroup/database_delete_rows
Add database support script to delete rows
2 parents 3d0dbf7 + a9e1216 commit 4a32851

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
@echo off
REM Wrapper script: sets up uv and a virtual environment via the sibling helper
REM scripts, checks that git is available, installs the MySQL connector and then
REM runs part_truncate_archive.py, forwarding all command-line arguments to it.
REM Exits with 0 on success, or with the failing step's error level otherwise.
setlocal EnableDelayedExpansion
set "SOURCE=\\isis.cclrc.ac.uk\inst$\Kits$\CompGroup\ICP\Releases"
call "%~dp0install_or_update_uv.bat"
call "%~dp0set_up_venv.bat"
IF %errorlevel% neq 0 goto ERROR

git --version

IF %errorlevel% neq 0 (
    echo No installation of Git found on machine. Please download Git from https://git-scm.com/downloads before proceeding.
    goto ERROR
)

REM Matches current MySQL version
uv pip install mysql-connector-python==8.4.0

python -u part_truncate_archive.py %*
IF %errorlevel% neq 0 goto ERROR
REM UV_TEMP_VENV is not set in this script; presumably set_up_venv.bat defines it.
call rmdir /s /q %UV_TEMP_VENV%

exit /b 0

:ERROR
REM Capture the failing error level BEFORE cleanup, since rmdir overwrites it.
REM There must be no spaces around "=": "set errcode = ..." would define a
REM variable called "errcode " (trailing space), leaving !errcode! empty so the
REM script would exit with rmdir's status instead of the real error.
set "errcode=%ERRORLEVEL%"
call rmdir /s /q %UV_TEMP_VENV%
EXIT /b !errcode!
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import argparse
2+
import time
3+
from contextlib import closing
4+
5+
import mysql.connector
6+
7+
"""
8+
This script will delete rows from the mysql archive `sample` table
9+
that are more than `history` days old in batches of `limit` to improve performance.
10+
11+
This could be done after a database backup and be used instead of truncating
12+
the entire database. It will not reduce the size of the database file on disk, but the
13+
deleted rows will be re-used by the database and so the file should not grow.
14+
15+
A typical usage would be if the instrument already has information in mysql it would like
16+
to keep a bit longer, or is running. The backup is safe to do on a running instrument as it uses
17+
`single transaction` mode, but if the backup takes 2 hours you'll have two hours of missed
18+
data after a truncate. Hence this script to leave some old data in mysql.
19+
"""
20+
21+
22+
def main() -> None:
    """Delete old rows from the MySQL ``archive.sample`` table in batches.

    Command-line options:
        --host      MySQL host to connect to (default: 127.0.0.1).
        --limit     Maximum rows removed by each DELETE (default: 1000).
        --sleep     Seconds to pause between DELETE queries (default: 0.5).
        --history   Days of data to keep (default: 7).
        --password  MySQL root password (default: empty).
        --dry-run   Report what would be deleted, then exit without deleting.

    The largest ``sample_id`` whose ``smpl_time`` is older than ``history``
    days is looked up once; rows with a smaller ``sample_id`` are then deleted
    ``limit`` at a time until a DELETE affects no rows.

    Invalid option values terminate the program through ``parser.error``
    (``SystemExit`` with status 2) before any database connection is made.
    """
    parser = argparse.ArgumentParser(description="Set query options")
    parser.add_argument(
        "--host", dest="host", action="store", help="Host (default: 127.0.0.1)", default="127.0.0.1"
    )
    parser.add_argument(
        "--limit",
        dest="limit",
        action="store",
        type=int,
        help="Rows to delete each query (default: 1000)",
        default=1000,
    )
    parser.add_argument(
        "--sleep",
        dest="sleep",
        action="store",
        type=float,
        help="Seconds to sleep between queries (default: 0.5)",
        default=0.5,
    )
    parser.add_argument(
        "--history",
        dest="history",
        action="store",
        type=int,
        help="How many days to keep (default: 7)",
        default=7,
    )
    parser.add_argument(
        "--password", dest="password", action="store", help="mysql root password", default=""
    )
    parser.add_argument("--dry-run", dest="dry_run", action="store_true", help="dry run")

    args = parser.parse_args()

    # Validate before touching the database: a zero limit would divide by zero
    # below, a negative limit gives invalid SQL, a negative sleep makes
    # time.sleep() raise, and a negative history would move the cut-off into
    # the future and so select every row for deletion.
    if args.limit < 1:
        parser.error("--limit must be a positive integer")
    if args.sleep < 0:
        parser.error("--sleep must not be negative")
    if args.history < 0:
        parser.error("--history must not be negative")

    # ignore pyright checking as oracle bug in type signature of close() method
    with closing(
        mysql.connector.connect(
            user="root", password=args.password, host=args.host, database="archive"
        )  # pyright: ignore
    ) as conn:
        # this is so we don't cache query results and keep getting the same answer
        conn.autocommit = True

        with closing(conn.cursor(prepared=True)) as c:
            c.execute("SET SQL_LOG_BIN=0")  # disable any binary logging for this session
            print(f"Looking for sample_id corresponding to {args.history} days ago")
            c.execute(
                "SELECT MAX(sample_id) FROM sample WHERE smpl_time < TIMESTAMPADD(DAY, -?, NOW())",
                (args.history,),
            )
            row = c.fetchone()
            sample_id = row[0] if row else None
            if sample_id is None:
                # MAX() over no rows is NULL: there is nothing old enough to
                # delete, and building the DELETE with "None" would be bad SQL.
                print(f"No rows older than {args.history} days found, nothing to delete")
                return

            c.execute(
                "SELECT COUNT(sample_id) FROM sample "
                "WHERE smpl_time < TIMESTAMPADD(DAY, -?, NOW())",
                (args.history,),
            )
            count_sample_id = c.fetchone()[0]
            print(
                f"ID of last row to delete is {sample_id} and there are {count_sample_id} rows "
                f"-> {int(1 + count_sample_id / args.limit)} delete operations"
            )
            print(
                f"This will take at least {args.sleep * count_sample_id / args.limit:.1f} "
                "seconds based on sleep time alone"
            )
            if args.dry_run:
                print("Exiting as dry-run")
                return

            # Both interpolated values are integers (sample_id is cast, limit is
            # parsed with type=int), and the statement is the same every pass.
            # NOTE(review): the comparison is strict, so the row whose id equals
            # sample_id is kept even though the message above calls it the
            # "last row to delete" - confirm which of the two is intended.
            delete_query = (
                f"DELETE FROM sample WHERE sample_id < {int(sample_id)} LIMIT {args.limit}"
            )
            rowcount = 1
            it = 0
            while rowcount > 0:
                c.execute(delete_query)
                rowcount = c.rowcount
                # progress indicator: one digit per DELETE issued
                print(f"{it % 10}", end="", flush=True)
                it += 1
                if rowcount > 0:
                    # only pause when another DELETE will follow
                    time.sleep(args.sleep)
            print("")
101+
102+
103+
if __name__ == "__main__":
104+
main()

installation_and_upgrade/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ pyepics
1212
epicscorelibs
1313
certifi # Needed in order for requests to find https certificates
1414
requests
15+
mysql-connector-python==8.4.0

0 commit comments

Comments
 (0)