Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ dist
venv
.spyproject
.idea
site
site
.env
uv.lock
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "xml2db"
version = "0.12.5"
version = "0.12.6"
authors = [
{ name="Commission de régulation de l'énergie", email="opensource@cre.fr" },
]
Expand Down Expand Up @@ -36,3 +36,6 @@ markers = [
"dbtest: marks tests as integration tests requiring a database backend (deselect with '-m \"not dbtest\"')",
]
junit_family = "xunit2"

[tool.uv]
package = true
8 changes: 8 additions & 0 deletions src/xml2db/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ def _validate_config(self, cfg):
for key, exp_type, default in [
("as_columnstore", bool, False),
("row_numbers", bool, False),
("shorten_temp_table_names", bool, False),
("shorten_rel_table_names", bool, False),
("document_tree_hook", callable, None),
("document_tree_node_hook", callable, None),
("record_hash_column_name", str, "xml2db_record_hash"),
Expand Down Expand Up @@ -194,6 +196,12 @@ def _create_table_model(
A data model instance.
"""
table_config = self.tables_config.get(table_name, {})
table_config["shorten_temp_table_names"] = self.model_config[
"shorten_temp_table_names"
]
table_config["shorten_rel_table_names"] = self.model_config[
"shorten_rel_table_names"
]
if table_config.get("reuse", True):
return DataModelTableReused(
table_name,
Expand Down
9 changes: 8 additions & 1 deletion src/xml2db/table/duplicated_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,16 @@ def get_col(temp=False) -> Iterable[Column]:
)
)

temp_table_name = f"{prefix}{self.name}"
temp_table_name = (
self.truncate_long_name(temp_table_name)
if self.config.get("shorten_table_names")
else temp_table_name
)

# build temporary table
self.temp_table = Table(
f"{prefix}{self.name}",
temp_table_name,
self.metadata,
Column(f"pk_{self.name}", Integer),
*get_col(temp=True),
Expand Down
16 changes: 14 additions & 2 deletions src/xml2db/table/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,15 @@ def build_relation_tables(self) -> None:
)
prefix = f"temp_{self.table.temp_prefix}_"
if self.other_table.is_reused:
temp_table_name = f"{prefix}{self.rel_table_name}"
temp_table_name = (
self.table.truncate_long_name(temp_table_name)
if self.table.config.get("shorten_temp_table_names")
else temp_table_name
)

self.temp_rel_table = Table(
f"{prefix}{self.rel_table_name}",
temp_table_name,
self.table.metadata,
Column(f"temp_fk_{self.table.name}", Integer, nullable=False),
Column(f"fk_{self.table.name}", Integer),
Expand Down Expand Up @@ -132,8 +139,13 @@ def build_relation_tables(self) -> None:
),
)

table_name = (
self.table.truncate_long_name(self.rel_table_name)
if self.table.config.get("shorten_rel_table_names")
else self.rel_table_name
)
self.rel_table = Table(
self.rel_table_name,
table_name,
self.table.metadata,
Column(
f"fk_{self.table.name}",
Expand Down
9 changes: 8 additions & 1 deletion src/xml2db/table/reused_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,16 @@ def get_col(temp=False):
)
)

temp_table_name = f"{prefix}{self.name}"
temp_table_name = (
self.truncate_long_name(temp_table_name)
if self.config.get("shorten_temp_table_names")
else temp_table_name
)

# build temporary table
self.temp_table = Table(
f"{prefix}{self.name}",
temp_table_name,
self.metadata,
Column(f"pk_{self.name}", Integer),
Column(
Expand Down
65 changes: 65 additions & 0 deletions src/xml2db/table/table.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import hashlib
import re
from typing import Iterable, List, Any, Union, TYPE_CHECKING
import logging
import sqlalchemy
Expand Down Expand Up @@ -99,6 +101,9 @@ def _validate_config(self, cfg, db_type):
config = {
"reuse": check_type(cfg, "reuse", bool, True),
"as_columnstore": check_type(cfg, "as_columnstore", bool, False),
"shorten_table_names": check_type(
cfg, "shorten_table_names", bool, db_type == "postgresql"
),
}
if "extra_args" in cfg and not (
isinstance(cfg["extra_args"], list)
Expand Down Expand Up @@ -324,10 +329,13 @@ def create_tables(self, engine: sqlalchemy.engine.base.Engine, temp: bool = Fals
temp: if True, create temporary (prefixed) tables
"""
if temp:
logging.info(f"Creating temp table: {self.temp_table.name}")
self.temp_table.create(engine, checkfirst=True)
else:
logging.info(f"Creating table: {self.table.name}")
self.table.create(engine, checkfirst=True)
for relation in self.relations_n.values():
logging.info(f"Creating relation: {relation.name}")
relation.create_table(engine, temp)

def get_insert_temp_records_statements(
Expand Down Expand Up @@ -403,3 +411,60 @@ def get_entity_rel_diagram(self) -> List:
+ ["}"]
)
return [f" {line}" for line in out]

def truncate_long_name(self, table_name: str) -> str:
    """Shorten a table name that exceeds the identifier length limit.

    Names of at most 63 characters are returned unchanged. Longer names are
    split into words on snake_case and camelCase boundaries, and the longest
    leading run of whole words that fits is kept, joined with underscores.
    If not even the first word fits, the original name is cut to length
    instead, so the result is never empty.

    A short hash of the full original name ("_" followed by 8 hex digits) is
    appended when the name is a temporary table name, or when the shortened
    name equals the name of a table or relation table found in
    `self.data_model`.

    Args:
        table_name: The candidate table name.

    Returns:
        The table name to use, at most 63 characters long.
    """
    max_len = 63  # both postgres and mysql safe table name len
    # NOTE(review): the limit is applied to characters; PostgreSQL counts
    # bytes, so non-ASCII names may still be too long - confirm if relevant.
    if len(table_name) <= max_len:
        return table_name

    # Slice the digest before adding the separator, so the suffix is exactly
    # one underscore followed by 8 hex digits.
    digest = hashlib.md5(table_name.encode("utf-8")).hexdigest()
    hash_suffix = f"_{digest[:8]}"

    # extract words for camelCase and snake_case identifiers
    split_name = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", table_name)
    split_name = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", split_name)
    words = [word for word in split_name.split("_") if word]

    def leading_words(limit: int) -> str:
        """Join leading whole words into a name of at most `limit` chars."""
        kept = ""
        for word in words:
            # "- 1" leaves room for the joining underscore
            if len(kept) + len(word) > limit - 1:
                # stop here: appending later words would stitch together
                # parts of the name that were not adjacent
                break
            kept = f"{kept}_{word}" if kept else word
        # fall back to a hard cut when not even the first word fits
        return kept or table_name[:limit]

    # Callers visible alongside this method build temporary names as
    # "temp_<prefix>_<name>"; test the prefix rather than a substring so that
    # names such as "attempt..." are not mistaken for temporary tables.
    if table_name.startswith("temp_"):
        # temporary names do not need to be human readable: always hash
        needs_suffix = True
    else:
        short_name = leading_words(max_len)
        # NOTE(review): this only compares against names held by the data
        # model; two different long names that shorten to the same value do
        # not appear to be detected here - confirm against callers.
        needs_suffix = any(
            tbl.name == short_name
            or any(
                relation.rel_table_name == short_name
                for relation in tbl.relations_n.values()
            )
            for tbl in self.data_model.tables.values()
        )

    if needs_suffix:
        # reserve room for the suffix so the final name still fits
        return f"{leading_words(max_len - len(hash_suffix))}{hash_suffix}"

    # nothing was found so we can just run with the short name
    return short_name