Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit b3084cc

Browse files
author
Sergey Vasilyev
committed
Remove unused code: random sampling
1 parent 3bc08e2 commit b3084cc

File tree

3 files changed

+1
-37
lines changed

3 files changed

+1
-37
lines changed

data_diff/abcs/mixins.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -141,24 +141,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
141141
"""
142142

143143

144-
@attrs.define(frozen=False)
145-
class AbstractMixin_RandomSample(AbstractMixin):
146-
@abstractmethod
147-
def random_sample_n(self, tbl: str, size: int) -> str:
148-
"""Take a random sample of the given size, i.e. return 'size' amount of rows"""
149-
150-
@abstractmethod
151-
def random_sample_ratio_approx(self, tbl: str, ratio: float) -> str:
152-
"""Take a random sample of the approximate size determined by the ratio (0..1), where 0 means no rows, and 1 means all rows
153-
154-
i.e. the actual amount of rows returned may vary by standard deviation.
155-
"""
156-
157-
# def random_sample_ratio(self, table: ITable, ratio: float):
158-
# """Take a random sample of the size determined by the ratio (0..1), where 0 means no rows, and 1 means all rows
159-
# """
160-
161-
162144
@attrs.define(frozen=False)
163145
class AbstractMixin_TimeTravel(AbstractMixin):
164146
@abstractmethod

data_diff/databases/base.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@
7777
from data_diff.abcs.mixins import AbstractMixin_TimeTravel, Compilable
7878
from data_diff.abcs.mixins import (
7979
AbstractMixin_Schema,
80-
AbstractMixin_RandomSample,
8180
AbstractMixin_NormalizeValue,
8281
AbstractMixin_OptimizerHints,
8382
)
@@ -218,16 +217,6 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
218217
)
219218

220219

221-
@attrs.define(frozen=False)
222-
class Mixin_RandomSample(AbstractMixin_RandomSample):
223-
def random_sample_n(self, tbl: ITable, size: int) -> ITable:
224-
# TODO use a more efficient algorithm, when the table count is known
225-
return tbl.order_by(Random()).limit(size)
226-
227-
def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable:
228-
return tbl.where(Random() < ratio)
229-
230-
231220
@attrs.define(frozen=False)
232221
class Mixin_OptimizerHints(AbstractMixin_OptimizerHints):
233222
def optimizer_hints(self, hints: str) -> str:

data_diff/databases/duckdb.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from data_diff.abcs.mixins import (
2121
AbstractMixin_MD5,
2222
AbstractMixin_NormalizeValue,
23-
AbstractMixin_RandomSample,
2423
)
2524
from data_diff.databases.base import (
2625
Database,
@@ -44,7 +43,7 @@ def import_duckdb():
4443

4544

4645
@attrs.define(frozen=False)
47-
class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue, AbstractMixin_RandomSample):
46+
class Dialect(BaseDialect, Mixin_Schema, AbstractMixin_MD5, AbstractMixin_NormalizeValue):
4847
name = "DuckDB"
4948
ROUNDS_ON_PREC_LOSS = False
5049
SUPPORTS_PRIMARY_KEY = True
@@ -120,12 +119,6 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
120119
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
121120
return self.to_string(f"{value}::INTEGER")
122121

123-
def random_sample_n(self, tbl: ITable, size: int) -> ITable:
124-
return code("SELECT * FROM ({tbl}) USING SAMPLE {size};", tbl=tbl, size=size)
125-
126-
def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable:
127-
return code("SELECT * FROM ({tbl}) USING SAMPLE {percent}%;", tbl=tbl, percent=int(100 * ratio))
128-
129122

130123
@attrs.define(frozen=False, init=False, kw_only=True)
131124
class DuckDB(Database):

0 commit comments

Comments
 (0)