Skip to content

Commit e1d86d9

Browse files
authored
[scene_manager] Allow setting delimiters for CSV output (#429)
* [cli] Add ability to customize delimiters for CSV output (#423) * [cli] Add tests for CSV delimiters and enforce constraints for delimiter lengths
1 parent 2c55a55 commit e1d86d9

File tree

8 files changed

+142
-27
lines changed

8 files changed

+142
-27
lines changed

scenedetect.cfg

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,14 @@
269269
# Display list of cut points generated from scene boundaries (yes/no).
270270
#display-cuts = yes
271271

272+
# Separator to use between columns in output file. Must be a single (escaped)
273+
# ASCII character.
274+
#col-separator = ,
275+
276+
# Separator to use between rows in output file. Must be (escaped) ASCII
277+
# characters.
278+
#row-separator = \n
279+
272280
# Format to use for list of cut points (frames, seconds, timecode).
273281
#cut-format = timecode
274282

scenedetect/_cli/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,7 @@ def list_scenes_command(
11141114
output_dir = ctx.config.get_value("list-scenes", "output", output)
11151115
name_format = ctx.config.get_value("list-scenes", "filename", filename)
11161116
list_scenes_args = {
1117+
"col_separator": ctx.config.get_value("list-scenes", "col-separator"),
11171118
"cut_format": ctx.config.get_value("list-scenes", "cut-format"),
11181119
"display_scenes": ctx.config.get_value("list-scenes", "display-scenes"),
11191120
"display_cuts": ctx.config.get_value("list-scenes", "display-cuts"),
@@ -1122,6 +1123,7 @@ def list_scenes_command(
11221123
"skip_cuts": ctx.config.get_value("list-scenes", "skip-cuts", skip_cuts),
11231124
"output_dir": output_dir,
11241125
"quiet": ctx.config.get_value("list-scenes", "quiet", quiet) or ctx.quiet_mode,
1126+
"row_separator": ctx.config.get_value("list-scenes", "row-separator"),
11251127
}
11261128
ctx.add_command(cli_commands.list_scenes, list_scenes_args)
11271129

scenedetect/_cli/commands.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ def list_scenes(
105105
display_scenes: bool,
106106
display_cuts: bool,
107107
cut_format: str,
108+
col_separator: str,
109+
row_separator: str,
108110
):
109111
"""Handles the `list-scenes` command."""
110112
# Write scene list CSV to file if required.
@@ -125,6 +127,8 @@ def list_scenes(
125127
scene_list=scenes,
126128
include_cut_list=not skip_cuts,
127129
cut_list=cuts,
130+
col_separator=col_separator,
131+
row_separator=row_separator,
128132
)
129133
# Suppress output if requested.
130134
if quiet:

scenedetect/_cli/config.py

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@ def from_config(config_value: str, default: "ValidatedValue") -> "ValidatedValue
6060
"""
6161
raise NotImplementedError()
6262

63+
def __repr__(self) -> str:
64+
return str(self.value)
65+
66+
def __str__(self) -> str:
67+
return str(self.value)
68+
6369

6470
class TimecodeValue(ValidatedValue):
6571
"""Validator for timecode values in seconds (100.0), frames (100), or HH:MM:SS.
@@ -75,12 +81,6 @@ def __init__(self, value: Union[int, float, str]):
7581
def value(self) -> Union[int, float, str]:
7682
return self._value
7783

78-
def __repr__(self) -> str:
79-
return str(self.value)
80-
81-
def __str__(self) -> str:
82-
return str(self.value)
83-
8484
@staticmethod
8585
def from_config(config_value: str, default: "TimecodeValue") -> "TimecodeValue":
8686
try:
@@ -121,12 +121,6 @@ def max_val(self) -> Union[int, float]:
121121
"""Maximum value of the range."""
122122
return self._max_val
123123

124-
def __repr__(self) -> str:
125-
return str(self.value)
126-
127-
def __str__(self) -> str:
128-
return str(self.value)
129-
130124
@staticmethod
131125
def from_config(config_value: str, default: "RangeValue") -> "RangeValue":
132126
try:
@@ -163,9 +157,6 @@ def __init__(self, value: Union[str, ContentDetector.Components]):
163157
def value(self) -> Tuple[float, float, float, float]:
164158
return self._value
165159

166-
def __repr__(self) -> str:
167-
return str(self.value)
168-
169160
def __str__(self) -> str:
170161
return "%.3f, %.3f, %.3f, %.3f" % self.value
171162

@@ -199,9 +190,6 @@ def __init__(self, value: int):
199190
def value(self) -> int:
200191
return self._value
201192

202-
def __repr__(self) -> str:
203-
return str(self.value)
204-
205193
def __str__(self) -> str:
206194
if self.value is None:
207195
return "auto"
@@ -217,6 +205,42 @@ def from_config(config_value: str, default: "KernelSizeValue") -> "KernelSizeVal
217205
) from ex
218206

219207

208+
class EscapedString(ValidatedValue):
209+
"""Strings that can contain escape sequences, e.g. the literal \n."""
210+
211+
def __init__(self, value: str, length_limit: int = 0):
212+
self._value = value.encode("utf-8").decode("unicode_escape")
213+
if length_limit and len(self._value) > length_limit:
214+
raise OptionParseFailure(f"Value must be no longer than {length_limit} characters.")
215+
216+
@property
217+
def value(self) -> str:
218+
"""Get the value after validation."""
219+
return self._value
220+
221+
@staticmethod
222+
def from_config(
223+
config_value: str, default: "EscapedString", length_limit: int = 0
224+
) -> "EscapedString":
225+
try:
226+
return EscapedString(config_value, length_limit)
227+
except (UnicodeDecodeError, UnicodeEncodeError) as ex:
228+
raise OptionParseFailure(
229+
"Value must be valid UTF-8 string with escape characters."
230+
) from ex
231+
232+
233+
class EscapedChar(EscapedString):
234+
"""Strings that can contain escape sequences but can be a maximum of 1 character in length."""
235+
236+
def __init__(self, value: str):
237+
super().__init__(value, length_limit=1)
238+
239+
@staticmethod
240+
def from_config(config_value: str, default: "EscapedString") -> "EscapedChar":
241+
return EscapedString.from_config(config_value, default, length_limit=1)
242+
243+
220244
class TimecodeFormat(Enum):
221245
"""Format to display timecodes."""
222246

@@ -304,10 +328,12 @@ def format(self, timecode: FrameTimecode) -> str:
304328
},
305329
"list-scenes": {
306330
"cut-format": TimecodeFormat.TIMECODE,
331+
"col-separator": EscapedChar(","),
307332
"display-cuts": True,
308333
"display-scenes": True,
309334
"filename": "$VIDEO_NAME-Scenes.csv",
310335
"output": None,
336+
"row-separator": EscapedString("\n"),
311337
"no-output-file": False,
312338
"quiet": False,
313339
"skip-cuts": False,

scenedetect/_cli/context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def handle_options(
215215
raise click.Abort()
216216

217217
if self.config.config_dict:
218-
logger.debug("Current configuration:\n%s", str(self.config.config_dict))
218+
logger.debug("Current configuration:\n%s", str(self.config.config_dict).encode("utf-8"))
219219

220220
logger.debug("Parsing program options.")
221221
if stats is not None and frame_skip:

scenedetect/scene_manager.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,9 @@ def write_scene_list(
216216
scene_list: SceneList,
217217
include_cut_list: bool = True,
218218
cut_list: Optional[CutList] = None,
219-
) -> None:
219+
col_separator: str = ",",
220+
row_separator: str = "\n",
221+
):
220222
"""Writes the given list of scenes to an output file handle in CSV format.
221223
222224
Arguments:
@@ -227,8 +229,13 @@ def write_scene_list(
227229
cut_list: Optional list of FrameTimecode objects denoting the cut list (i.e. the frames
228230
in the video that need to be split to generate individual scenes). If not specified,
229231
the cut list is generated using the start times of each scene following the first one.
232+
col_separator: Delimiter to use between values. Must be a single character.
233+
row_separator: Line terminator to use between rows.
234+
235+
Raises:
236+
TypeError: "delimiter" must be a 1-character string
230237
"""
231-
csv_writer = csv.writer(output_csv_file, lineterminator="\n")
238+
csv_writer = csv.writer(output_csv_file, delimiter=col_separator, lineterminator=row_separator)
232239
# If required, output the cutting list as the first row (i.e. before the header row).
233240
if include_cut_list:
234241
csv_writer.writerow(

tests/test_cli.py

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -294,24 +294,90 @@ def test_cli_list_scenes(tmp_path: Path):
294294
)
295295
== 0
296296
)
297-
# Add statsfile
297+
output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv")
298+
assert os.path.exists(output_path)
299+
EXPECTED_CSV_OUTPUT = """Timecode List:,00:00:03.754
300+
Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds)
301+
1,49,00:00:02.002,2.002,90,00:00:03.754,3.754,42,00:00:01.752,1.752
302+
2,91,00:00:03.754,3.754,144,00:00:06.006,6.006,54,00:00:02.252,2.252
303+
"""
304+
assert output_path.read_text() == EXPECTED_CSV_OUTPUT
305+
306+
307+
def test_cli_list_scenes_skip_cuts(tmp_path: Path):
308+
"""Test `list-scenes` command with the -s/--skip-cuts option for RFC 4180 compliance."""
309+
# Regular invocation
298310
assert (
299311
invoke_scenedetect(
300-
"-i {VIDEO} -s {STATS} time {TIME} {DETECTOR} list-scenes",
312+
"-i {VIDEO} time {TIME} {DETECTOR} list-scenes -s",
301313
output_dir=tmp_path,
302314
)
303315
== 0
304316
)
305-
# Suppress output file
317+
output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv")
318+
assert os.path.exists(output_path)
319+
EXPECTED_CSV_OUTPUT = """Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds)
320+
1,49,00:00:02.002,2.002,90,00:00:03.754,3.754,42,00:00:01.752,1.752
321+
2,91,00:00:03.754,3.754,144,00:00:06.006,6.006,54,00:00:02.252,2.252
322+
"""
323+
assert output_path.read_text() == EXPECTED_CSV_OUTPUT
324+
325+
326+
def test_cli_list_scenes_no_output(tmp_path: Path):
327+
"""Test `list-scenes` command with the -n flag."""
328+
output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv")
306329
assert (
307330
invoke_scenedetect(
308331
"-i {VIDEO} time {TIME} {DETECTOR} list-scenes -n",
309332
output_dir=tmp_path,
310333
)
311334
== 0
312335
)
313-
# TODO: Check for output files from regular invocation.
314-
# TODO: Delete scene list and ensure is not recreated using -n.
336+
assert not os.path.exists(output_path)
337+
338+
339+
def test_cli_list_scenes_custom_delimiter(tmp_path: Path):
340+
"""Test `list-scenes` command with custom delimiters set in a config file."""
341+
config_path = tmp_path.joinpath("config.cfg")
342+
config_path.write_text("""
343+
[list-scenes]
344+
col-separator = |
345+
row-separator = \\t
346+
""")
347+
assert (
348+
invoke_scenedetect(
349+
f"-i {{VIDEO}} -c {config_path} time {{TIME}} {{DETECTOR}} list-scenes",
350+
output_dir=tmp_path,
351+
)
352+
== 0
353+
)
354+
output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv")
355+
assert os.path.exists(output_path)
356+
EXPECTED_CSV_OUTPUT = """Timecode List:,00:00:03.754
357+
Scene Number,Start Frame,Start Timecode,Start Time (seconds),End Frame,End Timecode,End Time (seconds),Length (frames),Length (timecode),Length (seconds)
358+
1,49,00:00:02.002,2.002,90,00:00:03.754,3.754,42,00:00:01.752,1.752
359+
2,91,00:00:03.754,3.754,144,00:00:06.006,6.006,54,00:00:02.252,2.252
360+
"""
361+
EXPECTED_CSV_OUTPUT = EXPECTED_CSV_OUTPUT.replace(",", "|").replace("\n", "\t")
362+
assert output_path.read_text() == EXPECTED_CSV_OUTPUT
363+
364+
365+
def test_cli_list_scenes_rejects_multichar_col_separator(tmp_path: Path):
366+
"""Test that `list-scenes` rejects a multi-character `col-separator` set in a config file."""
367+
config_path = tmp_path.joinpath("config.cfg")
368+
config_path.write_text("""
369+
[list-scenes]
370+
col-separator = ||
371+
""")
372+
assert (
373+
invoke_scenedetect(
374+
f"-i {{VIDEO}} -c {config_path} time {{TIME}} {{DETECTOR}} list-scenes",
375+
output_dir=tmp_path,
376+
)
377+
!= 0
378+
)
379+
output_path = tmp_path.joinpath(f"{DEFAULT_VIDEO_NAME}-Scenes.csv")
380+
assert not os.path.exists(output_path)
315381

316382

317383
@pytest.mark.skipif(condition=not is_ffmpeg_available(), reason="ffmpeg is not available")

website/pages/changelog.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,4 +596,6 @@ Development
596596
- [improvement] `save_to_csv` now works with paths from `pathlib`
597597
- [bugfix] Fix `SyntaxWarning` due to incorrect escaping [#400](https://github.com/Breakthrough/PySceneDetect/issues/400)
598598
- [bugfix] Fix `ContentDetector` crash when using callbacks [#416](https://github.com/Breakthrough/PySceneDetect/issues/416) [#420](https://github.com/Breakthrough/PySceneDetect/issues/420)
599-
599+
- [api] The `save_to_csv` function now works correctly with paths from the `pathlib` module
600+
- [api] Add `col_separator` and `row_separator` args to `write_scene_list` function in `scenedetect.scene_manager`
601+
- [feature] Add ability to configure CSV separators for rows/columns in config file [#423](https://github.com/Breakthrough/PySceneDetect/issues/423)

0 commit comments

Comments
 (0)