Skip to content

Commit a501c3b

Browse files
committed
[timecode] Add pts and time base properties
1 parent e86147f commit a501c3b

File tree

10 files changed

+99
-69
lines changed

10 files changed

+99
-69
lines changed

scenedetect/backends/opencv.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ def timecode(self) -> Timecode:
206206

207207
@property
208208
def position(self) -> FrameTimecode:
209+
# TODO(https://scenedetect.com/issue/168): See if there is a better way to do this, or
210+
# add a config option before landing this.
209211
if _USE_PTS_IN_DEVELOPMENT:
210212
return FrameTimecode(timecode=self.timecode, fps=self.frame_rate)
211213
if self.frame_number < 1:
@@ -226,10 +228,7 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
226228
if target < 0:
227229
raise ValueError("Target seek position cannot be negative!")
228230

229-
if _USE_PTS_IN_DEVELOPMENT:
230-
# TODO(https://scenedetect.com/issue/168): Shouldn't use frames for VFR video here.
231-
raise NotImplementedError()
232-
231+
# TODO(https://scenedetect.com/issue/168): Shouldn't use frames for VFR video here.
233232
# Have to seek one behind and call grab() after so that the VideoCapture
234233
# returns a valid timestamp when using CAP_PROP_POS_MSEC.
235234
target_frame_cv2 = (self.base_timecode + target).frame_num

scenedetect/backends/pyav.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
""":class:`VideoStreamAv` provides an adapter for the PyAV av.InputContainer object."""
1313

1414
import typing as ty
15+
from fractions import Fraction
1516
from logging import getLogger
1617

1718
import av
@@ -22,7 +23,6 @@
2223
from scenedetect.video_stream import FrameRateUnavailable, VideoOpenFailure, VideoStream
2324

2425
logger = getLogger("pyscenedetect")
25-
2626
VALID_THREAD_MODES = ["NONE", "SLICE", "FRAME", "AUTO"]
2727

2828

@@ -36,7 +36,7 @@ class VideoStreamAv(VideoStream):
3636
def __init__(
3737
self,
3838
path_or_io: ty.Union[ty.AnyStr, ty.BinaryIO],
39-
framerate: ty.Optional[float] = None,
39+
framerate: ty.Optional[ty.Union[float, Fraction]] = None,
4040
name: ty.Optional[str] = None,
4141
threading_mode: ty.Optional[str] = None,
4242
suppress_output: bool = False,
@@ -123,15 +123,14 @@ def __init__(
123123
)
124124
if frame_rate is None or frame_rate == 0:
125125
raise FrameRateUnavailable()
126-
# TODO: Refactor FrameTimecode to support raw timing rather than framerate based calculations.
127-
# See https://pyav.org/docs/develop/api/stream.html for details.
128-
frame_rate = frame_rate.numerator / float(frame_rate.denominator)
129126
if frame_rate < MAX_FPS_DELTA:
130127
raise FrameRateUnavailable()
131-
self._frame_rate: float = frame_rate
128+
self._frame_rate: Fraction = frame_rate
132129
else:
133130
assert framerate >= MAX_FPS_DELTA
134-
self._frame_rate: float = framerate
131+
self._frame_rate: Fraction = (
132+
framerate if isinstance(framerate, Fraction) else Fraction.from_float(framerate)
133+
)
135134

136135
# Calculate duration after we have set the framerate.
137136
self._duration_frames = self._get_duration()
@@ -212,6 +211,16 @@ def frame_number(self) -> int:
212211
return self.position.frame_num + 1
213212
return 0
214213

214+
@property
215+
def rate(self) -> Fraction:
216+
return self._video_stream.guessed_rate
217+
218+
@property
219+
def time_base(self) -> Fraction:
220+
if self._frame:
221+
return self._frame.time_base
222+
return None
223+
215224
@property
216225
def aspect_ratio(self) -> float:
217226
"""Pixel aspect ratio as a float (1.0 represents square pixels)."""
@@ -250,10 +259,7 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]) -> None:
250259
raise ValueError("Target cannot be negative!")
251260
beginning = target == 0
252261

253-
if _USE_PTS_IN_DEVELOPMENT:
254-
# TODO(https://scenedetect.com/issue/168): Need to handle PTS here.
255-
raise NotImplementedError()
256-
262+
# TODO(https://scenedetect.com/issue/168): This breaks with PTS mode enabled.
257263
target = self.base_timecode + target
258264
if target >= 1:
259265
target = target - 1

scenedetect/common.py

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -172,18 +172,21 @@ def __init__(
172172
TypeError: Thrown if either `timecode` or `fps` are unsupported types.
173173
ValueError: Thrown when specifying a negative timecode or framerate.
174174
"""
175-
# The following two properties are what is used to keep track of time
176-
# in a frame-specific manner. Note that once the framerate is set,
177-
# the value should never be modified (only read if required).
178-
# TODO(v1.0): Make these actual @properties.
179-
self._framerate: Fraction = None
175+
# NOTE: FrameTimecode will have either a `Timecode` representation, a `seconds`
176+
# representation, or only a frame number. We cache the calculated values for later use
177+
# for the parameters that are missing.
178+
self._rate: Fraction = None
179+
"""Rate at which time passes between frames, measured in frames/sec."""
180180
self._frame_num = None
181+
"""Frame number which may be estimated."""
181182
self._timecode: ty.Optional[Timecode] = None
183+
"""Presentation timestamp from the backend."""
182184
self._seconds: ty.Optional[float] = None
185+
"""An explicit point in time."""
183186

184187
# Copy constructor.
185188
if isinstance(timecode, FrameTimecode):
186-
self._framerate = timecode._framerate if fps is None else fps
189+
self._rate = timecode._rate if fps is None else fps
187190
self._frame_num = timecode._frame_num
188191
self._timecode = timecode._timecode
189192
self._seconds = timecode._seconds
@@ -196,15 +199,15 @@ def __init__(
196199
if fps is None:
197200
raise TypeError("fps is a required argument.")
198201
if isinstance(fps, FrameTimecode):
199-
self._framerate = fps._framerate
202+
self._rate = fps._rate
200203
elif isinstance(fps, float):
201204
if fps <= MAX_FPS_DELTA:
202205
raise ValueError("Framerate must be positive and greater than zero.")
203-
self._framerate = Fraction.from_float(fps)
206+
self._rate = Fraction.from_float(fps)
204207
elif isinstance(fps, Fraction):
205208
if float(fps) <= MAX_FPS_DELTA:
206209
raise ValueError("Framerate must be positive and greater than zero.")
207-
self._framerate = fps
210+
self._rate = fps
208211
else:
209212
raise TypeError(
210213
f"Wrong type for fps: {type(fps)} - expected float, Fraction, or FrameTimecode"
@@ -214,9 +217,11 @@ def __init__(
214217
if isinstance(timecode, Timecode):
215218
self._timecode = timecode
216219
return
220+
217221
# Process the timecode value, storing it as an exact number of frames only if required.
218222
if isinstance(timecode, str) and timecode.isdigit():
219223
timecode = int(timecode)
224+
220225
if isinstance(timecode, str):
221226
self._seconds = self._timecode_to_seconds(timecode)
222227
elif isinstance(timecode, float):
@@ -232,6 +237,8 @@ def __init__(
232237

233238
@property
234239
def frame_num(self) -> ty.Optional[int]:
240+
"""The frame number. This value will be an estimate if the video is VFR. Prefer using the
241+
`pts` property."""
235242
if self._timecode:
236243
# We need to audit anything currently using this property to guarantee temporal
237244
# consistency when handling VFR videos (i.e. no assumptions on fixed frame rate).
@@ -249,8 +256,24 @@ def frame_num(self) -> ty.Optional[int]:
249256
return self._frame_num
250257

251258
@property
252-
def framerate(self) -> ty.Optional[float]:
253-
return float(self._framerate)
259+
def framerate(self) -> float:
260+
"""The framerate to use for distance between frames and to calculate frame numbers.
261+
For a VFR video, this may just be the average framerate."""
262+
return float(self._rate)
263+
264+
@property
265+
def time_base(self) -> Fraction:
266+
"""The time base in which presentation time is calculated."""
267+
if self._timecode:
268+
return self._timecode.time_base
269+
return 1 / self._rate
270+
271+
@property
272+
def pts(self) -> int:
273+
"""The presentation timestamp of the frame in units of `time_base`."""
274+
if self._timecode:
275+
return self._timecode.pts
276+
return self.frame_num
254277

255278
def get_frames(self) -> int:
256279
"""[DEPRECATED] Get the current time/position in number of frames.
@@ -302,8 +325,7 @@ def seconds(self) -> float:
302325
return self._timecode.seconds
303326
if self._seconds:
304327
return self._seconds
305-
# Assume constant framerate if we don't have timing information.
306-
return float(self._frame_num) / self._framerate
328+
return float(self._frame_num / self._rate)
307329

308330
def get_seconds(self) -> float:
309331
"""[DEPRECATED] Get the frame's position in number of seconds.
@@ -372,7 +394,7 @@ def _seconds_to_frames(self, seconds: float) -> int:
372394
373395
*NOTE*: This will not be correct for variable framerate videos.
374396
"""
375-
return round(seconds * self._framerate)
397+
return round(seconds * self._rate)
376398

377399
def _parse_timecode_number(self, timecode: ty.Union[int, float]) -> int:
378400
"""Parse a timecode number, storing it as the exact number of frames.
@@ -406,7 +428,7 @@ def _timecode_to_seconds(self, input: str) -> float:
406428
Raises:
407429
ValueError: Value could not be parsed correctly.
408430
"""
409-
assert self._framerate is not None and self._framerate > MAX_FPS_DELTA
431+
assert self._rate is not None and self._rate > MAX_FPS_DELTA
410432
input = input.strip()
411433
# Exact number of frames N
412434
if input.isdigit():
@@ -452,7 +474,7 @@ def _get_other_as_frames(self, other: ty.Union[int, float, str, "FrameTimecode"]
452474
return self._seconds_to_frames(self._timecode_to_seconds(other))
453475
if isinstance(other, FrameTimecode):
454476
# If comparing two FrameTimecodes, they must have the same framerate for frame-based operations.
455-
if self._framerate and other._framerate and not self.equal_framerate(other._framerate):
477+
if self._rate and other._rate and not self.equal_framerate(other._rate):
456478
raise ValueError(
457479
"FrameTimecode instances require equal framerate for frame-based arithmetic."
458480
)
@@ -530,7 +552,7 @@ def __iadd__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameT
530552
time_base=timecode.time_base,
531553
)
532554
self._seconds = None
533-
self._framerate = None
555+
self._rate = None
534556
self._frame_num = None
535557
return self
536558

@@ -573,7 +595,7 @@ def __isub__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameT
573595
time_base=timecode.time_base,
574596
)
575597
self._seconds = None
576-
self._framerate = None
598+
self._rate = None
577599
self._frame_num = None
578600
return self
579601

@@ -610,8 +632,8 @@ def __repr__(self) -> str:
610632
if self._timecode:
611633
return f"{self.get_timecode()} [pts={self._timecode.pts}, time_base={self._timecode.time_base}]"
612634
if self._seconds is not None:
613-
return f"{self.get_timecode()} [seconds={self._seconds}, fps={self._framerate}]"
614-
return f"{self.get_timecode()} [frame_num={self._frame_num}, fps={self._framerate}]"
635+
return f"{self.get_timecode()} [seconds={self._seconds}, fps={self._rate}]"
636+
return f"{self.get_timecode()} [frame_num={self._frame_num}, fps={self._rate}]"
615637

616638
def __hash__(self) -> int:
617639
if self._timecode:
@@ -628,7 +650,7 @@ def _get_other_as_seconds(self, other: ty.Union[int, float, str, "FrameTimecode"
628650
if _USE_PTS_IN_DEVELOPMENT and other == 1:
629651
return self.seconds
630652
raise NotImplementedError()
631-
return float(other) / self._framerate
653+
return float(other) / self._rate
632654
if isinstance(other, float):
633655
return other
634656
if isinstance(other, str):
@@ -639,4 +661,4 @@ def _get_other_as_seconds(self, other: ty.Union[int, float, str, "FrameTimecode"
639661

640662

641663
def _compare_as_fixed(a: FrameTimecode, b: ty.Any) -> bool:
642-
return a._framerate is not None and isinstance(b, FrameTimecode) and b._framerate is not None
664+
return a._rate is not None and isinstance(b, FrameTimecode) and b._rate is not None

scenedetect/detector.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
import numpy
3232

33-
from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, FrameTimecode
33+
from scenedetect.common import FrameTimecode
3434
from scenedetect.stats_manager import StatsManager
3535

3636

@@ -49,7 +49,7 @@ def __init__(self):
4949
def process_frame(
5050
self, timecode: FrameTimecode, frame_img: numpy.ndarray
5151
) -> ty.List[FrameTimecode]:
52-
"""Process the next frame. `frame_num` is assumed to be sequential.
52+
"""Process the next frame. `timecode` is assumed to be sequential.
5353
5454
Args:
5555
timecode: Timecode corresponding to the frame being processed.
@@ -74,9 +74,8 @@ def post_process(self, timecode: int) -> ty.List[FrameTimecode]:
7474

7575
@property
7676
def event_buffer_length(self) -> int:
77-
"""The amount of frames a given event can be buffered for, in time. Represents maximum
78-
amount any event can be behind `frame_number` in the result of :meth:`process_frame`.
79-
"""
77+
"""The number of frames a given event can be buffered for, in time. This must be set to the
78+
maximum number of frames in the past at which a detector might emit an event."""
8079
return 0
8180

8281
# Frame Stats/Metrics
@@ -135,30 +134,32 @@ def max_behind(self) -> int:
135134
def filter(self, timecode: FrameTimecode, above_threshold: bool) -> ty.List[FrameTimecode]:
136135
if not self._filter_length > 0:
137136
return [timecode] if above_threshold else []
138-
if _USE_PTS_IN_DEVELOPMENT:
139-
raise NotImplementedError("TODO: Change filter to use units of time instead of frames.")
140137
if self._last_above is None:
141138
self._last_above = timecode
142139
if self._mode == FlashFilter.Mode.MERGE:
143-
return self._filter_merge(frame_num=timecode, above_threshold=above_threshold)
140+
return self._filter_merge(timecode=timecode, above_threshold=above_threshold)
144141
elif self._mode == FlashFilter.Mode.SUPPRESS:
145-
return self._filter_suppress(frame_num=timecode, above_threshold=above_threshold)
142+
return self._filter_suppress(timecode=timecode, above_threshold=above_threshold)
146143
raise RuntimeError("Unhandled FlashFilter mode.")
147144

148-
def _filter_suppress(self, frame_num: int, above_threshold: bool) -> ty.List[int]:
149-
min_length_met: bool = (frame_num - self._last_above) >= self._filter_length
145+
def _filter_suppress(self, timecode: FrameTimecode, above_threshold: bool) -> ty.List[int]:
146+
framerate = timecode.framerate
147+
assert framerate >= 0
148+
min_length_met: bool = (timecode - self._last_above) >= (self._filter_length / framerate)
150149
if not (above_threshold and min_length_met):
151150
return []
152151
# Both length and threshold requirements were satisfied. Emit the cut, and wait until both
153152
# requirements are met again.
154-
self._last_above = frame_num
155-
return [frame_num]
153+
self._last_above = timecode
154+
return [timecode]
156155

157-
def _filter_merge(self, frame_num: int, above_threshold: bool) -> ty.List[int]:
158-
min_length_met: bool = (frame_num - self._last_above) >= self._filter_length
156+
def _filter_merge(self, timecode: FrameTimecode, above_threshold: bool) -> ty.List[int]:
157+
framerate = timecode.framerate
158+
assert framerate >= 0
159+
min_length_met: bool = (timecode - self._last_above) >= (self._filter_length / framerate)
159160
# Ensure last frame is always advanced to the most recent one that was above the threshold.
160161
if above_threshold:
161-
self._last_above = frame_num
162+
self._last_above = timecode
162163
if self._merge_triggered:
163164
# This frame was under the threshold, see if enough frames passed to disable the filter.
164165
num_merged_frames = self._last_above - self._merge_start
@@ -174,9 +175,9 @@ def _filter_merge(self, frame_num: int, above_threshold: bool) -> ty.List[int]:
174175
if min_length_met:
175176
# Only allow the merge filter once the first cut is emitted.
176177
self._merge_enabled = True
177-
return [frame_num]
178+
return [timecode]
178179
# Start merging cuts until the length requirement is met.
179180
if self._merge_enabled:
180181
self._merge_triggered = True
181-
self._merge_start = frame_num
182+
self._merge_start = timecode
182183
return []

scenedetect/detectors/content_detector.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ def __init__(
137137
raise ValueError("kernel_size must be odd integer >= 3")
138138
self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8)
139139
self._frame_score: ty.Optional[float] = None
140-
# TODO(https://scenedetect.com/issue/168): Handle timecodes in filter.
140+
# TODO(https://scenedetect.com/issue/168): Figure out a better long term plan for handling
141+
# `min_scene_len` which should be specified in seconds, not frames.
141142
self._flash_filter = FlashFilter(mode=filter_mode, length=min_scene_len)
142143

143144
def get_metrics(self):

scenedetect/output/image.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,8 @@ def image_save_thread(self, save_queue: queue.Queue, progress_bar: tqdm):
293293
def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.Iterable[FrameTimecode]]:
294294
"""Generates a list of timecodes for each scene in `scene_list` based on the current config
295295
parameters."""
296-
framerate = scene_list[0][0]._framerate
296+
# TODO(v0.7): This needs to be fixed as part of PTS overhaul.
297+
framerate = scene_list[0][0].framerate
297298
# TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
298299
return [
299300
(
@@ -450,7 +451,7 @@ def save_images(
450451
image_num_format = "%0"
451452
image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d"
452453

453-
framerate = scene_list[0][0]._framerate
454+
framerate = scene_list[0][0]._rate
454455

455456
# TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
456457
timecode_list = [

0 commit comments

Comments
 (0)