diff --git a/README.md b/README.md index d6fb0b2..8c05402 100644 --- a/README.md +++ b/README.md @@ -106,13 +106,43 @@ vod: width: 1920 height: 1080 bitrate: 5000 + 1080p_nvidia_gpu: + width: 1920 + height: 1080 + bitrate: 5000 + # Optional ffmpeg video overrides + encoder: h264_nvenc # default "libx264" + preset: p1 # default "faster" + profile: high # default "high" + level: auto # default "4.0" + extra-args: # optionally, additional ffmpeg video encoder arguments + - "-tune:v=ull" # can be passed either as combined args, and will be split + - "-rc:v" # or parameter ... + - "cbr" # ... and value on separate lines + # Optional filtergraph, start each chain with the special `[vin]` pad and end them in `[vout]` + filtergraph: + - "[vin]split=2[v1][v2]" # duplicate input + - "[v1]crop=iw/2:ih:0:0,hflip[left]" # left half mirrored horizontally + - "[v2]crop=iw/2:ih:iw/2:0,vflip[right]" # right half flipped vertically + - "[left][right]hstack[vout]" # join halves back together + + # HLS-VOD segment behaviour (optional) + segment-length: 4 # nominal segment length in seconds + segment-offset: 1 # allowed +/- tolerance in seconds + segment-buffer-min: 3 # min segments ahead of playhead + segment-buffer-max: 5 # max segments transcoded at once + # Use video keyframes as existing reference for chunks split # Using this might cause long probing times in order to get - # all keyframes - therefore they should be cached + # all keyframes - therefore they should be cached video-keyframes: false # Single audio profile used audio-profile: - bitrate: 192 # kbps + encoder: aac # default "aac", but "copy" is an alternative + bitrate: 192 # kbps + # Optional filtergraph, start each chain with the special `[ain]` pad and end them in `[aout]` + # filtergraph: + # - "[ain]asetrate=48000*1.5,aresample=48000[aout]" # Pitch the audio up by ~50 % (makes everyone sound like that famous mouse!) 
# If cache is enabled cache: true # If dir is empty, cache will be stored in the same directory as media source @@ -127,6 +157,57 @@ hls-proxy: my_server: http://192.168.1.34:9981 ``` +## Defining filter graphs on video and audio streams + +You can optionally define filtergraphs on video and audio profiles. This +allows you to modify the streams during the transcoding process. + +If you don't specify any filtergraphs, you get the video scaled to the profile's height (or its width, for portrait profiles) +with the aspect ratio preserved, plus the first audio track from the input video. + +When you do supply a filtergraph: + +* start the chain at the source pads `[vin]` (video) or `[ain]` (audio) +* end the chain at `[vout]` or `[aout]` – these pads are what `-map` picks up + +Examples: + +```yaml +vod: + video-profiles: + 1080p: + width: 1920 + height: 1080 + bitrate: 5000 + filtergraph: + - "[vin]format=pix_fmts=yuv420p[vout]" # change pixel format to yuv420p +``` + +```yaml +vod: + audio-profile: + filtergraph: + - "[ain][0:a:1]amix=inputs=2[aout]" # mix second audio track into the first +``` + +### Implementation + +The transcoder always assembles a single FFmpeg `-filter_complex` that already contains **one video and one audio chain**: + +1. `[0:v]scale=…[vin]` – scales the first video stream and stores the result in pad `[vin]`. +2. `[0:a]anull[ain]` – passes the first audio stream through unchanged into pad `[ain]`. +3. For each stream without a user-supplied filtergraph, the code adds a pass-through chain (`[vin]null[vout]` for video, `[ain]anull[aout]` for audio) so the output pads always exist. + +Both pads are then selected with: + +```sh +-map [vout] -map [aout]? +``` + +`-map` tells FFmpeg exactly which streams (by pad name or by input index) should +be written to the current output file. Being explicit prevents surprises when +inputs carry multiple audio/video streams. + ## Transcoding profiles for live streams go-transcode supports any formats that ffmpeg likes. 
We provide profiles out-of-the-box for h264+aac (mp4 container) for 360p, 540p, 720p and 1080p resolutions: `h264_360p`, `h264_540p`, `h264_720p` and `h264_1080p`. Profiles can have any name, but must match regex: `^[0-9A-Za-z_-]+$` diff --git a/hlsvod/manager.go b/hlsvod/manager.go index 662c41d..03105d5 100644 --- a/hlsvod/manager.go +++ b/hlsvod/manager.go @@ -53,15 +53,28 @@ type ManagerCtx struct { } func New(config Config) *ManagerCtx { + // apply defaults if zero + if config.SegmentLength == 0 { + config.SegmentLength = 4 + } + if config.SegmentOffset == 0 { + config.SegmentOffset = 1 + } + if config.SegmentBufferMin == 0 { + config.SegmentBufferMin = 3 + } + if config.SegmentBufferMax == 0 { + config.SegmentBufferMax = 5 + } ctx, cancel := context.WithCancel(context.Background()) return &ManagerCtx{ logger: log.With().Str("module", "hlsvod").Str("submodule", "manager").Logger(), config: config, - segmentLength: 4, - segmentOffset: 1, - segmentBufferMin: 3, - segmentBufferMax: 5, + segmentLength: config.SegmentLength, + segmentOffset: config.SegmentOffset, + segmentBufferMin: config.SegmentBufferMin, + segmentBufferMax: config.SegmentBufferMax, ctx: ctx, cancel: cancel, diff --git a/hlsvod/transcode.go b/hlsvod/transcode.go index 9bbe037..399a1e4 100644 --- a/hlsvod/transcode.go +++ b/hlsvod/transcode.go @@ -26,10 +26,20 @@ type VideoProfile struct { Width int Height int Bitrate int // in kilobytes + + // Optional FFmpeg overrides + Encoder string + Preset string + Profile string + Level string + ExtraArgs []string + FilterGraph []string } type AudioProfile struct { - Bitrate int // in kilobytes + Encoder string // audio encoder (e.g., "aac", "copy", "libopus") + Bitrate int // in kilobits per second (0 means use encoder default) + FilterGraph []string // optional audio filtergraph chains } // returns a channel, that delivers name of the segments as they are encoded @@ -78,35 +88,95 @@ func TranscodeSegments(ctx context.Context, ffmpegBinary string, config Transcod 
"-sn", // No subtitles }...) - // Video specs + // Filtergraph (scaling + optional user graph) if config.VideoProfile != nil { profile := config.VideoProfile - var scale string + // Build scale expression producing [vin] source pad + var scaleExpr string if profile.Width >= profile.Height { - scale = fmt.Sprintf("scale=-2:%d", profile.Height) + scaleExpr = fmt.Sprintf("[0:v]scale=-2:%d[vin]", profile.Height) } else { - scale = fmt.Sprintf("scale=%d:-2", profile.Width) + scaleExpr = fmt.Sprintf("[0:v]scale=%d:-2[vin]", profile.Width) + } + + // Source audio pad + audioIn := "[0:a]anull[ain]" + + graphParts := []string{scaleExpr, audioIn} + + // Video filters + if len(profile.FilterGraph) > 0 { + graphParts = append(graphParts, profile.FilterGraph...) + } else { + graphParts = append(graphParts, "[vin]null[vout]") + } + // Audio filters + if config.AudioProfile != nil && len(config.AudioProfile.FilterGraph) > 0 { + graphParts = append(graphParts, config.AudioProfile.FilterGraph...) + } else { + graphParts = append(graphParts, "[ain]anull[aout]") + } + combinedFG := strings.Join(graphParts, ";") + // Add filter graph and explicit stream mapping (video & audio) + args = append(args, "-filter_complex", combinedFG) + args = append(args, "-map", "[vout]", "-map", "[aout]?") + } + + // Video specs + if config.VideoProfile != nil { + profile := config.VideoProfile + + // apply defaults if empty + encoder := profile.Encoder + if encoder == "" { + encoder = "libx264" + } + preset := profile.Preset + if preset == "" { + preset = "faster" + } + prof := profile.Profile + if prof == "" { + prof = "high" + } + lvl := profile.Level + if lvl == "" { + lvl = "4.0" } args = append(args, []string{ - "-vf", scale, - "-c:v", "libx264", - "-preset", "faster", - "-profile:v", "high", - "-level:v", "4.0", + "-c:v", encoder, + "-preset", preset, + "-profile:v", prof, + "-level:v", lvl, "-b:v", fmt.Sprintf("%dk", profile.Bitrate), }...) 
+ + // extra args + if len(profile.ExtraArgs) > 0 { + extraArgs := make([]string, 0, len(profile.ExtraArgs)) + for _, arg := range profile.ExtraArgs { + // Split combined args like "-tune:v=ull" into "-tune:v", "ull" + if strings.Contains(arg, "=") { + extraArgs = append(extraArgs, strings.SplitN(arg, "=", 2)...) + } else { + extraArgs = append(extraArgs, arg) + } + } + args = append(args, extraArgs...) + } } // Audio specs if config.AudioProfile != nil { profile := config.AudioProfile - - args = append(args, []string{ - "-c:a", "aac", - "-b:a", fmt.Sprintf("%dk", profile.Bitrate), - }...) + if profile.Encoder != "" { + args = append(args, "-c:a", profile.Encoder) + if profile.Bitrate > 0 { + args = append(args, "-b:a", fmt.Sprintf("%dk", profile.Bitrate)) + } + } } // Segmenting specs diff --git a/hlsvod/types.go b/hlsvod/types.go index 7c4e651..18c78c5 100644 --- a/hlsvod/types.go +++ b/hlsvod/types.go @@ -14,6 +14,12 @@ type Config struct { VideoKeyframes bool AudioProfile *AudioProfile + // HLS-VOD segment parameters (override defaults from server) + SegmentLength float64 + SegmentOffset float64 + SegmentBufferMin int + SegmentBufferMax int + Cache bool CacheDir string // If not empty, cache will folder will be used instead of media path diff --git a/internal/api/hlsvod.go b/internal/api/hlsvod.go index 4c96481..f93cb2d 100644 --- a/internal/api/hlsvod.go +++ b/internal/api/hlsvod.go @@ -144,15 +144,27 @@ func (a *ApiManagerCtx) HlsVod(r chi.Router) { SegmentPrefix: profileID, VideoProfile: &hlsvod.VideoProfile{ - Width: profile.Width, - Height: profile.Height, - Bitrate: profile.Bitrate, + Width: profile.Width, + Height: profile.Height, + Bitrate: profile.Bitrate, + Encoder: profile.Encoder, + Preset: profile.Preset, + Profile: profile.Profile, + Level: profile.Level, + FilterGraph: profile.FilterGraph, + ExtraArgs: profile.ExtraArgs, }, VideoKeyframes: a.config.Vod.VideoKeyframes, AudioProfile: &hlsvod.AudioProfile{ Bitrate: a.config.Vod.AudioProfile.Bitrate, 
+ FilterGraph: a.config.Vod.AudioProfile.FilterGraph, }, + SegmentLength: a.config.Vod.SegmentLength, + SegmentOffset: a.config.Vod.SegmentOffset, + SegmentBufferMin: a.config.Vod.SegmentBufferMin, + SegmentBufferMax: a.config.Vod.SegmentBufferMax, + Cache: a.config.Vod.Cache, CacheDir: a.config.Vod.CacheDir, diff --git a/internal/config/config.go b/internal/config/config.go index 532aae2..606b13a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -47,10 +47,20 @@ type VideoProfile struct { Width int `mapstructure:"width"` Height int `mapstructure:"height"` Bitrate int `mapstructure:"bitrate"` // in kilobytes + + // Optional FFmpeg overrides + Encoder string `mapstructure:"encoder"` + Preset string `mapstructure:"preset"` + Profile string `mapstructure:"profile"` + Level string `mapstructure:"level"` + ExtraArgs []string `mapstructure:"extra-args"` + FilterGraph []string `mapstructure:"filtergraph"` } type AudioProfile struct { - Bitrate int `mapstructure:"bitrate"` // in kilobytes + Encoder string `mapstructure:"encoder"` + Bitrate int `mapstructure:"bitrate"` // in kilobits per second + FilterGraph []string `mapstructure:"filtergraph"` } type VOD struct { @@ -61,8 +71,15 @@ type VOD struct { AudioProfile AudioProfile `mapstructure:"audio-profile"` Cache bool `mapstructure:"cache"` CacheDir string `mapstructure:"cache-dir"` - FFmpegBinary string `mapstructure:"ffmpeg-binary"` - FFprobeBinary string `mapstructure:"ffprobe-binary"` + + // HLS-VOD segment parameters + SegmentLength float64 `mapstructure:"segment-length"` + SegmentOffset float64 `mapstructure:"segment-offset"` + SegmentBufferMin int `mapstructure:"segment-buffer-min"` + SegmentBufferMax int `mapstructure:"segment-buffer-max"` + + FFmpegBinary string `mapstructure:"ffmpeg-binary"` + FFprobeBinary string `mapstructure:"ffprobe-binary"` } type Enigma2 struct { @@ -142,6 +159,27 @@ func (Server) Init(cmd *cobra.Command) error { return err } + // HLS-VOD segment flags + 
cmd.PersistentFlags().Float64("vod-segment-length", 4, "HLS-VOD segment length in seconds") + if err := viper.BindPFlag("vod.segment-length", cmd.PersistentFlags().Lookup("vod-segment-length")); err != nil { + return err + } + + cmd.PersistentFlags().Float64("vod-segment-offset", 1, "HLS-VOD allowed deviation from segment length in seconds") + if err := viper.BindPFlag("vod.segment-offset", cmd.PersistentFlags().Lookup("vod-segment-offset")); err != nil { + return err + } + + cmd.PersistentFlags().Int("vod-segment-buffer-min", 3, "HLS-VOD minimum number of future segments maintained") + if err := viper.BindPFlag("vod.segment-buffer-min", cmd.PersistentFlags().Lookup("vod-segment-buffer-min")); err != nil { + return err + } + + cmd.PersistentFlags().Int("vod-segment-buffer-max", 5, "HLS-VOD maximum number of segments transcoded in a batch") + if err := viper.BindPFlag("vod.segment-buffer-max", cmd.PersistentFlags().Lookup("vod-segment-buffer-max")); err != nil { + return err + } + return nil } @@ -177,7 +215,26 @@ func (s *Server) Set() { panic(err) } - // defaults + // segment parameters populated from viper + s.Vod.SegmentLength = viper.GetFloat64("vod.segment-length") + s.Vod.SegmentOffset = viper.GetFloat64("vod.segment-offset") + s.Vod.SegmentBufferMin = viper.GetInt("vod.segment-buffer-min") + s.Vod.SegmentBufferMax = viper.GetInt("vod.segment-buffer-max") + + // defaults (HLS-VOD segment) + + if s.Vod.SegmentLength == 0 { + s.Vod.SegmentLength = 4 + } + if s.Vod.SegmentOffset == 0 { + s.Vod.SegmentOffset = 1 + } + if s.Vod.SegmentBufferMin == 0 { + s.Vod.SegmentBufferMin = 3 + } + if s.Vod.SegmentBufferMax == 0 { + s.Vod.SegmentBufferMax = 5 + } if s.Vod.TranscodeDir == "" { var err error @@ -211,6 +268,23 @@ func (s *Server) Set() { s.Vod.FFprobeBinary = "ffprobe" } + // apply defaults to each video profile + for k, vp := range s.Vod.VideoProfiles { + if vp.Encoder == "" { + vp.Encoder = "libx264" + } + if vp.Preset == "" { + vp.Preset = "faster" + } + if 
vp.Profile == "" { + vp.Profile = "high" + } + if vp.Level == "" { + vp.Level = "4.0" + } + s.Vod.VideoProfiles[k] = vp + } + // // HLS PROXY //