diff --git a/bindings/go/params.go b/bindings/go/params.go
index d8dee57e331..7d6a30d7fea 100644
--- a/bindings/go/params.go
+++ b/bindings/go/params.go
@@ -47,6 +47,38 @@ func (p *Params) SetPrintTimestamps(v bool) {
 	p.print_timestamps = toBool(v)
 }
 
+// SetVAD stores v, converted with toBool, in the vad (Voice Activity Detection) field.
+func (p *Params) SetVAD(v bool) {
+	p.vad = toBool(v)
+}
+
+func (p *Params) SetVADModelPath(path string) {
+	p.vad_model_path = C.CString(path) // NOTE(review): C.CString allocates a C copy of path; no matching free is visible here - confirm who releases it
+}
+
+func (p *Params) SetVADThreshold(t float32) {
+	p.vad_params.threshold = C.float(t)
+}
+
+func (p *Params) SetVADMinSpeechMs(ms int) {
+	p.vad_params.min_speech_duration_ms = C.int(ms) // milliseconds, per the field name; NOTE(review): Go int is converted to C.int - confirm callers stay in C int range
+}
+
+func (p *Params) SetVADMinSilenceMs(ms int) {
+	p.vad_params.min_silence_duration_ms = C.int(ms) // milliseconds, per the field name
+}
+
+func (p *Params) SetVADMaxSpeechSec(s float32) {
+	p.vad_params.max_speech_duration_s = C.float(s) // seconds, per the field name
+}
+
+func (p *Params) SetVADSpeechPadMs(ms int) {
+	p.vad_params.speech_pad_ms = C.int(ms) // milliseconds, per the field name
+}
+
+func (p *Params) SetVADSamplesOverlap(sec float32) {
+	p.vad_params.samples_overlap = C.float(sec) // presumably seconds, per the parameter name - TODO confirm
+}
 // Set language id
 func (p *Params) SetLanguage(lang int) error {
diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go
index 294a0320c8a..d356d72cccb 100644
--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@@ -80,6 +80,39 @@ func (context *context) SetTranslate(v bool) {
 	context.params.SetTranslate(v)
 }
 
+// SetVAD forwards v to context.params.SetVAD (Voice Activity Detection flag).
+func (context *context) SetVAD(v bool) {
+	context.params.SetVAD(v)
+}
+
+func (context *context) SetVADModelPath(path string) {
+	context.params.SetVADModelPath(path)
+}
+
+func (context *context) SetVADThreshold(t float32) {
+	context.params.SetVADThreshold(t)
+}
+
+func (context *context) SetVADMinSpeechMs(ms int) {
+	context.params.SetVADMinSpeechMs(ms)
+}
+
+func (context *context) SetVADMinSilenceMs(ms int) {
+	context.params.SetVADMinSilenceMs(ms)
+}
+
+func (context *context) SetVADMaxSpeechSec(s float32) {
+	context.params.SetVADMaxSpeechSec(s)
+}
+
+func (context *context) SetVADSpeechPadMs(ms int) {
+	context.params.SetVADSpeechPadMs(ms)
+}
+
+func (context *context) SetVADSamplesOverlap(sec float32) {
+	context.params.SetVADSamplesOverlap(sec)
+}
+
 func (context *context) SetSplitOnWord(v bool) {
 	context.params.SetSplitOnWord(v)
 }
diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go
index e3122c44b76..2b275dd3469 100644
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@@ -60,6 +60,15 @@ type Context interface {
 	SetTemperature(t float32) // Set temperature
 	SetTemperatureFallback(t float32) // Set temperature incrementation
 
+	SetVAD(v bool)                    // Set the VAD (voice activity detection) flag
+	SetVADModelPath(path string)      // Set the VAD model path
+	SetVADThreshold(t float32)        // Set the VAD threshold
+	SetVADMinSpeechMs(ms int)         // Set the VAD minimum speech duration (ms)
+	SetVADMinSilenceMs(ms int)        // Set the VAD minimum silence duration (ms)
+	SetVADMaxSpeechSec(s float32)     // Set the VAD maximum speech duration (s)
+	SetVADSpeechPadMs(ms int)         // Set the VAD speech padding (ms)
+	SetVADSamplesOverlap(sec float32) // Set the VAD samples overlap
+
 	// Process mono audio data and return any errors.
 	// If defined, newly generated segments are passed to the
 	// callback function during processing.