Skip to content

Commit 9f5ed26

Browse files
authored
go : Enable VAD for Go bindings (#3563)
* Reset context.n so that NextSegment can be called across multiple Process calls
* Enable VAD params
1 parent a8f45ab commit 9f5ed26

File tree

3 files changed

+74
-0
lines changed

3 files changed

+74
-0
lines changed

bindings/go/params.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,38 @@ func (p *Params) SetPrintTimestamps(v bool) {
4747
p.print_timestamps = toBool(v)
4848
}
4949

50+
// Voice Activity Detection (VAD)
51+
// SetVAD sets the voice activity detection (VAD) flag on the parameters.
// The Go bool v is converted with toBool and stored in p.vad.
func (p *Params) SetVAD(v bool) {
52+
p.vad = toBool(v)
53+
}
54+
55+
// SetVADModelPath stores the VAD model path on the parameters.
// The Go string is copied into a C string with C.CString and the resulting
// pointer is assigned to p.vad_model_path.
//
// NOTE(review): C.CString allocates memory on the C heap that must be freed
// by the caller. Nothing in this method releases a previously stored pointer,
// so repeated calls appear to leak the earlier string; confirm whether it is
// freed elsewhere.
func (p *Params) SetVADModelPath(path string) {
56+
p.vad_model_path = C.CString(path)
57+
}
58+
59+
// SetVADThreshold stores t, converted to a C float, in
// p.vad_params.threshold.
func (p *Params) SetVADThreshold(t float32) {
60+
p.vad_params.threshold = C.float(t)
61+
}
62+
63+
// SetVADMinSpeechMs stores ms, converted to a C int, in
// p.vad_params.min_speech_duration_ms (milliseconds, per the field name).
func (p *Params) SetVADMinSpeechMs(ms int) {
64+
p.vad_params.min_speech_duration_ms = C.int(ms)
65+
}
66+
67+
// SetVADMinSilenceMs stores ms, converted to a C int, in
// p.vad_params.min_silence_duration_ms (milliseconds, per the field name).
func (p *Params) SetVADMinSilenceMs(ms int) {
68+
p.vad_params.min_silence_duration_ms = C.int(ms)
69+
}
70+
71+
// SetVADMaxSpeechSec stores s, converted to a C float, in
// p.vad_params.max_speech_duration_s (seconds, per the field name).
func (p *Params) SetVADMaxSpeechSec(s float32) {
72+
p.vad_params.max_speech_duration_s = C.float(s)
73+
}
74+
75+
// SetVADSpeechPadMs stores ms, converted to a C int, in
// p.vad_params.speech_pad_ms (milliseconds, per the field name).
func (p *Params) SetVADSpeechPadMs(ms int) {
76+
p.vad_params.speech_pad_ms = C.int(ms)
77+
}
78+
79+
// SetVADSamplesOverlap stores sec, converted to a C float, in
// p.vad_params.samples_overlap.
// NOTE(review): the parameter name suggests the value is in seconds; confirm
// against the definition of the underlying C struct.
func (p *Params) SetVADSamplesOverlap(sec float32) {
80+
p.vad_params.samples_overlap = C.float(sec)
81+
}
5082

5183
// Set language id
5284
func (p *Params) SetLanguage(lang int) error {

bindings/go/pkg/whisper/context.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,39 @@ func (context *context) SetTranslate(v bool) {
8080
context.params.SetTranslate(v)
8181
}
8282

83+
// Voice Activity Detection (VAD)
84+
// SetVAD sets the voice activity detection (VAD) flag by forwarding v to
// context.params.SetVAD.
func (context *context) SetVAD(v bool) {
85+
context.params.SetVAD(v)
86+
}
87+
88+
// SetVADModelPath sets the VAD model path by forwarding path to
// context.params.SetVADModelPath.
func (context *context) SetVADModelPath(path string) {
89+
context.params.SetVADModelPath(path)
90+
}
91+
92+
// SetVADThreshold sets the VAD threshold by forwarding t to
// context.params.SetVADThreshold.
func (context *context) SetVADThreshold(t float32) {
93+
context.params.SetVADThreshold(t)
94+
}
95+
96+
// SetVADMinSpeechMs sets the VAD minimum speech duration (ms, per the method
// name) by forwarding ms to context.params.SetVADMinSpeechMs.
func (context *context) SetVADMinSpeechMs(ms int) {
97+
context.params.SetVADMinSpeechMs(ms)
98+
}
99+
100+
// SetVADMinSilenceMs sets the VAD minimum silence duration (ms, per the
// method name) by forwarding ms to context.params.SetVADMinSilenceMs.
func (context *context) SetVADMinSilenceMs(ms int) {
101+
context.params.SetVADMinSilenceMs(ms)
102+
}
103+
104+
// SetVADMaxSpeechSec sets the VAD maximum speech duration (seconds, per the
// method name) by forwarding s to context.params.SetVADMaxSpeechSec.
func (context *context) SetVADMaxSpeechSec(s float32) {
105+
context.params.SetVADMaxSpeechSec(s)
106+
}
107+
108+
// SetVADSpeechPadMs sets the VAD speech padding (ms, per the method name) by
// forwarding ms to context.params.SetVADSpeechPadMs.
func (context *context) SetVADSpeechPadMs(ms int) {
109+
context.params.SetVADSpeechPadMs(ms)
110+
}
111+
112+
// SetVADSamplesOverlap sets the VAD samples overlap by forwarding sec to
// context.params.SetVADSamplesOverlap.
func (context *context) SetVADSamplesOverlap(sec float32) {
113+
context.params.SetVADSamplesOverlap(sec)
114+
}
115+
83116
// SetSplitOnWord forwards v to context.params.SetSplitOnWord.
func (context *context) SetSplitOnWord(v bool) {
84117
context.params.SetSplitOnWord(v)
85118
}

bindings/go/pkg/whisper/interface.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@ type Context interface {
6060
SetTemperature(t float32) // Set temperature
6161
SetTemperatureFallback(t float32) // Set temperature incrementation
6262

63+
SetVAD(v bool) // Set the voice activity detection (VAD) flag
64+
SetVADModelPath(path string) // Set VAD model path
65+
SetVADThreshold(t float32) // Set VAD threshold
66+
SetVADMinSpeechMs(ms int) // Set VAD minimum speech duration (ms)
67+
SetVADMinSilenceMs(ms int) // Set VAD minimum silence duration (ms)
68+
SetVADMaxSpeechSec(s float32) // Set VAD maximum speech duration (sec)
69+
SetVADSpeechPadMs(ms int) // Set VAD speech padding (ms)
70+
SetVADSamplesOverlap(sec float32) // Set VAD samples overlap
71+
6372
// Process mono audio data and return any errors.
6473
// If defined, newly generated segments are passed to the
6574
// callback function during processing.

0 commit comments

Comments
 (0)