@@ -60,6 +60,7 @@ struct server_params
6060 std::string public_path = " examples/server/public" ;
6161 std::string request_path = " " ;
6262 std::string inference_path = " /inference" ;
63+ std::string tmp_dir = " ." ;
6364
6465 int32_t port = 8080 ;
6566 int32_t read_timeout = 600 ;
@@ -174,6 +175,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
174175 fprintf (stderr, " --request-path PATH, [%-7s] Request path for all requests\n " , sparams.request_path .c_str ());
175176 fprintf (stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n " , sparams.inference_path .c_str ());
176177 fprintf (stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server\n " , sparams.ffmpeg_converter ? " true" : " false" );
178+ fprintf (stderr, " --tmp-dir, [%-7s] Temporary directory for ffmpeg transcoded files\n " , sparams.tmp_dir .c_str ());
177179 fprintf (stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n " , params.suppress_nst ? " true" : " false" );
178180 fprintf (stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n " , params.no_speech_thold );
179181 fprintf (stderr, " -ng, --no-gpu [%-7s] do not use gpu\n " , params.use_gpu ? " false" : " true" );
@@ -248,6 +250,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
248250 else if ( arg == " --request-path" ) { sparams.request_path = argv[++i]; }
249251 else if ( arg == " --inference-path" ) { sparams.inference_path = argv[++i]; }
250252 else if ( arg == " --convert" ) { sparams.ffmpeg_converter = true ; }
253+ else if ( arg == " --tmp-dir" ) { sparams.tmp_dir = argv[++i]; }
251254
252255 // Voice Activity Detection (VAD)
253256 else if ( arg == " --vad" ) { params.vad = true ; }
@@ -288,15 +291,17 @@ void check_ffmpeg_availibility() {
288291 }
289292}
290293
291- std::string generate_temp_filename (const std::string &prefix, const std::string &extension) {
294+ std::string generate_temp_filename (const std::string &path, const std::string & prefix, const std::string &extension) {
292295 auto now = std::chrono::system_clock::now ();
293296 auto now_time_t = std::chrono::system_clock::to_time_t (now);
294297
295298 static std::mt19937 rng{std::random_device{}()};
296299 std::uniform_int_distribution<long long > dist (0 , 1e9 );
297300
298301 std::stringstream ss;
299- ss << prefix
302+ ss << path
303+ << std::filesystem::path::preferred_separator
304+ << prefix
300305 << " -"
301306 << std::put_time (std::localtime (&now_time_t ), " %Y%m%d-%H%M%S" )
302307 << " -"
@@ -816,7 +821,7 @@ int main(int argc, char ** argv) {
816821 if (sparams.ffmpeg_converter ) {
817822 // if file is not wav, convert to wav
818823 // write to temporary file
819- const std::string temp_filename = generate_temp_filename (" whisper-server" , " .wav" );
824+ const std::string temp_filename = generate_temp_filename (sparams. tmp_dir , " whisper-server" , " .wav" );
820825 std::ofstream temp_file{temp_filename, std::ios::binary};
821826 temp_file << audio_file.content ;
822827 temp_file.close ();
0 commit comments