Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct server_params
std::string public_path = "examples/server/public";
std::string request_path = "";
std::string inference_path = "/inference";
std::string tmp_dir = ".";

int32_t port = 8080;
int32_t read_timeout = 600;
Expand Down Expand Up @@ -174,6 +175,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " --request-path PATH, [%-7s] Request path for all requests\n", sparams.request_path.c_str());
fprintf(stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n", sparams.inference_path.c_str());
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server\n", sparams.ffmpeg_converter ? "true" : "false");
fprintf(stderr, " --tmp-dir, [%-7s] Temporary directory for ffmpeg transcoded files\n", sparams.tmp_dir.c_str());
fprintf(stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n", params.suppress_nst ? "true" : "false");
fprintf(stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n", params.no_speech_thold);
fprintf(stderr, " -ng, --no-gpu [%-7s] do not use gpu\n", params.use_gpu ? "false" : "true");
Expand Down Expand Up @@ -248,6 +250,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
else if ( arg == "--request-path") { sparams.request_path = argv[++i]; }
else if ( arg == "--inference-path") { sparams.inference_path = argv[++i]; }
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
else if ( arg == "--tmp-dir") { sparams.tmp_dir = argv[++i]; }

// Voice Activity Detection (VAD)
else if ( arg == "--vad") { params.vad = true; }
Expand Down Expand Up @@ -288,15 +291,17 @@ void check_ffmpeg_availibility() {
}
}

std::string generate_temp_filename(const std::string &prefix, const std::string &extension) {
std::string generate_temp_filename(const std::string &path, const std::string &prefix, const std::string &extension) {
auto now = std::chrono::system_clock::now();
auto now_time_t = std::chrono::system_clock::to_time_t(now);

static std::mt19937 rng{std::random_device{}()};
std::uniform_int_distribution<long long> dist(0, 1e9);

std::stringstream ss;
ss << prefix
ss << path
<< std::filesystem::path::preferred_separator
<< prefix
<< "-"
<< std::put_time(std::localtime(&now_time_t), "%Y%m%d-%H%M%S")
<< "-"
Expand Down Expand Up @@ -816,7 +821,7 @@ int main(int argc, char ** argv) {
if (sparams.ffmpeg_converter) {
// if file is not wav, convert to wav
// write to temporary file
const std::string temp_filename = generate_temp_filename("whisper-server", ".wav");
const std::string temp_filename = generate_temp_filename(sparams.tmp_dir, "whisper-server", ".wav");
std::ofstream temp_file{temp_filename, std::ios::binary};
temp_file << audio_file.content;
temp_file.close();
Expand Down
Loading