-
Notifications
You must be signed in to change notification settings - Fork 791
Add parakeet to examples/models #16349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
8361c9a
ec96225
47ae3e0
a2a4687
7d39ed7
af801b1
fa22b18
1829505
54a6319
09cb1be
fa66bb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # All rights reserved. | ||
| # | ||
| # This source code is licensed under the BSD-style license found in the | ||
| # LICENSE file in the root directory of this source tree. | ||
|
|
||
| # Standalone CMake project that builds the parakeet_runner executable. | ||
| cmake_minimum_required(VERSION 3.24) | ||
| project(parakeet_runner) | ||
|
|
||
| # Compile as C++17 and fail at configure time if the compiler cannot provide it. | ||
| set(CMAKE_CXX_STANDARD 17) | ||
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
|
|
||
| # Root of the ExecuTorch tree: three directories above this CMakeLists.txt. | ||
| set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..) | ||
|
|
||
| # Shared CMake helpers from the ExecuTorch tree. NOTE(review): presumably the | ||
| # source of executorch_target_link_options_shared_lib() and | ||
| # target_link_options_gc_sections() used below - confirm. | ||
| include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) | ||
|
|
||
| # Record in CMAKE_TOOLCHAIN_IOS whether the toolchain file is an iOS one, i.e. | ||
| # its name ends in "iOS.cmake" or "ios.toolchain.cmake". | ||
| # The literal dots are spelled "\\." because CMake collapses a single "\." in a | ||
| # quoted argument to ".", which the regex engine would treat as "any character". | ||
| if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\\.toolchain)\\.cmake$") | ||
| set(CMAKE_TOOLCHAIN_IOS ON) | ||
| else() | ||
| set(CMAKE_TOOLCHAIN_IOS OFF) | ||
| endif() | ||
|
|
||
| # Let files say "include <executorch/path/to/header.h>": the include root is the | ||
| # parent directory of EXECUTORCH_ROOT. It is applied to parakeet_runner below. | ||
| set(_common_include_directories ${EXECUTORCH_ROOT}/..) | ||
|
|
||
| # Need this for gflags: point find_package() at the gflags package directory | ||
| # expected at third-party/gflags, three levels above this build directory. | ||
| # Configuration stops here if gflags is not found (REQUIRED). | ||
| set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags) | ||
| find_package(gflags REQUIRED) | ||
|
|
||
| # Find executorch libraries | ||
| # The directory three levels above this build directory is added as a search | ||
| # root. FIND_ROOT_PATH_BOTH makes find_package() look in both re-rooted and | ||
| # non-rooted locations, so the lookup also works when a toolchain file limits | ||
| # searches to CMAKE_FIND_ROOT_PATH. | ||
| list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..) | ||
| find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH) | ||
| # NOTE(review): presumably applies whole-archive style link options so that | ||
| # statically registered symbols are kept - confirm against Utils.cmake. | ||
| executorch_target_link_options_shared_lib(executorch) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you need this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have ET as a dep which part are you saying I dont need? |
||
|
|
||
| # Base set of libraries for the runner; the sections below append to this list. | ||
| set(link_libraries executorch gflags) | ||
|
|
||
| # Common ops for all builds: appended unconditionally, whichever backend is | ||
| # enabled. | ||
| list(APPEND link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas) | ||
| executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably not needed
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its used in the preprocessor iiuc |
||
|
|
||
| # CPU-only builds need quantized and custom ops. | ||
| # The gate is the CUDA switch alone: which compiler is in use (MSVC or not) | ||
| # does not change whether a non-CUDA build needs these op libraries. | ||
| if(NOT EXECUTORCH_BUILD_CUDA) | ||
| list(APPEND link_libraries quantized_ops_lib custom_ops) | ||
| executorch_target_link_options_shared_lib(quantized_ops_lib) | ||
| executorch_target_link_options_shared_lib(custom_ops) | ||
| endif() | ||
|
|
||
| # XNNPACK: only wired in when the xnnpack_backend target exists in this build. | ||
| if(TARGET xnnpack_backend) | ||
| executorch_target_link_options_shared_lib(xnnpack_backend) | ||
| set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod) | ||
| # kleidiai is optional; add it only when its target is available. | ||
| if(TARGET kleidiai) | ||
| list(APPEND xnnpack_backend_libs kleidiai) | ||
| endif() | ||
| list(APPEND link_libraries ${xnnpack_backend_libs}) | ||
| endif() | ||
|
|
||
| # cpuinfo uses the Android-specific log library, so link it on Android builds. | ||
| if(ANDROID) | ||
| list(APPEND link_libraries log) | ||
| endif() | ||
|
|
||
| # ExecuTorch extension libraries and the tokenizer library the runner links. | ||
| list(APPEND link_libraries extension_llm_runner extension_module) | ||
| list(APPEND link_libraries extension_data_loader extension_tensor) | ||
| list(APPEND link_libraries extension_flat_tensor tokenizers::tokenizers) | ||
|
|
||
| # Link CUDA backend | ||
| # Only when EXECUTORCH_BUILD_CUDA is set; the CUDA toolkit must then be | ||
| # discoverable or configuration fails (REQUIRED). | ||
| if(EXECUTORCH_BUILD_CUDA) | ||
| find_package(CUDAToolkit REQUIRED) | ||
| list(APPEND link_libraries aoti_cuda_backend) | ||
| # The shared-lib link options are skipped under MSVC. NOTE(review): presumably | ||
| # because the backend is consumed as a DLL there (see the aoti_cuda_shims copy | ||
| # step at the end of this file) - confirm. | ||
| if(NOT MSVC) | ||
| executorch_target_link_options_shared_lib(aoti_cuda_backend) | ||
| endif() | ||
| endif() | ||
|
|
||
| # Link Metal backend when EXECUTORCH_BUILD_METAL is enabled. | ||
| if(EXECUTORCH_BUILD_METAL) | ||
| list(APPEND link_libraries metal_backend) | ||
| executorch_target_link_options_shared_lib(metal_backend) | ||
| endif() | ||
|
|
||
| # Runner executable built from main.cpp. Every build type other than Debug gets | ||
| # the gc-sections link options; toolchains that are neither Apple nor MSVC also | ||
| # pass -s to the linker to strip symbols. | ||
| add_executable(parakeet_runner main.cpp) | ||
| if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug") | ||
| target_link_options_gc_sections(parakeet_runner) | ||
| if(NOT (APPLE OR MSVC)) | ||
| target_link_options(parakeet_runner PRIVATE "LINKER:-s") | ||
| endif() | ||
| endif() | ||
|
|
||
| # Attach the shared compile options, the include root and the accumulated | ||
| # library list to the runner. | ||
| target_compile_options(parakeet_runner PUBLIC ${_common_compile_options}) | ||
| target_include_directories(parakeet_runner PUBLIC ${_common_include_directories}) | ||
| target_link_libraries(parakeet_runner PUBLIC ${link_libraries}) | ||
|
|
||
| # On Windows, copy required DLLs to the executable directory | ||
| # After each build of parakeet_runner under MSVC with CUDA enabled, the | ||
| # aoti_cuda_shims binary is copied next to the executable if it has changed. | ||
| # VERBATIM keeps argument escaping consistent across platforms, so paths that | ||
| # contain spaces are passed to the copy command intact. | ||
| if(MSVC AND EXECUTORCH_BUILD_CUDA) | ||
| add_custom_command( | ||
| TARGET parakeet_runner | ||
| POST_BUILD | ||
| COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:aoti_cuda_shims> | ||
| $<TARGET_FILE_DIR:parakeet_runner> | ||
| COMMENT "Copying aoti_cuda_shims.dll to parakeet_runner directory" | ||
| VERBATIM | ||
| ) | ||
| endif() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| { | ||
| "version": 6, | ||
| "configurePresets": [ | ||
| { | ||
| "name": "parakeet-base", | ||
| "hidden": true, | ||
| "binaryDir": "${sourceDir}/../../../cmake-out/examples/models/parakeet", | ||
| "cacheVariables": { | ||
| "CMAKE_BUILD_TYPE": "Release", | ||
| "CMAKE_FIND_ROOT_PATH": "${sourceDir}/../../../cmake-out", | ||
| "CMAKE_PREFIX_PATH": "${sourceDir}/../../../cmake-out" | ||
| } | ||
| }, | ||
| { | ||
| "name": "parakeet-cpu", | ||
| "displayName": "Parakeet runner (CPU)", | ||
| "inherits": ["parakeet-base"] | ||
| }, | ||
| { | ||
| "name": "parakeet-cuda", | ||
| "displayName": "Parakeet runner (CUDA)", | ||
| "inherits": ["parakeet-base"], | ||
| "cacheVariables": { | ||
| "EXECUTORCH_BUILD_CUDA": "ON" | ||
| }, | ||
| "condition": { | ||
| "type": "inList", | ||
| "string": "${hostSystemName}", | ||
| "list": ["Linux", "Windows"] | ||
| } | ||
| }, | ||
| { | ||
| "name": "parakeet-metal", | ||
| "displayName": "Parakeet runner (Metal)", | ||
| "inherits": ["parakeet-base"], | ||
| "cacheVariables": { | ||
| "EXECUTORCH_BUILD_METAL": "ON" | ||
| }, | ||
| "condition": { | ||
| "lhs": "${hostSystemName}", | ||
| "type": "equals", | ||
| "rhs": "Darwin" | ||
| } | ||
| } | ||
| ], | ||
| "buildPresets": [ | ||
| { | ||
| "name": "parakeet-cpu", | ||
| "displayName": "Build Parakeet runner (CPU)", | ||
| "configurePreset": "parakeet-cpu", | ||
| "targets": ["parakeet_runner"] | ||
| }, | ||
| { | ||
| "name": "parakeet-cuda", | ||
| "displayName": "Build Parakeet runner (CUDA)", | ||
| "configurePreset": "parakeet-cuda", | ||
| "targets": ["parakeet_runner"] | ||
| }, | ||
| { | ||
| "name": "parakeet-metal", | ||
| "displayName": "Build Parakeet runner (Metal)", | ||
| "configurePreset": "parakeet-metal", | ||
| "targets": ["parakeet_runner"] | ||
| } | ||
| ], | ||
| "workflowPresets": [ | ||
| { | ||
| "name": "parakeet-cpu", | ||
| "displayName": "Configure and build Parakeet runner (CPU)", | ||
| "steps": [ | ||
| { | ||
| "type": "configure", | ||
| "name": "parakeet-cpu" | ||
| }, | ||
| { | ||
| "type": "build", | ||
| "name": "parakeet-cpu" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "name": "parakeet-cuda", | ||
| "displayName": "Configure and build Parakeet runner (CUDA)", | ||
| "steps": [ | ||
| { | ||
| "type": "configure", | ||
| "name": "parakeet-cuda" | ||
| }, | ||
| { | ||
| "type": "build", | ||
| "name": "parakeet-cuda" | ||
| } | ||
| ] | ||
| }, | ||
| { | ||
| "name": "parakeet-metal", | ||
| "displayName": "Configure and build Parakeet runner (Metal)", | ||
| "steps": [ | ||
| { | ||
| "type": "configure", | ||
| "name": "parakeet-metal" | ||
| }, | ||
| { | ||
| "type": "build", | ||
| "name": "parakeet-metal" | ||
| } | ||
| ] | ||
| } | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| # Parakeet TDT Export for ExecuTorch | ||
|
|
||
| Export [nvidia/parakeet-tdt-0.6b-v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) speech recognition model to ExecuTorch. | ||
|
|
||
| ## Installation | ||
|
|
||
| ```bash | ||
| pip install nemo_toolkit[asr] torchaudio | ||
| ``` | ||
|
|
||
| ## Export | ||
|
|
||
| Export the model: | ||
| ```bash | ||
| python export_parakeet_tdt.py | ||
| ``` | ||
|
|
||
| Test transcription on an audio file and compare eager vs lowered results: | ||
| ```bash | ||
| python export_parakeet_tdt.py --audio /path/to/audio.wav | ||
| ``` | ||
|
|
||
| ### Export Arguments | ||
|
|
||
| | Argument | Description | | ||
| |----------|-------------| | ||
| | `--output-dir` | Output directory for exports (default: `./parakeet_tdt_exports`) | | ||
| | `--backend` | Backend for acceleration: `portable`, `xnnpack`, `cuda`, `cuda-windows` (default: `portable`) | | ||
| | `--audio` | Path to audio file for transcription test | | ||
|
|
||
| **Note:** The preprocessor is always lowered with the portable backend regardless of the `--backend` setting. | ||
|
|
||
| ## C++ Runner | ||
|
|
||
| ### Building | ||
|
|
||
| First, build ExecuTorch with the LLM preset from the executorch root directory: | ||
|
|
||
| ```bash | ||
| cmake --workflow --preset llm-release | ||
| ``` | ||
|
|
||
| Then build the parakeet runner: | ||
|
|
||
| ```bash | ||
| cd examples/models/parakeet | ||
| cmake --workflow --preset parakeet-cpu | ||
| ``` | ||
|
|
||
| Available presets: | ||
| - `parakeet-cpu` - CPU-only build | ||
| - `parakeet-cuda` - CUDA acceleration (Linux/Windows) | ||
| - `parakeet-metal` - Metal acceleration (macOS) | ||
|
|
||
| ### Running | ||
|
|
||
| From the executorch root directory: | ||
|
|
||
| ```bash | ||
| ./cmake-out/examples/models/parakeet/parakeet_runner \ | ||
| --model_path examples/models/parakeet/parakeet_tdt_exports/parakeet_tdt.pte \ | ||
| --audio_path /path/to/audio.wav \ | ||
| --tokenizer_path examples/models/parakeet/tokenizer.model | ||
| ``` | ||
|
|
||
| ### Runner Arguments | ||
|
|
||
| | Argument | Description | | ||
| |----------|-------------| | ||
| | `--model_path` | Path to Parakeet model (.pte) | | ||
| | `--audio_path` | Path to input audio file (.wav) | | ||
| | `--tokenizer_path` | Path to tokenizer file (default: `tokenizer.json`) | | ||
| | `--data_path` | Path to data file (.ptd) containing delegate data (optional for CPU builds; required for CUDA) | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this necessary?