diff --git a/CMakePresets.json b/CMakePresets.json
index 4d8b70f08b2..c8fba2b6a41 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -309,6 +309,14 @@
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/arm_ethosu_linux.cmake",
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/arm/ethos-u-setup/aarch64-linux-musl-toolchain.cmake"
}
+ },
+ {
+ "name": "esp-baremetal",
+ "displayName": "Build ExecuTorch for ESP baremetal",
+ "inherits": ["common"],
+ "cacheVariables": {
+ "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/esp_baremetal.cmake"
+ }
}
],
"buildPresets": [
diff --git a/examples/espressif/README.md b/examples/espressif/README.md
new file mode 100644
index 00000000000..5c345b4d98f
--- /dev/null
+++ b/examples/espressif/README.md
@@ -0,0 +1,278 @@
+# ExecuTorch Executor Runner for Espressif ESP32/ESP32-S3
+
+> **:warning: This example is not tested in CI. Use at your own risk.**
+
+This example demonstrates how to run an ExecuTorch model on Espressif ESP32 and
+ESP32-S3 microcontrollers. It is based on the
+[Arm Cortex-M executor runner](../arm/executor_runner/) and adapted for the
+ESP-IDF build system and ESP32 memory architecture.
+
+## Supported Targets
+
+| Chip | CPU | Internal SRAM | PSRAM (optional) |
+|----------|---------------|---------------|------------------|
+| ESP32 | Xtensa LX6 (dual-core, 240MHz) | ~520KB | 4-8MB |
+| ESP32-S3 | Xtensa LX7 (dual-core, 240MHz) | ~512KB | 2-32MB (Octal) |
+
+## Prerequisites
+
+1. **ESP-IDF v5.1+**: Install the ESP-IDF toolchain following the
+ [official guide](https://docs.espressif.com/projects/esp-idf/en/stable/esp32/get-started/).
+
+2. **ExecuTorch**: Clone and set up ExecuTorch:
+ ```bash
+ git clone https://github.com/pytorch/executorch.git
+ cd executorch
+ pip install -e .
+ ```
+
+3. **Cross-compiled ExecuTorch libraries**: Build ExecuTorch for the ESP32
+ target. See the [Cross-Compilation](#cross-compiling-executorch) section.
+
+4. **A .pte model file**: Export a PyTorch model to the ExecuTorch `.pte`
+ format. For small models suitable for ESP32, consider:
+ - A simple add/multiply model
+ - MobileNet V2 (quantized, with PSRAM)
+ - Custom small models
+
+## Project Structure
+
+```
+examples/espressif/
+├── README.md # This file
+├── build.sh # Build helper script
+├── executor_runner/
+│ ├── CMakeLists.txt # Component/standalone CMake build
+│ ├── esp_executor_runner.cpp # Main executor runner
+│ ├── esp_memory_allocator.h # Custom memory allocator
+│ ├── esp_memory_allocator.cpp
+│ ├── esp_perf_monitor.h # Performance monitoring
+│ ├── esp_perf_monitor.cpp
+│ └── pte_to_header.py # Convert .pte to C header
+└── project/
+ ├── CMakeLists.txt # ESP-IDF project file
+ ├── sdkconfig.defaults # Default ESP-IDF configuration
+ ├── sdkconfig.defaults.esp32s3 # ESP32-S3 specific config
+ ├── partitions.csv # Example partition table; adjust app partition size for your board and model
+ └── main/
+ ├── CMakeLists.txt # Main component
+ └── main.cpp # Entry point
+```
+
+## Quick Start
+
+The following example has been tested only on an ESP32-S3 dev board with 8 MB of Octal PSRAM. You may need to adjust the `sdkconfig` file for your specific board.
+
+### 1. Export a simple model
+
+```python
+import torch
+from executorch.exir import to_edge
+
+class SimpleModel(torch.nn.Module):
+ def forward(self, x):
+ return x + x
+
+model = SimpleModel()
+example_input = (torch.randn(1, 8),)
+
+# Export to ExecuTorch
+exported = torch.export.export(model, example_input)
+edge = to_edge(exported)
+et_program = edge.to_executorch()
+
+with open("simple_add.pte", "wb") as f:
+ f.write(et_program.buffer)
+```
+
+### 2. Convert the model to a C header
+
+```bash
+python3 examples/espressif/executor_runner/pte_to_header.py \
+ --pte simple_add.pte \
+ --outdir examples/espressif/project/
+```
+
+### 3. Build with ESP-IDF
+
+```bash
+# Source ESP-IDF environment
+. $IDF_PATH/export.sh
+
+# Using the build script:
+./examples/espressif/build.sh --target esp32s3 --pte simple_add.pte
+
+# Or manually:
+cd examples/espressif/project
+idf.py set-target esp32s3
+idf.py build
+```
+
+### 4. Flash and Monitor
+
+```bash
+cd examples/espressif/project
+idf.py -p /dev/ttyUSB0 flash monitor
+```
+
+You should see output like:
+```
+Starting executorch runner !
+I [executorch:esp_executor_runner.cpp:237 et_pal_init()] ESP32 ExecuTorch runner initialized. Free heap: 6097812 bytes.
+I [executorch:esp_executor_runner.cpp:242 et_pal_init()] PSRAM available. Free PSRAM: 5764716 bytes.
+I [executorch:esp_executor_runner.cpp:1047 executor_runner_main()] PTE @ 0x3c05f9f0 [----ET12]
+I [executorch:esp_executor_runner.cpp:568 runner_init()] PTE Model data loaded. Size: 952 bytes.
+I [executorch:esp_executor_runner.cpp:583 runner_init()] Model buffer loaded, has 1 methods
+I [executorch:esp_executor_runner.cpp:593 runner_init()] Running method forward
+I [executorch:esp_executor_runner.cpp:604 runner_init()] Setup Method allocator pool. Size: 2097152 bytes.
+I [executorch:esp_executor_runner.cpp:620 runner_init()] Setting up planned buffer 0, size 64.
+I [executorch:esp_executor_runner.cpp:716 runner_init()] Method 'forward' loaded.
+I [executorch:esp_executor_runner.cpp:718 runner_init()] Preparing inputs...
+I [executorch:esp_executor_runner.cpp:780 runner_init()] Input prepared.
+I [executorch:esp_executor_runner.cpp:979 run_model()] Starting running 1 inferences...
+I [executorch:esp_perf_monitor.cpp:41 StopMeasurements()] Profiler report:
+I [executorch:esp_perf_monitor.cpp:42 StopMeasurements()] Number of inferences: 1
+I [executorch:esp_perf_monitor.cpp:43 StopMeasurements()] Total CPU cycles: 49545 (49545.00 per inference)
+I [executorch:esp_perf_monitor.cpp:48 StopMeasurements()] Total wall time: 205 us (205.00 us per inference)
+I [executorch:esp_perf_monitor.cpp:53 StopMeasurements()] Average inference time: 0.205 ms
+I [executorch:esp_perf_monitor.cpp:59 StopMeasurements()] Free heap: 6097576 bytes
+I [executorch:esp_perf_monitor.cpp:63 StopMeasurements()] Min free heap ever: 6097576 bytes
+I [executorch:esp_executor_runner.cpp:999 run_model()] 1 inferences finished
+I [executorch:esp_executor_runner.cpp:867 print_outputs()] 1 outputs:
+Output[0][0]: (float) 2.000000
+Output[0][1]: (float) 2.000000
+Output[0][2]: (float) 2.000000
+Output[0][3]: (float) 2.000000
+Output[0][4]: (float) 2.000000
+Output[0][5]: (float) 2.000000
+Output[0][6]: (float) 2.000000
+Output[0][7]: (float) 2.000000
+
+```
+
+## Cross-Compiling ExecuTorch
+
+ExecuTorch needs to be cross-compiled for the ESP32 target (Xtensa architecture).
+
+### Using the ESP-IDF toolchain
+
+```bash
+# Set up the cross-compilation toolchain
+export IDF_TARGET=esp32s3 # or esp32
+
+# Configure ExecuTorch build for ESP32
+#Make sure to adjust the list of ops for your model or alter to use one of the selective build methods
+cmake --preset esp-baremetal -B cmake-out-esp \
+ -DCMAKE_TOOLCHAIN_FILE=$IDF_PATH/tools/cmake/toolchain-${IDF_TARGET}.cmake \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF \
+ -DEXECUTORCH_SELECT_OPS_LIST="aten::add.out," \
+ .
+
+cmake --build cmake-out-esp -j$(nproc)
+cmake --build cmake-out-esp --target install
+```
+
+## Memory Considerations
+
+### ESP32 (no PSRAM)
+- Total available SRAM: ~520KB (shared between code and data)
+- Recommended method allocator pool: 128-256KB
+- Recommended scratch pool: 64-128KB
+- **Only very small models will fit!**
+
+### ESP32 / ESP32-S3 with PSRAM
+- Internal SRAM: ~512KB (used for code and fast data)
+- PSRAM: 2-32MB (used for model data and large buffers)
+- Recommended method allocator pool: 1-4MB
+- Recommended scratch pool: 256KB-1MB
+
+### Configuring Memory Pools
+
+Memory pool sizes auto-adjust based on PSRAM availability. Override with:
+
+```cmake
+# In your project CMakeLists.txt or via idf.py menuconfig
+set(ET_ESP_METHOD_ALLOCATOR_POOL_SIZE "1048576") # 1MB
+set(ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE "524288") # 512KB
+```
+
+Or as compile definitions:
+```bash
+idf.py build -DET_ESP_METHOD_ALLOCATOR_POOL_SIZE=1048576
+```
+
+## Loading Models
+
+### Compiled-in (default)
+The model `.pte` file is converted to a C array and compiled into the firmware.
+This is the simplest approach but increases firmware size.
+
+### Filesystem (SPIFFS/LittleFS)
+For larger models, load from the filesystem at runtime:
+
+1. Add `-DFILESYSTEM_LOAD=ON` to your build
+2. Create a SPIFFS partition with your model:
+ ```bash
+ # Add to partitions.csv:
+ # storage, data, spiffs, , 0x200000
+
+ # Create and flash SPIFFS image:
+ $IDF_PATH/components/spiffs/spiffsgen.py 0x200000 model_dir spiffs.bin
+ esptool.py write_flash 0x210000 spiffs.bin
+ ```
+
+## Configuration Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `ET_NUM_INFERENCES` | 1 | Number of inference runs |
+| `ET_LOG_DUMP_INPUT` | OFF | Log input tensor values |
+| `ET_LOG_DUMP_OUTPUT` | ON | Log output tensor values |
+| `ET_BUNDLE_IO` | OFF | Enable BundleIO test support |
+| `ET_EVENT_TRACER_ENABLED` | OFF | Enable ETDump profiling |
+| `FILESYSTEM_LOAD` | OFF | Load model from filesystem |
+| `ET_ESP_METHOD_ALLOCATOR_POOL_SIZE` | Auto | Method allocator size |
+| `ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE` | Auto | Scratch allocator size |
+
+## Differences from the Arm Example
+
+| Feature | Arm (Cortex-M) | ESP32/ESP32-S3 |
+|---------|----------------|----------------|
+| Build system | Bare-metal CMake + Arm toolchain | ESP-IDF (FreeRTOS-based) |
+| NPU | Ethos-U55/U65/U85 | None (CPU only) |
+| Memory | ITCM/DTCM/SRAM/DDR via linker script | IRAM/DRAM/PSRAM via ESP-IDF |
+| Performance monitor | ARM PMU + Ethos-U PMU | CPU cycle counter + esp_timer |
+| Semihosting | FVP simulator filesystem access | SPIFFS/LittleFS/SD filesystem |
+| Entry point | `main()` bare-metal | `app_main()` via FreeRTOS |
+| Timing | ARM_PMU_Get_CCNTR() | esp_cpu_get_cycle_count() |
+
+## Troubleshooting
+
+### Model too large for flash
+- Use filesystem loading (`FILESYSTEM_LOAD=ON`) with SPIFFS or SD card
+- Quantize the model to reduce size
+- Use a simpler/smaller model architecture
+
+### Out of memory during inference
+- Enable PSRAM if your board has it (`CONFIG_SPIRAM=y`)
+- Increase memory pool sizes
+- Use a smaller model
+- Check `log_mem_status()` output for memory usage details
+
+### Build errors with ExecuTorch libraries
+- Ensure ExecuTorch was cross-compiled with the same ESP-IDF toolchain
+- Check that `ET_BUILD_DIR_PATH` points to the correct build directory
+- Verify the target architecture matches (Xtensa LX6 for ESP32, LX7 for ESP32-S3)
+
+### Watchdog timer resets
+- Long inference times may trigger the task watchdog
+- Disable with `CONFIG_ESP_TASK_WDT_EN=n` in sdkconfig
+- Or increase the timeout: `CONFIG_ESP_TASK_WDT_TIMEOUT_S=30`
+
+## License
+
+This project is licensed under the BSD-style license found in the
+[LICENSE](../../LICENSE) file in the root directory of the ExecuTorch
+source tree.
diff --git a/examples/espressif/build.sh b/examples/espressif/build.sh
new file mode 100755
index 00000000000..fd23aa0d7c2
--- /dev/null
+++ b/examples/espressif/build.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Build script for the ExecuTorch ESP32 executor runner example.
+#
+# Prerequisites:
+# - ESP-IDF v5.1+ installed and sourced (. $IDF_PATH/export.sh)
+# - ExecuTorch cross-compiled for the ESP32 target
+# - Python 3.8+
+#
+# Usage:
+#   ./build.sh [--target esp32|esp32s3] [--pte <pte_file>] [--clean]
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ET_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+PROJECT_DIR="${SCRIPT_DIR}/project"
+TARGET="esp32s3"
+PTE_FILE=""
+CLEAN=false
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --target)
+ TARGET="$2"
+ shift 2
+ ;;
+ --pte)
+ PTE_FILE="$2"
+ shift 2
+ ;;
+ --clean)
+ CLEAN=true
+ shift
+ ;;
+ --help|-h)
+ echo "Usage: $0 [--target esp32|esp32s3] [--pte <pte_file>] [--clean]"
+ echo ""
+ echo "Options:"
+ echo " --target ESP32 target chip (default: esp32s3)"
+ echo " --pte Path to the .pte model file to embed"
+ echo " --clean Clean build directory before building"
+ exit 0
+ ;;
+ *)
+ echo "Unknown option: $1"
+ exit 1
+ ;;
+ esac
+done
+
+# Validate environment
+if [ -z "${IDF_PATH:-}" ]; then
+ echo "ERROR: IDF_PATH is not set. Please source ESP-IDF:"
+ echo " . \$IDF_PATH/export.sh"
+ exit 1
+fi
+
+echo "=== ExecuTorch ESP32 Executor Runner Build ==="
+echo "Target: ${TARGET}"
+echo "ExecuTorch root: ${ET_ROOT}"
+echo "ESP-IDF: ${IDF_PATH}"
+
+# Convert PTE to header if provided
+if [ -n "${PTE_FILE}" ]; then
+ if [ ! -f "${PTE_FILE}" ]; then
+ echo "ERROR: PTE file not found: ${PTE_FILE}"
+ exit 1
+ fi
+
+ echo "Converting PTE to header: ${PTE_FILE}"
+ HEADER_DIR="${PROJECT_DIR}"
+ mkdir -p "${HEADER_DIR}"
+ python3 "${SCRIPT_DIR}/executor_runner/pte_to_header.py" \
+ --pte "${PTE_FILE}" \
+ --outdir "${HEADER_DIR}"
+ echo "Model header generated: ${HEADER_DIR}/model_pte.h"
+fi
+
+# Navigate to project directory
+cd "${PROJECT_DIR}"
+
+# Clean if requested
+if [ "${CLEAN}" = true ]; then
+ echo "Cleaning build directory..."
+ rm -rf build sdkconfig
+fi
+# Set target
+echo "Setting target to ${TARGET}..."
+idf.py set-target "${TARGET}"
+
+# Build
+echo "Building..."
+idf.py build
+
+echo ""
+echo "=== Build complete ==="
+echo ""
+echo "To flash and monitor:"
+echo " cd ${PROJECT_DIR}"
+echo " idf.py -p /dev/ttyUSB0 flash monitor"
+echo ""
+echo "To just monitor:"
+echo " idf.py -p /dev/ttyUSB0 monitor"
diff --git a/examples/espressif/executor_runner/CMakeLists.txt b/examples/espressif/executor_runner/CMakeLists.txt
new file mode 100644
index 00000000000..63d701d38f1
--- /dev/null
+++ b/examples/espressif/executor_runner/CMakeLists.txt
@@ -0,0 +1,305 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# ESP-IDF component CMakeLists.txt for the ExecuTorch executor runner.
+#
+# This file defines the executor_runner as an ESP-IDF component.
+# It is designed to work with the ESP-IDF build system (idf.py build).
+#
+# Project structure expected:
+# my_project/
+# ├── CMakeLists.txt (project-level, uses this as a component)
+# ├── main/
+# │ └── CMakeLists.txt (main component, depends on executor_runner)
+# └── components/
+# └── executor_runner/ (this component - symlink or copy)
+#
+# Or you can use this CMakeLists.txt directly as a standalone CMake build
+# for cross-compilation testing.
+
+cmake_minimum_required(VERSION 3.16)
+
+# ─── Option: ESP-IDF component mode vs. standalone CMake mode ───
+if(ESP_PLATFORM)
+ # ═══════════════════════════════════════════════════════════════
+ # ESP-IDF Component Build
+ # ═══════════════════════════════════════════════════════════════
+ idf_component_register(
+ SRCS
+ "esp_executor_runner.cpp"
+ "esp_pal.cpp"
+ "esp_memory_allocator.cpp"
+ "esp_perf_monitor.cpp"
+ INCLUDE_DIRS
+ "."
+ REQUIRES
+ esp_timer
+ esp_system
+ spiffs
+ )
+
+ # ExecuTorch pre-built library paths
+ set(ET_DIR_PATH
+ "${CMAKE_CURRENT_SOURCE_DIR}/../../.."
+ CACHE PATH "Path to ExecuTorch source dir"
+ )
+ set(ET_BUILD_DIR_PATH
+ "${ET_DIR_PATH}/cmake-out-esp"
+ CACHE PATH "Path to ExecuTorch build/install dir for ESP target"
+ )
+ set(ET_PTE_FILE_PATH
+ ""
+ CACHE PATH "Path to ExecuTorch model .pte file"
+ )
+ set(PYTHON_EXECUTABLE
+ "python3"
+ CACHE PATH "Python executable"
+ )
+
+ set(ET_NUM_INFERENCES
+ "10"
+ CACHE STRING "Number of inferences to run"
+ )
+ option(ET_LOG_DUMP_INPUT "Dump input in log" OFF)
+ option(ET_LOG_DUMP_OUTPUT "Dump output in log" ON)
+ option(ET_BUNDLE_IO "Set to compile in BundleIO support" OFF)
+ set(ET_ATOL "0.01" CACHE STRING "Absolute tolerance for BundleIO testing")
+ set(ET_RTOL "0.01" CACHE STRING "Relative tolerance for BundleIO testing")
+ option(ET_DUMP_OUTPUTS "Collect and print outputs as base64 in log" OFF)
+ option(ET_DUMP_INTERMEDIATE_OUTPUTS "Collect and print intermediate outputs" OFF)
+ set(ET_DEBUG_BUFFER_SIZE "65536" CACHE STRING "Size of ETDump debug buffer")
+ option(FILESYSTEM_LOAD "Load model from filesystem instead of compiled-in data" OFF)
+
+ # Directory containing the generated model_pte.h header.
+ # By default this is the project source directory (where build.sh places it),
+ # but it can be overridden if you generate the header elsewhere.
+ set(ET_MODEL_HEADER_DIR
+ "${CMAKE_SOURCE_DIR}"
+ CACHE PATH "Directory containing the generated model_pte.h header"
+ )
+
+ # Memory pool sizes
+ set(ET_ESP_METHOD_ALLOCATOR_POOL_SIZE "" CACHE STRING
+ "Method allocator pool size (empty = auto based on PSRAM availability)")
+ set(ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE "" CACHE STRING
+ "Scratch temp allocator pool size (empty = auto based on PSRAM availability)")
+
+ # Find pre-built ExecuTorch libraries.
+ # TARGETS_GLOBAL is needed because ESP-IDF's project.cmake resolves link
+ # dependencies from the top-level project scope, but find_package runs
+ # inside this component's directory scope. Without GLOBAL, the imported
+ # targets (executorch, portable_kernels, etc.) are invisible at the
+ # project level and you get "No target executorch" errors.
+ set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL TRUE)
+ find_package(
+ executorch REQUIRED HINTS "${ET_BUILD_DIR_PATH}/lib/cmake/ExecuTorch"
+ )
+
+ # Convert pte to header if not using filesystem loading
+ if(NOT FILESYSTEM_LOAD AND ET_PTE_FILE_PATH)
+ add_custom_target(
+ gen_model_header DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h
+ )
+ add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h
+ COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/pte_to_header.py
+ --pte ${ET_PTE_FILE_PATH}
+ --outdir ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS ${ET_PTE_FILE_PATH}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ )
+ add_dependencies(${COMPONENT_LIB} gen_model_header)
+ endif()
+
+ # Include directories
+ target_include_directories(
+ ${COMPONENT_LIB}
+ PRIVATE
+ ${ET_DIR_PATH}/..
+ ${ET_DIR_PATH}/runtime/core/portable_type/c10
+ ${CMAKE_CURRENT_BINARY_DIR}
+ ${ET_MODEL_HEADER_DIR}
+ )
+
+ # Link ExecuTorch libraries
+ set(esp_runner_libs)
+ list(APPEND esp_runner_libs
+ extension_runner_util
+ executorch
+ executorch_selected_kernels
+ )
+
+ if(TARGET xnnpack_backend)
+ list(APPEND esp_runner_libs xnnpack_backend)
+ endif()
+
+ if(EXECUTORCH_ENABLE_EVENT_TRACER)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_EVENT_TRACER_ENABLED)
+ list(APPEND esp_runner_libs etdump flatccrt)
+ endif()
+
+ if(ET_BUNDLE_IO)
+ list(APPEND esp_runner_libs bundled_program)
+ endif()
+
+ target_link_libraries(${COMPONENT_LIB} PUBLIC ${esp_runner_libs})
+
+ # Compile definitions
+ target_compile_definitions(
+ ${COMPONENT_LIB} PRIVATE C10_USING_CUSTOM_GENERATED_MACROS
+ )
+
+ if(ET_NUM_INFERENCES)
+ target_compile_definitions(
+ ${COMPONENT_LIB} PUBLIC ET_NUM_INFERENCES=${ET_NUM_INFERENCES}
+ )
+ endif()
+
+ if(ET_LOG_DUMP_INPUT)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_LOG_DUMP_INPUT)
+ endif()
+
+ if(ET_LOG_DUMP_OUTPUT)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_LOG_DUMP_OUTPUT)
+ endif()
+
+ if(ET_BUNDLE_IO)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_BUNDLE_IO)
+ endif()
+
+ if(ET_ATOL)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_ATOL=${ET_ATOL})
+ endif()
+
+ if(ET_RTOL)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_RTOL=${ET_RTOL})
+ endif()
+
+ if(ET_DUMP_OUTPUTS)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC ET_DUMP_OUTPUTS)
+ endif()
+
+ if(ET_DUMP_INTERMEDIATE_OUTPUTS)
+ target_compile_definitions(
+ ${COMPONENT_LIB} PUBLIC ET_DUMP_INTERMEDIATE_OUTPUTS
+ )
+ endif()
+
+ if(ET_DEBUG_BUFFER_SIZE)
+ target_compile_definitions(
+ ${COMPONENT_LIB} PUBLIC ET_DEBUG_BUFFER_SIZE=${ET_DEBUG_BUFFER_SIZE}
+ )
+ endif()
+
+ if(FILESYSTEM_LOAD)
+ target_compile_definitions(${COMPONENT_LIB} PUBLIC FILESYSTEM_LOAD)
+ endif()
+
+ if(ET_ESP_METHOD_ALLOCATOR_POOL_SIZE)
+ target_compile_definitions(
+ ${COMPONENT_LIB}
+ PUBLIC ET_ESP_METHOD_ALLOCATOR_POOL_SIZE=${ET_ESP_METHOD_ALLOCATOR_POOL_SIZE}
+ )
+ endif()
+
+ if(ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE)
+ target_compile_definitions(
+ ${COMPONENT_LIB}
+ PUBLIC ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE}
+ )
+ endif()
+
+else()
+ # ═══════════════════════════════════════════════════════════════
+ # Standalone CMake Build (for host testing / cross-compilation)
+ # ═══════════════════════════════════════════════════════════════
+ project(esp_executor_runner)
+
+ set(ET_DIR_PATH
+ "${CMAKE_CURRENT_SOURCE_DIR}/../../.."
+ CACHE PATH "Path to ExecuTorch dir"
+ )
+ include(${ET_DIR_PATH}/tools/cmake/Utils.cmake)
+ set(ET_BUILD_DIR_PATH
+ "${ET_DIR_PATH}/cmake-out"
+ CACHE PATH "Path to ExecuTorch build/install dir"
+ )
+ set(ET_INCLUDE_PATH
+ "${ET_DIR_PATH}/.."
+ CACHE PATH "Path to ExecuTorch headers"
+ )
+ set(ET_PTE_FILE_PATH
+ ""
+ CACHE PATH "Path to ExecuTorch model pte"
+ )
+ set(PYTHON_EXECUTABLE
+ "python3"
+ CACHE PATH "Python executable"
+ )
+
+ set(ET_NUM_INFERENCES "1" CACHE STRING "Number of inferences to run")
+ option(ET_LOG_DUMP_OUTPUT "Dump output in log" ON)
+
+ if(NOT DEFINED ET_PTE_FILE_PATH OR ET_PTE_FILE_PATH STREQUAL "")
+ message(FATAL_ERROR "ET_PTE_FILE_PATH must be set to the .pte model file")
+ endif()
+
+ find_package(
+ executorch REQUIRED HINTS "${ET_BUILD_DIR_PATH}/lib/cmake/ExecuTorch"
+ )
+
+ # Convert pte to header
+ add_custom_target(
+ gen_model_header DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h
+ )
+ add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h
+ COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/pte_to_header.py --pte
+ ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS ${ET_PTE_FILE_PATH}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ )
+
+ add_executable(esp_executor_runner)
+ target_sources(
+ esp_executor_runner PRIVATE
+ esp_executor_runner.cpp
+ esp_pal.cpp
+ esp_perf_monitor.cpp
+ esp_memory_allocator.cpp
+ )
+
+ target_link_libraries(
+ esp_executor_runner PUBLIC
+ extension_runner_util
+ executorch
+ portable_kernels
+ )
+
+ target_include_directories(
+ esp_executor_runner
+ PRIVATE
+ ${ET_INCLUDE_PATH}
+ ${ET_DIR_PATH}/runtime/core/portable_type/c10
+ ${CMAKE_CURRENT_BINARY_DIR}
+ )
+
+ target_compile_definitions(
+ esp_executor_runner PRIVATE C10_USING_CUSTOM_GENERATED_MACROS
+ )
+
+ if(ET_NUM_INFERENCES)
+ target_compile_definitions(
+ esp_executor_runner PUBLIC ET_NUM_INFERENCES=${ET_NUM_INFERENCES}
+ )
+ endif()
+
+ if(ET_LOG_DUMP_OUTPUT)
+ target_compile_definitions(esp_executor_runner PUBLIC ET_LOG_DUMP_OUTPUT)
+ endif()
+
+ add_dependencies(esp_executor_runner gen_model_header)
+endif()
diff --git a/examples/espressif/executor_runner/esp_executor_runner.cpp b/examples/espressif/executor_runner/esp_executor_runner.cpp
new file mode 100644
index 00000000000..6b95e16b768
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_executor_runner.cpp
@@ -0,0 +1,1240 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/* This is an example ExecuTorch runner for Espressif ESP32 and ESP32-S3 chips.
+ * It is inspired by the Arm Cortex-M example runner and adapted for the
+ * ESP-IDF build system and ESP32 memory architecture.
+ *
+ * Some defines used to configure the code:
+ *
+ * ET_ESP_METHOD_ALLOCATOR_POOL_SIZE - Size of memory area used when
+ * setting up the model.
+ * ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE - Size of memory area used when
+ * running inferences (scratch).
+ * ET_NUM_INFERENCES - Number of times to run the inference.
+ * ET_LOG_DUMP_INPUT - Control if you want input to be dumped to the log.
+ * ET_LOG_DUMP_OUTPUT - Control if you want output to be dumped to the log.
+ *
+ * Devtool BundleIO: Use Bundle PTE with input and reference output included
+ * to check if it matches.
+ *
+ * ET_BUNDLE_IO - Build in Devtools BundleIO support. Makes it possible
+ * to use bpte with bundled input and output ref data.
+ * ET_ATOL - The atol used to compare output and ref data.
+ * ET_RTOL - The rtol used to compare output and ref data.
+ *
+ * Devtools ETDump: Speed and dumping output
+ *
+ * ET_EVENT_TRACER_ENABLED - Build in Devtools ETDump event trace code
+ * to generate cycle data.
+ * ET_DUMP_OUTPUTS - Collect and print outputs as a base64
+ * buffer in the log.
+ * ET_DUMP_INTERMEDIATE_OUTPUTS - Collect and print intermediate outputs.
+ * ET_DEBUG_BUFFER_SIZE - Override size of memory area used by
+ * ET_DUMP_OUTPUTS /
+ * ET_DUMP_INTERMEDIATE_OUTPUTS.
+ *
+ * ESP32 Memory Notes:
+ * - ESP32 has ~520KB internal SRAM, optionally 4-8MB PSRAM.
+ * - ESP32-S3 has ~512KB internal SRAM, optionally 2-32MB PSRAM (octal).
+ * - For larger models, PSRAM is required. Memory pools are placed in
+ * PSRAM when available using EXT_RAM_BSS_ATTR.
+ * - The model .pte data is converted to a C array and compiled in,
+ * or can be loaded from SPIFFS/LittleFS/SD card filesystem.
+ *
+ * FILESYSTEM_LOAD - When defined, the runner will load the .pte model
+ * from the filesystem (SPIFFS/LittleFS/SD) instead of
+ * compiled-in data. Useful for larger models that don't
+ * fit in flash as a C array.
+ */
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+#include <executorch/extension/data_loader/buffer_data_loader.h>
+#include <executorch/runtime/core/evalue.h>
+#include <executorch/runtime/core/hierarchical_allocator.h>
+#include <executorch/runtime/core/memory_allocator.h>
+#include <executorch/runtime/executor/method.h>
+#include <executorch/runtime/executor/program.h>
+#include <executorch/runtime/platform/log.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#include "esp_executor_runner.h"
+#include "esp_memory_allocator.h"
+#include "esp_perf_monitor.h"
+
+#if defined(ESP_PLATFORM)
+#include <esp_heap_caps.h>
+#include <esp_system.h>
+#include <esp_timer.h>
+#include <freertos/FreeRTOS.h>
+#include <freertos/task.h>
+#endif
+
+#if defined(ET_BUNDLE_IO)
+#include <executorch/devtools/bundled_program/bundled_program.h>
+#endif
+
+#if defined(ET_EVENT_TRACER_ENABLED)
+#include <executorch/devtools/etdump/etdump_flatcc.h>
+
+#if defined(ET_DUMP_INTERMEDIATE_OUTPUTS) || defined(ET_DUMP_OUTPUTS)
+#include <executorch/devtools/etdump/data_sinks/buffer_data_sink.h>
+
+#if !defined(ET_DEBUG_BUFFER_SIZE)
+#define ET_DEBUG_BUFFER_SIZE (64 * 1024)
+#endif
+
+#endif // ET_DUMP_INTERMEDIATE_OUTPUTS || ET_DUMP_OUTPUTS
+
+#endif // ET_EVENT_TRACER_ENABLED
+
+#if defined(FILESYSTEM_LOAD)
+#include <sys/stat.h>
+#if defined(ESP_PLATFORM)
+#include <esp_spiffs.h>
+#endif
+#else
+/* When not loading from filesystem, include the model as a compiled-in
+ * C array. This header is generated by the build process from the .pte file
+ * specified in ET_PTE_FILE_PATH. */
+#include "model_pte.h"
+#endif
+
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using executorch::extension::BufferDataLoader;
+using executorch::runtime::Error;
+using executorch::runtime::EValue;
+using executorch::runtime::HierarchicalAllocator;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::MemoryManager;
+using executorch::runtime::Method;
+using executorch::runtime::MethodMeta;
+using executorch::runtime::Program;
+using executorch::runtime::Result;
+using executorch::runtime::Span;
+using executorch::runtime::Tag;
+using executorch::runtime::TensorInfo;
+using executorch::runtime::toString;
+
+#if defined(ET_BUNDLE_IO)
+using executorch::bundled_program::compute_method_output_error_stats;
+using executorch::bundled_program::ErrorStats;
+using executorch::bundled_program::verify_method_outputs;
+#endif
+
+#if defined(ET_EVENT_TRACER_ENABLED)
+using executorch::etdump::BufferDataSink;
+using executorch::etdump::ETDumpGen;
+using executorch::etdump::ETDumpResult;
+using executorch::runtime::EventTracerDebugLogLevel;
+using torch::executor::etdump_result;
+#endif
+
+/**
+ * Memory pool sizes for the ExecuTorch runtime.
+ *
+ * ESP32: ~520KB internal SRAM total. With PSRAM: 4-8MB external.
+ * ESP32-S3: ~512KB internal SRAM total. With PSRAM: 2-32MB external.
+ *
+ * For models that fit in internal SRAM, use smaller pool sizes.
+ * For larger models, enable PSRAM and increase these values.
+ *
+ * Default: 256KB method allocator, 128KB scratch (suitable for small models).
+ * With PSRAM: These can be increased significantly.
+ */
+#if !defined(ET_ESP_METHOD_ALLOCATOR_POOL_SIZE)
+#if defined(CONFIG_SPIRAM)
+/* With PSRAM available, use larger pools */
+#define ET_ESP_METHOD_ALLOCATOR_POOL_SIZE (2 * 1024 * 1024)
+#else
+/* Internal SRAM only - conservative defaults */
+#define ET_ESP_METHOD_ALLOCATOR_POOL_SIZE (256 * 1024)
+#endif
+#endif
+
+#if !defined(ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE)
+#if defined(CONFIG_SPIRAM)
+#define ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE (512 * 1024)
+#else
+#define ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE (128 * 1024)
+#endif
+#endif
+
+/**
+ * Memory pool placement.
+ * On ESP32 with PSRAM, place large buffers in external RAM.
+ * EXT_RAM_BSS_ATTR places the buffer in PSRAM .bss section.
+ */
+#if defined(CONFIG_SPIRAM) && defined(ESP_PLATFORM)
+#include <esp_attr.h>
+// Use PSRAM for large allocations
+static const size_t method_allocation_pool_size =
+ ET_ESP_METHOD_ALLOCATOR_POOL_SIZE;
+static uint8_t __attribute__((aligned(16)))
+method_allocation_pool[ET_ESP_METHOD_ALLOCATOR_POOL_SIZE] EXT_RAM_BSS_ATTR;
+
+static const size_t temp_allocation_pool_size =
+ ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE;
+static uint8_t __attribute__((aligned(16)))
+temp_allocation_pool[ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE] EXT_RAM_BSS_ATTR;
+#else
+// Internal SRAM allocation
+static const size_t method_allocation_pool_size =
+ ET_ESP_METHOD_ALLOCATOR_POOL_SIZE;
+static uint8_t __attribute__((
+ aligned(16))) method_allocation_pool[ET_ESP_METHOD_ALLOCATOR_POOL_SIZE];
+
+static const size_t temp_allocation_pool_size =
+ ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE;
+static uint8_t __attribute__((
+ aligned(16))) temp_allocation_pool[ET_ESP_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE];
+#endif
+
+#if defined(FILESYSTEM_LOAD)
+static char* model_pte = nullptr;
+static size_t model_pte_size = 0;
+#endif
+
+#if defined(ET_BUNDLE_IO)
+static const size_t testset_idx = 0;
+
+#if defined(ET_ATOL)
+static const float et_atol = ET_ATOL;
+#else
+static const float et_atol = 0.01;
+#endif
+
+#if defined(ET_RTOL)
+static const float et_rtol = ET_RTOL;
+#else
+static const float et_rtol = 0.01;
+#endif
+#endif // ET_BUNDLE_IO
+
+#if defined(ET_NUM_INFERENCES)
+static const int num_inferences = ET_NUM_INFERENCES;
+#else
+static const int num_inferences = 10;
+#endif
+
+namespace {
+
+/// Lightweight heapless container that constructs and stores a T in-place.
+/// Useful when you want to avoid heap allocations but need to delay
+/// construction.
+template <typename T>
+class Box {
+ public:
+ Box() = default;
+
+ ~Box() {
+ if (has_value) {
+ ptr()->~T();
+ }
+ }
+
+ Box(const Box&) = delete;
+ Box& operator=(const Box&) = delete;
+
+ template <typename... Args>
+ void reset(Args&&... args) {
+ if (has_value) {
+ reinterpret_cast<T*>(mem)->~T();
+ }
+ new (mem) T(std::forward<Args>(args)...);
+ has_value = true;
+ }
+
+ T& value() {
+ return *ptr();
+ }
+
+ const T& value() const {
+ return *ptr();
+ }
+
+ T* operator->() {
+ return ptr();
+ }
+
+ const T* operator->() const {
+ return ptr();
+ }
+
+ private:
+ alignas(T) uint8_t mem[sizeof(T)];
+ bool has_value = false;
+
+ T* ptr() {
+ return reinterpret_cast<T*>(mem);
+ }
+
+ const T* ptr() const {
+ return reinterpret_cast<const T*>(mem);
+ }
+};
+
+template <typename ValueType>
+void fill_tensor_with_default_value(Tensor& tensor) {
+ ValueType fill_value{};
+ if constexpr (std::is_same_v<ValueType, bool>) {
+ fill_value = true;
+ } else {
+ fill_value = ValueType(1);
+ }
+
+ ValueType* data_ptr = tensor.mutable_data_ptr<ValueType>();
+ std::fill(data_ptr, data_ptr + tensor.numel(), fill_value);
+}
+
+Error prepare_input_tensors(Method& method, MemoryAllocator& allocator) {
+ MethodMeta method_meta = method.method_meta();
+ size_t num_inputs = method_meta.num_inputs();
+
+ EValue* input_evalues = allocator.allocateList<EValue>(num_inputs);
+ ET_CHECK_OR_RETURN_ERROR(
+ input_evalues != nullptr,
+ MemoryAllocationFailed,
+ "Could not allocate memory for input evalues.");
+
+ Error err = method.get_inputs(input_evalues, num_inputs);
+ ET_CHECK_OK_OR_RETURN_ERROR(err);
+
+ for (size_t i = 0; i < num_inputs; i++) {
+ auto tag = method_meta.input_tag(i);
+ ET_CHECK_OK_OR_RETURN_ERROR(tag.error());
+
+ if (tag.get() != Tag::Tensor) {
+ ET_LOG(
+ Debug,
+ "Skipping non-tensor input %lu",
+ static_cast(i));
+ continue;
+ }
+
+ // Fill tensors with default values (1) when no input data is provided
+ if (input_evalues[i].isTensor()) {
+ Tensor& tensor = input_evalues[i].toTensor();
+ switch (tensor.scalar_type()) {
+#define HANDLE_SCALAR_TYPE(cpp_type, scalar_name) \
+ case ScalarType::scalar_name: \
+ fill_tensor_with_default_value(tensor); \
+ break;
+ ET_FORALL_SCALAR_TYPES(HANDLE_SCALAR_TYPE)
+#undef HANDLE_SCALAR_TYPE
+ default:
+ ET_LOG(
+ Error, "Unhandled ScalarType %s", toString(tensor.scalar_type()));
+ err = Error::InvalidArgument;
+ break;
+ }
+ } else {
+ printf("Input[%lu]: Not Tensor\n", static_cast(i));
+ }
+ }
+
+ return err;
+}
+
+#if defined(FILESYSTEM_LOAD)
+/**
+ * Load a binary file from the filesystem.
+ * Supports SPIFFS, LittleFS, or SD card mounted filesystems.
+ */
+std::pair load_file_from_fs(
+ const char* filepath,
+ MemoryAllocator& allocator) {
+ FILE* fp = fopen(filepath, "rb");
+ if (!fp) {
+ ET_LOG(Fatal, "Could not open file %s (errno: %d)", filepath, errno);
+ return std::make_pair(nullptr, 0);
+ }
+
+ if (fseek(fp, 0, SEEK_END) != 0) {
+ ET_LOG(
+ Fatal, "Failed to seek to end of file %s (errno: %d)", filepath, errno);
+ fclose(fp);
+ return std::make_pair(nullptr, 0);
+ }
+ auto file_size = ftell(fp);
+ if (file_size <= 0) {
+ ET_LOG(
+ Fatal,
+ "Failed to determine valid size for file %s (size: %ld, errno: %d)",
+ filepath,
+ static_cast(file_size),
+ errno);
+ fclose(fp);
+ return std::make_pair(nullptr, 0);
+ }
+
+ if (fseek(fp, 0, SEEK_SET) != 0) {
+ ET_LOG(
+ Fatal,
+ "Failed to seek to beginning of file %s (errno: %d)",
+ filepath,
+ errno);
+ fclose(fp);
+ return std::make_pair(nullptr, 0);
+ }
+ const size_t size = static_cast(file_size);
+ char* buffer = static_cast(allocator.allocate(size));
+ if (buffer == nullptr) {
+ ET_LOG(
+ Fatal,
+ "Failed to allocate %lu bytes for file %s",
+ static_cast(size),
+ filepath);
+ fclose(fp);
+ return std::make_pair(nullptr, 0);
+ }
+
+ auto read_size = fread(buffer, 1, size, fp);
+ if (read_size != size) {
+ ET_LOG(
+ Fatal,
+ "Partial read of %s: got %lu of %lu bytes",
+ filepath,
+ static_cast(read_size),
+ static_cast(size));
+ fclose(fp);
+ return std::make_pair(nullptr, 0);
+ }
+ fclose(fp);
+ return std::make_pair(buffer, read_size);
+}
+
+#if defined(ESP_PLATFORM)
+/**
+ * Initialize SPIFFS filesystem for loading model files.
+ */
+bool init_spiffs(const char* base_path, const char* partition_label) {
+ esp_vfs_spiffs_conf_t conf = {
+ .base_path = base_path,
+ .partition_label = partition_label,
+ .max_files = 5,
+ .format_if_mount_failed = false,
+ };
+
+ esp_err_t ret = esp_vfs_spiffs_register(&conf);
+ if (ret != ESP_OK) {
+ if (ret == ESP_FAIL) {
+ ET_LOG(Error, "Failed to mount SPIFFS filesystem");
+ } else if (ret == ESP_ERR_NOT_FOUND) {
+ ET_LOG(Error, "SPIFFS partition not found");
+ } else {
+ ET_LOG(Error, "SPIFFS init failed: %s", esp_err_to_name(ret));
+ }
+ return false;
+ }
+
+ size_t total = 0, used = 0;
+ ret = esp_spiffs_info(partition_label, &total, &used);
+ if (ret == ESP_OK) {
+ ET_LOG(
+ Info,
+ "SPIFFS: total=%lu, used=%lu",
+ static_cast(total),
+ static_cast(used));
+ }
+ return true;
+}
+#endif // ESP_PLATFORM
+#endif // FILESYSTEM_LOAD
+
+/// Holds all state needed for setup and run phases
+struct RunnerContext {
+ RunnerContext() = default;
+ RunnerContext(const RunnerContext& ctx) = delete;
+ RunnerContext& operator=(const RunnerContext& ctx) = delete;
+
+ const char* method_name = nullptr;
+ size_t planned_buffer_memsize = 0;
+ size_t method_loaded_memsize = 0;
+ size_t executor_membase = 0;
+ size_t program_data_len = 0;
+ size_t input_memsize = 0;
+ size_t pte_size = 0;
+ bool bundle_io = false;
+ Box loader;
+ Box program;
+ Box method_allocator;
+ Box temp_allocator;
+ std::vector> planned_spans;
+ Box planned_memory;
+ Box memory_manager;
+ Box> method;
+#if defined(ET_EVENT_TRACER_ENABLED)
+ Box etdump_gen;
+#if defined(ET_DUMP_INTERMEDIATE_OUTPUTS) || defined(ET_DUMP_OUTPUTS)
+ void* debug_buffer;
+#endif
+#endif
+};
+
+void runner_init(RunnerContext& ctx, size_t pte_size) {
+ const void* program_data = model_pte;
+ ctx.program_data_len = pte_size;
+ ctx.pte_size = pte_size;
+
+#if defined(ET_BUNDLE_IO)
+ ctx.bundle_io = executorch::bundled_program::is_bundled_program(
+ reinterpret_cast(model_pte), ctx.pte_size);
+ if (ctx.bundle_io) {
+ Error status = executorch::bundled_program::get_program_data(
+ reinterpret_cast(model_pte),
+ ctx.pte_size,
+ &program_data,
+ &ctx.program_data_len);
+ ET_CHECK_MSG(
+ status == Error::Ok,
+ "get_program_data() from bundle PTE failed: 0x%x",
+ (unsigned int)status);
+ }
+#endif
+
+ ctx.loader.reset(program_data, ctx.program_data_len);
+ auto& loader = ctx.loader.value();
+ ET_LOG(
+ Info,
+ "PTE Model data loaded. Size: %lu bytes.",
+ static_cast(ctx.program_data_len));
+
+ // Parse the program file
+ Result program_result = Program::load(&loader);
+ ET_CHECK_MSG(
+ program_result.ok(),
+ "Program loading failed @ %p: 0x%" PRIx32,
+ program_data,
+ static_cast(program_result.error()));
+ ctx.program.reset(std::move(program_result.get()));
+ Program& program = ctx.program.value();
+
+ ET_LOG(
+ Info,
+ "Model buffer loaded, has %lu methods",
+ static_cast(program.num_methods()));
+
+ {
+ const auto method_name_result = program.get_method_name(0);
+ ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
+ ctx.method_name = *method_name_result;
+ }
+ ET_LOG(Info, "Running method %s", ctx.method_name);
+
+ Result method_meta = program.method_meta(ctx.method_name);
+ ET_CHECK_MSG(
+ method_meta.ok(),
+ "Failed to get method_meta for %s: 0x%x",
+ ctx.method_name,
+ (unsigned int)method_meta.error());
+
+ ET_LOG(
+ Info,
+ "Setup Method allocator pool. Size: %lu bytes.",
+ static_cast(method_allocation_pool_size));
+
+ ctx.method_allocator.reset(
+ method_allocation_pool_size, method_allocation_pool);
+
+ ctx.planned_spans.clear();
+ size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
+ ctx.planned_spans.reserve(num_memory_planned_buffers);
+ size_t planned_buffer_membase = ctx.method_allocator->used_size();
+
+ for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
+ size_t buffer_size =
+ static_cast(method_meta->memory_planned_buffer_size(id).get());
+ ET_LOG(
+ Info,
+ "Setting up planned buffer %lu, size %lu.",
+ static_cast(id),
+ static_cast(buffer_size));
+
+ uint8_t* buffer = reinterpret_cast(
+ ctx.method_allocator->allocate(buffer_size, 16UL));
+ ET_CHECK_MSG(
+ buffer != nullptr,
+ "Could not allocate memory for memory planned buffer size %lu",
+ static_cast(buffer_size));
+ ctx.planned_spans.push_back({buffer, buffer_size});
+ }
+
+ ctx.planned_buffer_memsize =
+ ctx.method_allocator->used_size() - planned_buffer_membase;
+
+ Span> planned_memory_span;
+ if (!ctx.planned_spans.empty()) {
+ planned_memory_span =
+ Span>(ctx.planned_spans.data(), ctx.planned_spans.size());
+ }
+ ctx.planned_memory.reset(planned_memory_span);
+
+ ctx.temp_allocator.reset(temp_allocation_pool_size, temp_allocation_pool);
+
+ ctx.memory_manager.reset(
+ &ctx.method_allocator.value(),
+ &ctx.planned_memory.value(),
+ &ctx.temp_allocator.value());
+
+ size_t method_loaded_membase = ctx.method_allocator->used_size();
+
+ executorch::runtime::EventTracer* event_tracer_ptr = nullptr;
+
+#if defined(ET_EVENT_TRACER_ENABLED)
+ ET_LOG(Info, "Setting up ETDump");
+ ctx.etdump_gen.reset();
+ event_tracer_ptr = &ctx.etdump_gen.value();
+
+#if defined(ET_DUMP_INTERMEDIATE_OUTPUTS) || defined(ET_DUMP_OUTPUTS)
+ ctx.debug_buffer = ctx.method_allocator->allocate(ET_DEBUG_BUFFER_SIZE, 16);
+ if (ctx.debug_buffer != nullptr) {
+ Span debug_buffer_span(
+ (uint8_t*)ctx.debug_buffer, ET_DEBUG_BUFFER_SIZE);
+
+ Result result =
+ ctx.etdump_gen.value().set_debug_buffer(debug_buffer_span);
+
+ if (result.ok()) {
+#if defined(ET_DUMP_INTERMEDIATE_OUTPUTS)
+ ET_LOG(
+ Info,
+ "ETDump: Allocated intermediate output buffer size: %d at 0x%p",
+ ET_DEBUG_BUFFER_SIZE,
+ ctx.debug_buffer);
+ ctx.etdump_gen.value().set_event_tracer_debug_level(
+ EventTracerDebugLogLevel::kIntermediateOutputs);
+#else
+ ET_LOG(
+ Info,
+ "ETDump: Allocated output buffer size: %d at 0x%p",
+ ET_DEBUG_BUFFER_SIZE,
+ ctx.debug_buffer);
+ ctx.etdump_gen.value().set_event_tracer_debug_level(
+ EventTracerDebugLogLevel::kProgramOutputs);
+#endif
+ } else {
+ ctx.debug_buffer = nullptr;
+ ET_LOG(
+ Error,
+ "ETDump: Could not set_debug_buffer() error:0x%" PRIx32,
+ result.error());
+ }
+ } else {
+ ET_LOG(
+ Error,
+ "ETDump: Could not allocate output buffer size %lu",
+ static_cast(ET_DEBUG_BUFFER_SIZE));
+ }
+#endif // ET_DUMP_INTERMEDIATE_OUTPUTS || ET_DUMP_OUTPUTS
+#endif // ET_EVENT_TRACER_ENABLED
+
+ ctx.method.reset(program.load_method(
+ ctx.method_name, &ctx.memory_manager.value(), event_tracer_ptr));
+
+ if (!ctx.method->ok()) {
+ ET_LOG(
+ Info,
+ "Loading of method %s failed with status 0x%" PRIx32,
+ ctx.method_name,
+ static_cast(ctx.method->error()));
+ }
+ ctx.method_loaded_memsize =
+ ctx.method_allocator->used_size() - method_loaded_membase;
+ ET_LOG(Info, "Method '%s' loaded.", ctx.method_name);
+
+ ET_LOG(Info, "Preparing inputs...");
+ size_t input_membase = ctx.method_allocator->used_size();
+
+#if defined(ET_BUNDLE_IO)
+ if (ctx.bundle_io) {
+ ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx);
+ Error status = executorch::bundled_program::load_bundled_input(
+ *ctx.method.value(), model_pte, testset_idx);
+ ET_CHECK_MSG(
+ status == Error::Ok,
+ "load_bundled_input failed with status 0x%" PRIx32,
+ status);
+ } else
+#endif
+ {
+ Error status = ::prepare_input_tensors(
+ *ctx.method.value(), ctx.method_allocator.value());
+ ET_CHECK_MSG(
+ status == Error::Ok,
+ "Failed to prepare inputs 0x%" PRIx32,
+ static_cast(status));
+ }
+
+#if defined(ET_LOG_DUMP_INPUT)
+ {
+ std::vector inputs(ctx.method.value()->inputs_size());
+ ET_LOG(Info, "%lu inputs: ", static_cast(inputs.size()));
+ Error status = ctx.method.value()->get_inputs(inputs.data(), inputs.size());
+ ET_CHECK(status == Error::Ok);
+
+ for (int i = 0; i < inputs.size(); ++i) {
+ if (inputs[i].isTensor()) {
+ Tensor tensor = inputs[i].toTensor();
+ for (int j = 0; j < tensor.numel(); ++j) {
+ if (tensor.scalar_type() == ScalarType::Int) {
+ printf(
+ "Input[%d][%d]: (int) %d\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j]);
+ } else if (tensor.scalar_type() == ScalarType::Float) {
+ printf(
+ "Input[%d][%d]: (float) %f\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j]);
+ } else if (tensor.scalar_type() == ScalarType::Char) {
+ printf(
+ "Input[%d][%d]: (char) %d\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j]);
+ } else if (tensor.scalar_type() == ScalarType::Bool) {
+ printf(
+ "Input[%d][%d]: (bool) %s (0x%x)\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j] ? "true" : "false",
+ tensor.const_data_ptr()[j]);
+ }
+ }
+ } else {
+ printf("Input[%d]: Not Tensor\n", i);
+ }
+ }
+ }
+#endif
+
+ ctx.input_memsize = ctx.method_allocator->used_size() - input_membase;
+ ctx.executor_membase = ctx.method_allocator->used_size();
+
+ ET_LOG(Info, "Input prepared.");
+}
+
+void log_mem_status(RunnerContext& ctx) {
+ size_t executor_memsize =
+ ctx.method_allocator->used_size() - ctx.executor_membase;
+
+ ET_LOG(
+ Info,
+ "model_pte_program_size: %lu bytes.",
+ static_cast(ctx.program_data_len));
+ ET_LOG(
+ Info,
+ "model_pte_loaded_size: %lu bytes.",
+ static_cast(ctx.pte_size));
+
+ if (ctx.method_allocator->size() != 0) {
+ size_t method_allocator_used = ctx.method_allocator->used_size();
+ ET_LOG(
+ Info,
+ "method_allocator_used: %lu / %lu free: %lu ( used: %lu %% ) ",
+ static_cast(method_allocator_used),
+ static_cast(ctx.method_allocator->size()),
+ static_cast(ctx.method_allocator->free_size()),
+ static_cast(
+ 100 * method_allocator_used / ctx.method_allocator->size()));
+ ET_LOG(
+ Info,
+ "method_allocator_planned: %lu bytes",
+ static_cast(ctx.planned_buffer_memsize));
+ ET_LOG(
+ Info,
+ "method_allocator_loaded: %lu bytes",
+ static_cast(ctx.method_loaded_memsize));
+ ET_LOG(
+ Info,
+ "method_allocator_input: %lu bytes",
+ static_cast(ctx.input_memsize));
+ ET_LOG(
+ Info,
+ "method_allocator_executor: %lu bytes",
+ static_cast(executor_memsize));
+ }
+ if (ctx.temp_allocator->size() > 0) {
+ ET_LOG(
+ Info,
+ "temp_allocator: %lu",
+ static_cast(ctx.temp_allocator->size()));
+ }
+
+#if defined(ESP_PLATFORM)
+ ET_LOG(
+ Info,
+ "ESP free heap: %lu bytes",
+ static_cast(esp_get_free_heap_size()));
+ ET_LOG(
+ Info,
+ "ESP min free heap ever: %lu bytes",
+ static_cast(esp_get_minimum_free_heap_size()));
+#if defined(CONFIG_SPIRAM)
+ ET_LOG(
+ Info,
+ "ESP free PSRAM: %lu bytes",
+ static_cast(heap_caps_get_free_size(MALLOC_CAP_SPIRAM)));
+#endif
+#endif
+
+#if defined(ET_EVENT_TRACER_ENABLED)
+#if defined(ET_DUMP_INTERMEDIATE_OUTPUTS) || defined(ET_DUMP_OUTPUTS)
+ if (ctx.debug_buffer != nullptr) {
+ size_t outputdump_len = ctx.etdump_gen->get_data_sink()->get_used_bytes();
+ ET_LOG(
+ Info,
+ "ETDump_outputs_buffer: %lu / %lu free: %lu ( used: %lu %% ) ",
+ static_cast(outputdump_len),
+ static_cast(ET_DEBUG_BUFFER_SIZE),
+ static_cast(ET_DEBUG_BUFFER_SIZE - outputdump_len),
+ static_cast(
+ 100 * outputdump_len / ET_DEBUG_BUFFER_SIZE));
+ }
+#endif
+#endif
+}
+
+void print_outputs(RunnerContext& ctx) {
+ std::vector outputs(ctx.method.value()->outputs_size());
+ ET_LOG(Info, "%lu outputs: ", static_cast(outputs.size()));
+ Error status =
+ ctx.method.value()->get_outputs(outputs.data(), outputs.size());
+ ET_CHECK(status == Error::Ok);
+
+ for (int i = 0; i < outputs.size(); ++i) {
+ if (outputs[i].isTensor()) {
+ Tensor tensor = outputs[i].toTensor();
+#if defined(ET_LOG_DUMP_OUTPUT)
+ for (int j = 0; j < tensor.numel(); ++j) {
+ if (tensor.scalar_type() == ScalarType::Int) {
+ printf(
+ "Output[%d][%d]: (int) %d\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j]);
+ } else if (tensor.scalar_type() == ScalarType::Float) {
+ printf(
+ "Output[%d][%d]: (float) %f\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j]);
+ } else if (tensor.scalar_type() == ScalarType::Char) {
+ printf(
+ "Output[%d][%d]: (char) %d\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j]);
+ } else if (tensor.scalar_type() == ScalarType::Bool) {
+ printf(
+ "Output[%d][%d]: (bool) %s (0x%x)\n",
+ i,
+ j,
+ tensor.const_data_ptr()[j] ? "true " : "false",
+ tensor.const_data_ptr()[j]);
+ }
+ }
+#endif
+ } else {
+ printf("Output[%d]: Not Tensor\n", i);
+ }
+ }
+}
+
+void write_etdump(RunnerContext& ctx) {
+#if defined(ET_EVENT_TRACER_ENABLED)
+ ETDumpResult result = ctx.etdump_gen->get_etdump_data();
+ if (result.buf != nullptr && result.size > 0) {
+ ET_LOG(
+ Info,
+ "ETDump data generated: %lu bytes",
+ static_cast(result.size));
+
+ // On ESP32, we could write to SPIFFS/SD or dump via serial.
+ // For now, log the size. In a production setup, you would
+ // write this to a filesystem or transmit over a network interface.
+#if defined(FILESYSTEM_LOAD) && defined(ESP_PLATFORM)
+ const char* etdump_filename = "/spiffs/etdump.bin";
+ ET_LOG(Info, "Writing etdump to file: %s", etdump_filename);
+ FILE* f = fopen(etdump_filename, "wb");
+ if (f) {
+ size_t bytes_written = fwrite((uint8_t*)result.buf, 1, result.size, f);
+ if (bytes_written != result.size) {
+ ET_LOG(
+ Error,
+ "Failed to write complete ETDump data to %s (wrote %lu of %lu bytes)",
+ etdump_filename,
+ static_cast(bytes_written),
+ static_cast(result.size));
+ }
+ fclose(f);
+ } else {
+ ET_LOG(Error, "Could not open %s for writing", etdump_filename);
+ }
+#endif
+ }
+#endif
+}
+
/// Verifies model outputs against bundled reference data when the loaded PTE
/// is a BundleIO .bpte; otherwise (or when BundleIO support is compiled out)
/// reports success unconditionally.
///
/// @param ctx        Runner state holding the executed method.
/// @param model_pte  Pointer to the full (bundled) PTE buffer.
/// @returns true when outputs match within et_rtol/et_atol, or when no
///   verification applies; false on mismatch.
bool verify_result(RunnerContext& ctx, const void* model_pte) {
  bool model_ok = false;
#if defined(ET_BUNDLE_IO)
  if (ctx.bundle_io) {
    // The aggregate error statistics are informational only; pass/fail is
    // decided by verify_method_outputs() below.
    ErrorStats stats = compute_method_output_error_stats(
        *ctx.method.value(), model_pte, testset_idx);
    if (stats.status == Error::Ok) {
      ET_LOG(Info, "=== Error stats for testset %d ===", testset_idx);
      ET_LOG(Info, " mean_absolute_error: %f", stats.mean_abs_error);
      ET_LOG(Info, " max_absolute_error: %f", stats.max_abs_error);
      ET_LOG(Info, " mean_relative_error: %f", stats.mean_relative_error);
      ET_LOG(Info, " max_relative_error: %f", stats.max_relative_error);
    } else {
      // Stats failure is not fatal; verification below still decides.
      ET_LOG(
          Info,
          "=== Error calculating stats for testset %d ERROR:%d ===",
          testset_idx,
          stats.status);
    }

    Error status = verify_method_outputs(
        *ctx.method.value(), model_pte, testset_idx, et_rtol, et_atol);
    if (status == Error::Ok) {
      ET_LOG(Info, "Model output match expected BundleIO bpte ref data.");
      ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx);
      model_ok = true;
    } else {
      ET_LOG(
          Error,
          "Model output don't match expected BundleIO bpte ref data. rtol=%f atol=%f",
          et_rtol,
          et_atol);
      ET_LOG(Error, "TEST: BundleIO index[%d] Test_result: FAIL", testset_idx);
      model_ok = false;
    }
  } else {
    // Plain (non-bundled) PTE: nothing to verify against.
    model_ok = true;
  }
#else
  (void)ctx;
  (void)model_pte;
  model_ok = true;
#endif
  return model_ok;
}
+
+bool run_model(RunnerContext& ctx, const void* model_pte) {
+ Error status = Error::Ok;
+ if (num_inferences <= 0) {
+ ET_LOG(
+ Info,
+ "num_inferences (%d) <= 0; skipping model execution.",
+ num_inferences);
+ // Nothing to run; treat as a no-op run.
+ return true;
+ }
+ ET_LOG(Info, "Starting running %d inferences...", num_inferences);
+ int successful_inferences = 0;
+ StartMeasurements();
+ for (int n = 0; n < num_inferences; n++) {
+ ET_LOG(Debug, "Running inference number %d", n);
+ status = ctx.method.value()->execute();
+ if (status != Error::Ok) {
+ break;
+ }
+ // Reset the temporary allocator between inferences
+ ctx.temp_allocator.reset(temp_allocation_pool_size, temp_allocation_pool);
+ successful_inferences++;
+ }
+ if (successful_inferences > 0) {
+ StopMeasurements(successful_inferences);
+ }
+
+ ET_CHECK_MSG(
+ status == Error::Ok,
+ "Execution of method %s failed with status 0x%" PRIx32,
+ ctx.method_name,
+ static_cast(status));
+
+ ET_LOG(Info, "%d inferences finished", successful_inferences);
+ print_outputs(ctx);
+ bool model_ok = verify_result(ctx, model_pte);
+ ET_LOG(Info, "Model run: %d", model_ok);
+
+ return model_ok;
+}
+
+} // namespace
+
+// =====================================================================
+// Global runner state -- shared by the public et_runner_* API and by
+// executor_runner_main() for its multi-inference demo loop.
+// =====================================================================
+
// Shared runner state used by both the et_runner_* API and
// executor_runner_main(); constructed in place by et_runner_init().
static RunnerContext g_runner_ctx;
// Set once et_runner_init() succeeds; guards all other et_runner_* calls.
static bool g_runner_initialized = false;

// Maximum number of input/output tensors handled in the public API.
static const size_t kMaxInputOutputs = 16;
+
+// =====================================================================
+// Public API
+// =====================================================================
+
/// Public API: initializes the runtime, loads the model (from SPIFFS when
/// FILESYSTEM_LOAD is set, otherwise from the linked-in buffer), and loads
/// the method via runner_init().
/// @returns true only when the method loaded successfully.
bool et_runner_init(void) {
  executorch::runtime::runtime_init();

  size_t pte_size;

#if defined(FILESYSTEM_LOAD)
#if defined(ESP_PLATFORM)
  if (!init_spiffs("/spiffs", "storage")) {
    ET_LOG(Fatal, "Failed to initialize SPIFFS. Cannot load model.");
    return false;
  }
#endif
  // NOTE(review): file_allocator carves the model buffer out of
  // method_allocation_pool, but runner_init() later constructs a fresh
  // allocator over the SAME pool starting at offset zero, so subsequent
  // allocations may overwrite the loaded PTE bytes. Confirm the intended
  // pool layout for the filesystem-load path.
  EspMemoryAllocator file_allocator(
      method_allocation_pool_size, method_allocation_pool);
  auto [buffer, buffer_size] =
      load_file_from_fs("/spiffs/model.pte", file_allocator);
  if (buffer == nullptr) {
    ET_LOG(Fatal, "Failed to load model from filesystem.");
    return false;
  }
  model_pte = buffer;
  model_pte_size = buffer_size;
  pte_size = buffer_size;
#else
  // Model is linked into the binary; model_pte is defined elsewhere.
  pte_size = sizeof(model_pte);
#endif

  runner_init(g_runner_ctx, pte_size);
  // Mark the runner usable only when the method actually loaded.
  g_runner_initialized = g_runner_ctx.method->ok();
  return g_runner_initialized;
}
+
+bool et_runner_set_input(size_t input_idx, const void* data, size_t num_bytes) {
+ if (!g_runner_initialized) {
+ ET_LOG(Error, "Runner not initialized. Call et_runner_init() first.");
+ return false;
+ }
+
+ Method& method = *g_runner_ctx.method.value();
+ const size_t num_inputs = method.inputs_size();
+
+ if (input_idx >= num_inputs) {
+ ET_LOG(
+ Error,
+ "Input index %lu out of range (num_inputs=%lu).",
+ static_cast(input_idx),
+ static_cast(num_inputs));
+ return false;
+ }
+ if (num_inputs > kMaxInputOutputs) {
+ ET_LOG(
+ Error,
+ "Model has too many inputs (%lu > %lu).",
+ static_cast(num_inputs),
+ static_cast(kMaxInputOutputs));
+ return false;
+ }
+
+ // get_inputs() returns shallow copies whose data pointers alias the
+ // method's internal tensor storage, allowing direct writes.
+ EValue input_evalues[kMaxInputOutputs];
+ Error status = method.get_inputs(input_evalues, num_inputs);
+ if (status != Error::Ok) {
+ ET_LOG(
+ Error,
+ "get_inputs() failed with status 0x%" PRIx32,
+ static_cast(status));
+ return false;
+ }
+
+ if (!input_evalues[input_idx].isTensor()) {
+ ET_LOG(
+ Error,
+ "Input %lu is not a Tensor.",
+ static_cast(input_idx));
+ return false;
+ }
+
+ Tensor& tensor = input_evalues[input_idx].toTensor();
+ const size_t tensor_bytes = tensor.nbytes();
+ if (num_bytes > tensor_bytes) {
+ ET_LOG(
+ Error,
+ "Input %lu: provided %lu bytes exceeds tensor capacity %lu bytes.",
+ static_cast(input_idx),
+ static_cast(num_bytes),
+ static_cast(tensor_bytes));
+ return false;
+ }
+ // Treat zero-length input as a no-op.
+ if (num_bytes == 0) {
+ return true;
+ }
+ // For non-zero length, the input data pointer must be non-null.
+ if (data == nullptr) {
+ ET_LOG(
+ Error,
+ "Input %lu: data pointer is null for non-zero num_bytes (%lu).",
+ static_cast(input_idx),
+ static_cast(num_bytes));
+ return false;
+ }
+
+ memcpy(tensor.mutable_data_ptr(), data, num_bytes);
+ return true;
+}
+
+bool et_runner_execute(void) {
+ if (!g_runner_initialized) {
+ ET_LOG(Error, "Runner not initialized. Call et_runner_init() first.");
+ return false;
+ }
+
+ Method& method = *g_runner_ctx.method.value();
+ Error status = method.execute();
+ // Reset the temporary allocator so it is ready for the next inference.
+ g_runner_ctx.temp_allocator.reset(
+ temp_allocation_pool_size, temp_allocation_pool);
+ if (status != Error::Ok) {
+ ET_LOG(
+ Error,
+ "execute() failed with status 0x%" PRIx32,
+ static_cast(status));
+ return false;
+ }
+ return true;
+}
+
+bool et_runner_get_output(
+ size_t output_idx,
+ void* buffer,
+ size_t buffer_bytes,
+ size_t* out_num_elements) {
+ if (!g_runner_initialized) {
+ ET_LOG(Error, "Runner not initialized. Call et_runner_init() first.");
+ return false;
+ }
+
+ Method& method = *g_runner_ctx.method.value();
+ const size_t num_outputs = method.outputs_size();
+
+ if (output_idx >= num_outputs) {
+ ET_LOG(
+ Error,
+ "Output index %lu out of range (num_outputs=%lu).",
+ static_cast(output_idx),
+ static_cast(num_outputs));
+ return false;
+ }
+ if (num_outputs > kMaxInputOutputs) {
+ ET_LOG(
+ Error,
+ "Model has too many outputs (%lu > %lu).",
+ static_cast(num_outputs),
+ static_cast(kMaxInputOutputs));
+ return false;
+ }
+
+ EValue output_evalues[kMaxInputOutputs];
+ Error status = method.get_outputs(output_evalues, num_outputs);
+ if (status != Error::Ok) {
+ ET_LOG(
+ Error,
+ "get_outputs() failed with status 0x%" PRIx32,
+ static_cast(status));
+ return false;
+ }
+
+ if (!output_evalues[output_idx].isTensor()) {
+ ET_LOG(
+ Error,
+ "Output %lu is not a Tensor.",
+ static_cast(output_idx));
+ return false;
+ }
+
+ Tensor tensor = output_evalues[output_idx].toTensor();
+ const size_t tensor_bytes = tensor.nbytes();
+ if (buffer_bytes < tensor_bytes) {
+ ET_LOG(
+ Error,
+ "Output %lu: buffer too small (%lu bytes < %lu bytes required).",
+ static_cast(output_idx),
+ static_cast(buffer_bytes),
+ static_cast(tensor_bytes));
+ return false;
+ }
+
+ memcpy(buffer, tensor.const_data_ptr(), tensor_bytes);
+ if (out_num_elements != nullptr) {
+ *out_num_elements = static_cast(tensor.numel());
+ }
+ return true;
+}
+
+size_t et_runner_inputs_size(void) {
+ if (!g_runner_initialized) {
+ return 0;
+ }
+ return (*g_runner_ctx.method.value()).inputs_size();
+}
+
+size_t et_runner_outputs_size(void) {
+ if (!g_runner_initialized) {
+ return 0;
+ }
+ return (*g_runner_ctx.method.value()).outputs_size();
+}
+
+/**
+ * Main entry point for the ESP32 executor runner.
+ *
+ * On ESP-IDF, this is called from app_main() (see below).
+ * The function can also be compiled for host testing without ESP-IDF.
+ */
+void executor_runner_main(void) {
+ if (!et_runner_init()) {
+ return;
+ }
+
+ // Log the PTE magic bytes for quick sanity check
+ ET_LOG(
+ Info,
+ "PTE @ %p [----%c%c%c%c]",
+ model_pte,
+ model_pte[4],
+ model_pte[5],
+ model_pte[6],
+ model_pte[7]);
+
+ bool model_ok = run_model(g_runner_ctx, model_pte);
+ ET_LOG(Info, "Model run: %d", model_ok);
+
+ log_mem_status(g_runner_ctx);
+ write_etdump(g_runner_ctx);
+
+ ET_CHECK_MSG(model_ok == true, "Problem running model");
+
+ ET_LOG(Info, "Program complete.");
+}
\ No newline at end of file
diff --git a/examples/espressif/executor_runner/esp_executor_runner.h b/examples/espressif/executor_runner/esp_executor_runner.h
new file mode 100644
index 00000000000..86672d8c0bf
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_executor_runner.h
@@ -0,0 +1,98 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/**
+ * Public API for the ESP32 ExecuTorch executor runner.
+ *
+ * Provides a simple interface to load a model once and run repeated inferences
+ * on dynamically generated input data:
+ *
+ * et_runner_init();
+ *
+ * // For each inference:
+ * et_runner_set_input(0, my_input_data, my_input_bytes);
+ * et_runner_execute();
+ * et_runner_get_output(0, out_buf, out_buf_bytes, &num_elements);
+ */
+
#pragma once

// C-compatible header: bool from <stdbool.h>, size_t from <stddef.h>.
#include <stdbool.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Initialize the runner: load the model, allocate memory pools, and prepare
 * the inference method. Must be called once before any other et_runner_*
 * function.
 *
 * @returns true on success, false on failure.
 */
bool et_runner_init(void);

/**
 * Copy raw data into the input tensor at the given index.
 *
 * The runner must already be initialized with et_runner_init(). The data's
 * layout (dtype and shape) must match the model's expected input tensor.
 *
 * @param input_idx  Zero-based index of the input tensor to set.
 * @param data       Pointer to the source data in host memory.
 * @param num_bytes  Number of bytes to copy. Must not exceed the tensor's
 *                   total byte size (element_size * num_elements).
 * @returns true on success, false on failure.
 */
bool et_runner_set_input(size_t input_idx, const void* data, size_t num_bytes);

/**
 * Execute one forward pass of the model.
 *
 * Must be called after et_runner_init(). Call et_runner_set_input() before
 * this if you want to provide custom input data. Results are available via
 * et_runner_get_output() after this call returns successfully.
 *
 * @returns true on success, false on failure.
 */
bool et_runner_execute(void);

/**
 * Copy the output tensor data at the given index into a caller-provided buffer.
 *
 * Must be called after a successful et_runner_execute().
 *
 * @param output_idx   Zero-based index of the output tensor to read.
 * @param buffer       Caller-allocated destination buffer.
 * @param buffer_bytes Size of the destination buffer in bytes. Must be
 *                     >= the output tensor's total byte size.
 * @param out_num_elements If non-NULL, set to the number of elements in the
 *                         output tensor (not bytes).
 * @returns true on success, false on failure.
 */
bool et_runner_get_output(
    size_t output_idx,
    void* buffer,
    size_t buffer_bytes,
    size_t* out_num_elements);

/**
 * Returns the number of input tensors expected by the loaded model.
 * Returns 0 if the runner is not yet initialized.
 */
size_t et_runner_inputs_size(void);

/**
 * Returns the number of output tensors produced by the loaded model.
 * Returns 0 if the runner is not yet initialized.
 */
size_t et_runner_outputs_size(void);

#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/examples/espressif/executor_runner/esp_memory_allocator.cpp b/examples/espressif/executor_runner/esp_memory_allocator.cpp
new file mode 100644
index 00000000000..c68f94289df
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_memory_allocator.cpp
@@ -0,0 +1,36 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "esp_memory_allocator.h"
+
+EspMemoryAllocator::EspMemoryAllocator(uint32_t size, uint8_t* base_address)
+ : MemoryAllocator(size, base_address), used_(0) {}
+
+void* EspMemoryAllocator::allocate(size_t size, size_t alignment) {
+ void* ret = executorch::runtime::MemoryAllocator::allocate(size, alignment);
+ if (ret != nullptr) {
+ // Keep used_ in sync with the underlying MemoryAllocator by computing it
+ // from the returned pointer and requested size, which implicitly includes
+ // any padding/alignment the base allocator applied.
+ uint8_t* end_ptr = static_cast(ret) + size;
+ used_ = static_cast(end_ptr - base_address());
+ }
+ return ret;
+}
+
+size_t EspMemoryAllocator::used_size() const {
+ return used_;
+}
+
+size_t EspMemoryAllocator::free_size() const {
+ return executorch::runtime::MemoryAllocator::size() - used_;
+}
+
+void EspMemoryAllocator::reset() {
+ executorch::runtime::MemoryAllocator::reset();
+ used_ = 0;
+}
diff --git a/examples/espressif/executor_runner/esp_memory_allocator.h b/examples/espressif/executor_runner/esp_memory_allocator.h
new file mode 100644
index 00000000000..377f608fe88
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_memory_allocator.h
@@ -0,0 +1,36 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include
+
+
+/**
+ * Custom allocator for Espressif ESP32/ESP32-S3 targets that tracks
+ * used and free memory. Extends the ExecuTorch MemoryAllocator with
+ * additional instrumentation useful for memory-constrained embedded
+ * environments.
+ */
+class EspMemoryAllocator : public executorch::runtime::MemoryAllocator {
+ public:
+ EspMemoryAllocator(uint32_t size, uint8_t* base_address);
+
+ void* allocate(size_t size, size_t alignment = kDefaultAlignment) override;
+
+ /// Returns the used size of the allocator's memory buffer.
+ size_t used_size() const;
+
+ /// Returns the free size of the allocator's memory buffer.
+ size_t free_size() const;
+
+ /// Resets the allocator to its initial state.
+ void reset();
+
+ private:
+ size_t used_;
+};
diff --git a/examples/espressif/executor_runner/esp_pal.cpp b/examples/espressif/executor_runner/esp_pal.cpp
new file mode 100644
index 00000000000..90c227d8f99
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_pal.cpp
@@ -0,0 +1,91 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <executorch/runtime/platform/log.h>
+#include <executorch/runtime/platform/platform.h>
+
+#if defined(ESP_PLATFORM)
+#include <esp_clk_tree.h>
+#include <esp_cpu.h>
+#include <esp_heap_caps.h>
+#include <esp_system.h>
+#endif
+
+extern "C" {
+
+void et_pal_init(void) {
+#if defined(ESP_PLATFORM)
+ ET_LOG(
+ Info,
+ "ESP32 ExecuTorch runner initialized. Free heap: %lu bytes.",
+      static_cast<unsigned long>(esp_get_free_heap_size()));
+#if defined(CONFIG_SPIRAM)
+ ET_LOG(
+ Info,
+ "PSRAM available. Free PSRAM: %lu bytes.",
+      static_cast<unsigned long>(heap_caps_get_free_size(MALLOC_CAP_SPIRAM)));
+#endif
+#endif
+}
+
+ET_NORETURN void et_pal_abort(void) {
+#if defined(ESP_PLATFORM)
+ esp_restart();
+#else
+ abort();
+#endif
+ while (1) {
+ }
+}
+
+et_timestamp_t et_pal_current_ticks(void) {
+#if defined(ESP_PLATFORM)
+ return (et_timestamp_t)esp_cpu_get_cycle_count();
+#else
+ return 0;
+#endif
+}
+
+et_tick_ratio_t et_pal_ticks_to_ns_multiplier(void) {
+#if defined(ESP_PLATFORM)
+ uint32_t cpu_freq_hz;
+ if (esp_clk_tree_src_get_freq_hz(SOC_MOD_CLK_CPU, ESP_CLK_TREE_SRC_FREQ_PRECISION_CACHED, &cpu_freq_hz) ==
+ ESP_OK) {
+ return {1000000000u, cpu_freq_hz};
+ }
+#endif
+ return {1000, 240}; // Default to 240 MHz if we can't get the actual frequency
+}
+
+void et_pal_emit_log_message(
+ ET_UNUSED et_timestamp_t timestamp,
+ et_pal_log_level_t level,
+ const char* filename,
+ const char* function,
+ size_t line,
+ const char* message,
+ ET_UNUSED size_t length) {
+ printf(
+ "%c [executorch:%s:%lu %s()] %s\n",
+ level,
+ filename,
+      static_cast<unsigned long>(line),
+ function,
+ message);
+ fflush(stdout);
+}
+
+void* et_pal_allocate(ET_UNUSED size_t size) {
+ return nullptr;
+}
+
+void et_pal_free(ET_UNUSED void* ptr) {}
+
+} // extern "C"
\ No newline at end of file
diff --git a/examples/espressif/executor_runner/esp_perf_monitor.cpp b/examples/espressif/executor_runner/esp_perf_monitor.cpp
new file mode 100644
index 00000000000..1b1a70987b5
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_perf_monitor.cpp
@@ -0,0 +1,100 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <inttypes.h>
+
+#include "esp_perf_monitor.h"
+
+#if defined(ESP_PLATFORM)
+
+#include <esp_cpu.h>
+#include <esp_system.h>
+#include <esp_timer.h>
+#include <executorch/runtime/platform/log.h>
+
+namespace {
+
+uint32_t start_cycle_count = 0;
+int64_t start_time_us = 0;
+
+} // namespace
+
+void StartMeasurements() {
+ start_cycle_count = esp_cpu_get_cycle_count();
+ start_time_us = esp_timer_get_time();
+}
+
+void StopMeasurements(int num_inferences) {
+ uint32_t end_cycle_count = esp_cpu_get_cycle_count();
+ int64_t end_time_us = esp_timer_get_time();
+
+ uint32_t delta_cycles = end_cycle_count - start_cycle_count;
+  uint64_t total_cycles = static_cast<uint64_t>(delta_cycles);
+ int64_t total_time_us = end_time_us - start_time_us;
+
+ ET_LOG(Info, "Profiler report:");
+ ET_LOG(Info, "Number of inferences: %d", num_inferences);
+
+ // Guard against division by zero or invalid counts when computing
+ // per-inference metrics.
+ if (num_inferences <= 0) {
+ ET_LOG(
+ Info,
+ "Total CPU cycles: %" PRIu64 " (per-inference metrics not computed)",
+ total_cycles);
+ ET_LOG(
+ Info,
+ "Total wall time: %" PRId64 " us (per-inference metrics not computed)",
+ total_time_us);
+ // Log ESP32 system memory info
+ ET_LOG(
+ Info,
+ "Free heap: %lu bytes",
+      static_cast<unsigned long>(esp_get_free_heap_size()));
+ ET_LOG(
+ Info,
+ "Min free heap ever: %lu bytes",
+      static_cast<unsigned long>(esp_get_minimum_free_heap_size()));
+ return;
+ }
+
+ ET_LOG(
+ Info,
+ "Total CPU cycles: %" PRIu64 " (%.2f per inference)",
+ total_cycles,
+ (double)total_cycles / num_inferences);
+ ET_LOG(
+ Info,
+ "Total wall time: %" PRId64 " us (%.2f us per inference)",
+ total_time_us,
+ (double)total_time_us / num_inferences);
+ ET_LOG(
+ Info,
+ "Average inference time: %.3f ms",
+ (double)total_time_us / num_inferences / 1000.0);
+
+ // Log ESP32 system memory info
+ ET_LOG(
+ Info,
+ "Free heap: %lu bytes",
+      static_cast<unsigned long>(esp_get_free_heap_size()));
+ ET_LOG(
+ Info,
+ "Min free heap ever: %lu bytes",
+      static_cast<unsigned long>(esp_get_minimum_free_heap_size()));
+}
+
+#else // !defined(ESP_PLATFORM)
+
+// Stub implementation for non-ESP builds (e.g. host testing)
+void StartMeasurements() {}
+
+void StopMeasurements(int num_inferences) {
+ (void)num_inferences;
+}
+
+#endif // defined(ESP_PLATFORM)
diff --git a/examples/espressif/executor_runner/esp_perf_monitor.h b/examples/espressif/executor_runner/esp_perf_monitor.h
new file mode 100644
index 00000000000..ccbdb07e331
--- /dev/null
+++ b/examples/espressif/executor_runner/esp_perf_monitor.h
@@ -0,0 +1,18 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+/**
+ * Performance monitoring helpers for Espressif ESP32/ESP32-S3.
+ *
+ * Uses the Xtensa/RISC-V CPU cycle counter (CCOUNT register on Xtensa,
+ * or esp_cpu_get_cycle_count() from ESP-IDF) for timing measurements.
+ */
+
+void StartMeasurements();
+void StopMeasurements(int num_inferences);
diff --git a/examples/espressif/executor_runner/pte_to_header.py b/examples/espressif/executor_runner/pte_to_header.py
new file mode 100644
index 00000000000..0a8935b7a92
--- /dev/null
+++ b/examples/espressif/executor_runner/pte_to_header.py
@@ -0,0 +1,100 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Converts an ExecuTorch .pte model file to a C header file containing
+the model data as a byte array. This is used to embed the model directly
+into the firmware binary for ESP32/ESP32-S3 targets.
+
+Usage:
+ python pte_to_header.py --pte model.pte [--outdir .] [--outfile model_pte.h]
+"""
+
+import binascii
+import os
+from argparse import ArgumentParser, ArgumentTypeError
+
+bytes_per_line = 32
+hex_digits_per_line = bytes_per_line * 2
+
+
+def input_file_path(path):
+ if os.path.exists(path):
+ return path
+ else:
+ raise ArgumentTypeError(f"input filepath:{path} does not exist")
+
+
+parser = ArgumentParser(description="Convert .pte model to C header for ESP32")
+parser.add_argument(
+ "-p",
+ "--pte",
+ help="ExecuTorch .pte model file",
+ type=input_file_path,
+ required=True,
+)
+parser.add_argument(
+ "-d",
+ "--outdir",
+ help="Output dir for model header",
+ type=str,
+ required=False,
+ default=".",
+)
+parser.add_argument(
+ "-o",
+ "--outfile",
+ help="Output filename for model header",
+ type=str,
+ required=False,
+ default="model_pte.h",
+)
+parser.add_argument(
+ "-s",
+ "--section",
+ help="Section attribute for the data array (use 'none' for no section attribute)",
+ type=str,
+ required=False,
+ default="none",
+)
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+ outfile = os.path.join(args.outdir, args.outfile)
+
+ if args.section == "none":
+ # No section attribute - let the linker/compiler decide placement.
+ # On ESP32 with PSRAM, the compiler/linker or EXT_RAM_BSS_ATTR
+ # in the code handles placement.
+ attr = "__attribute__((aligned(16))) static const unsigned char "
+ else:
+ attr = f'__attribute__((section("{args.section}"), aligned(16))) static const unsigned char '
+ if not os.path.exists(args.outdir):
+ os.makedirs(args.outdir)
+ with open(args.pte, "rb") as fr, open(outfile, "w") as fw:
+ data = fr.read()
+ hexstream = binascii.hexlify(data).decode("utf-8")
+
+ fw.write(
+ "/* Auto-generated model header for ESP32 ExecuTorch runner. */\n"
+ )
+ fw.write(f"/* Source: {os.path.basename(args.pte)} ({len(data)} bytes) */\n\n")
+ fw.write("#pragma once\n\n")
+ fw.write(attr + "model_pte[] = {")
+
+ for i in range(0, len(hexstream), 2):
+ if 0 == (i % hex_digits_per_line):
+ fw.write("\n")
+ fw.write("0x" + hexstream[i : i + 2] + ", ")
+
+ fw.write("\n};\n")
+ fw.flush()
+ os.fsync(fw.fileno())
+
+ print(
+ f"Input: {args.pte} with {len(data)} bytes. "
+ f"Output: {outfile} with {os.path.getsize(outfile)} bytes."
+ )
diff --git a/examples/espressif/project/CMakeLists.txt b/examples/espressif/project/CMakeLists.txt
new file mode 100644
index 00000000000..b467cb49baa
--- /dev/null
+++ b/examples/espressif/project/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Example ESP-IDF project CMakeLists.txt
+#
+# This is a template project that uses the executor_runner component.
+# Copy this to your own project directory and adjust paths as needed.
+#
+# Usage:
+# cd examples/espressif/project
+# idf.py set-target esp32s3
+# idf.py build
+# idf.py flash monitor
+
+cmake_minimum_required(VERSION 3.16)
+
+# Set the path to ExecuTorch source
+set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../.." CACHE PATH "ExecuTorch root")
+
+# Add the executor_runner as an extra component
+set(EXTRA_COMPONENT_DIRS
+ "${CMAKE_CURRENT_SOURCE_DIR}/../executor_runner"
+)
+
+include($ENV{IDF_PATH}/tools/cmake/project.cmake)
+project(executorch_esp_runner)
diff --git a/examples/espressif/project/main/CMakeLists.txt b/examples/espressif/project/main/CMakeLists.txt
new file mode 100644
index 00000000000..2b2cd9d135a
--- /dev/null
+++ b/examples/espressif/project/main/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Main component CMakeLists.txt for the ESP-IDF project.
+# This is a minimal main component that depends on the executor_runner.
+
+idf_component_register(
+ SRCS "main.cpp"
+ INCLUDE_DIRS "."
+ REQUIRES executor_runner
+)
diff --git a/examples/espressif/project/main/main.cpp b/examples/espressif/project/main/main.cpp
new file mode 100644
index 00000000000..ac446d142f8
--- /dev/null
+++ b/examples/espressif/project/main/main.cpp
@@ -0,0 +1,37 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/*
+ * Example ESP-IDF main component.
+ *
+ * The app_main() defined below performs optional initialization and then
+ * calls executor_runner_main().
+ *
+ * If you want to customize the runner behavior, you can modify the
+ * app_main() implementation here (e.g., add initialization or cleanup)
+ * while still delegating to executor_runner_main().
+ */
+
+
+#include <stdio.h>
+#include "sdkconfig.h"
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "esp_system.h"
+
+extern void executor_runner_main(void);
+
+extern "C" void app_main(void) {
+ printf("Starting executorch runner !\n");
+ fflush(stdout);
+ // Custom initialization here
+ executor_runner_main();
+ for (int i = 5; i >= 0; i--) {
+ vTaskDelay(1000 / portTICK_PERIOD_MS);
+ }
+ esp_restart();
+}
diff --git a/examples/espressif/project/partitions.csv b/examples/espressif/project/partitions.csv
new file mode 100644
index 00000000000..e6d484d3f99
--- /dev/null
+++ b/examples/espressif/project/partitions.csv
@@ -0,0 +1,5 @@
+# ESP-IDF Partition Table
+# Name, Type, SubType, Offset, Size, Flags
+nvs,data,nvs,0x9000,24K,
+phy_init,data,phy,0xf000,4K,
+factory,app,factory,0x10000,2M,
diff --git a/examples/espressif/project/sdkconfig.defaults b/examples/espressif/project/sdkconfig.defaults
new file mode 100644
index 00000000000..08b09229148
--- /dev/null
+++ b/examples/espressif/project/sdkconfig.defaults
@@ -0,0 +1,50 @@
+# ESP-IDF sdkconfig defaults for ExecuTorch executor runner
+#
+# These settings are optimized for running ExecuTorch models on ESP32/ESP32-S3.
+# Copy this file as sdkconfig.defaults in your project directory.
+
+# ─── CPU Frequency ───
+# Run at maximum frequency for best inference performance
+CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
+
+# ─── PSRAM (if available) ───
+# Enable PSRAM for larger model support
+CONFIG_SPIRAM=y
+CONFIG_SPIRAM_MODE_QUAD=y
+CONFIG_SPIRAM_SPEED_80M=y
+# Allow malloc to fall back to PSRAM when internal RAM is exhausted
+CONFIG_SPIRAM_USE_CAPS_ALLOC=y
+# Place BSS in PSRAM (for large static buffers)
+CONFIG_SPIRAM_ALLOW_BSS_SEG_EXTERNAL_MEMORY=y
+
+# ─── Memory ───
+# Increase main task stack size for ExecuTorch
+CONFIG_ESP_MAIN_TASK_STACK_SIZE=32768
+
+# ─── Flash ───
+# Use QIO flash mode for faster flash reads (model data)
+CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
+CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
+# Larger flash size for model data
+CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y
+
+# ─── Optimization ───
+# Optimize for performance
+CONFIG_COMPILER_OPTIMIZATION_PERF=y
+
+# ─── FreeRTOS ───
+# Increase tick rate for finer timing granularity
+CONFIG_FREERTOS_HZ=1000
+
+# ─── Logging ───
+# Default log level (can be changed at runtime)
+CONFIG_LOG_DEFAULT_LEVEL_INFO=y
+
+# ─── Watchdog ───
+# Disable task watchdog for long-running inference
+CONFIG_ESP_TASK_WDT_EN=n
+
+# ─── Custom partition table to be adjusted for larger builds ───
+CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"
+CONFIG_PARTITION_TABLE_FILENAME="partitions.csv"
\ No newline at end of file
diff --git a/examples/espressif/project/sdkconfig.defaults.esp32s3 b/examples/espressif/project/sdkconfig.defaults.esp32s3
new file mode 100644
index 00000000000..15f9c4eba30
--- /dev/null
+++ b/examples/espressif/project/sdkconfig.defaults.esp32s3
@@ -0,0 +1,42 @@
+# ESP-IDF sdkconfig defaults for ESP32-S3 target
+#
+# ESP32-S3 specific optimizations:
+# - Octal PSRAM support (up to 32MB)
+# - Dual-core Xtensa LX7 at 240MHz
+# - Vector extensions for faster computation
+
+# ─── CPU ───
+CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
+
+# ─── PSRAM (Octal PSRAM for ESP32-S3) ───
+CONFIG_SPIRAM=y
+#CONFIG_SPIRAM_MODE_QUAD=y
+CONFIG_SPIRAM_MODE_OCT=y
+CONFIG_SPIRAM_SPEED_80M=y
+CONFIG_SPIRAM_USE_CAPS_ALLOC=y
+CONFIG_SPIRAM_ALLOW_BSS_SEG_EXTERNAL_MEMORY=y
+
+# ─── Memory ───
+CONFIG_ESP_MAIN_TASK_STACK_SIZE=32768
+
+# ─── Flash ───
+CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
+CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
+CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y
+
+# ─── Optimization ───
+CONFIG_COMPILER_OPTIMIZATION_PERF=y
+
+# ─── FreeRTOS ───
+CONFIG_FREERTOS_HZ=1000
+
+# ─── Watchdog ───
+CONFIG_ESP_TASK_WDT_EN=n
+
+# ─── Logging ───
+CONFIG_LOG_DEFAULT_LEVEL_INFO=y
+
+# ─── Custom partition table to be adjusted for larger builds ───
+CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"
+CONFIG_PARTITION_TABLE_FILENAME="partitions.csv"
\ No newline at end of file
diff --git a/extension/threadpool/threadpool.cpp b/extension/threadpool/threadpool.cpp
index a15a2572669..1928892efe6 100644
--- a/extension/threadpool/threadpool.cpp
+++ b/extension/threadpool/threadpool.cpp
@@ -145,7 +145,7 @@ ThreadPool* get_threadpool() {
* tricky to detect if we are running under tsan, for now capping the
* default threadcount to the tsan limit unconditionally.
*/
- constexpr unsigned int tsan_thread_limit = 63;
+ constexpr decltype(result) tsan_thread_limit = 63;
return std::min(result, tsan_thread_limit);
})();
diff --git a/tools/cmake/preset/esp_baremetal.cmake b/tools/cmake/preset/esp_baremetal.cmake
new file mode 100644
index 00000000000..cf86d5efc79
--- /dev/null
+++ b/tools/cmake/preset/esp_baremetal.cmake
@@ -0,0 +1,21 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}")
+set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER OFF)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR OFF)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER OFF)
+set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)
+set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
+set_overridable_option(EXECUTORCH_ENABLE_LOGGING ON)
+set_overridable_option(EXECUTORCH_BUILD_ARM_ETDUMP OFF)
+
+if("${EXECUTORCH_BUILD_ARM_ETDUMP}")
+ set(EXECUTORCH_BUILD_DEVTOOLS ON)
+ set(EXECUTORCH_ENABLE_EVENT_TRACER ON)
+ set(FLATCC_ALLOW_WERROR OFF)
+else()
+ set(EXECUTORCH_ENABLE_EVENT_TRACER OFF)
+endif()