This commit is contained in:
2026-05-22 21:52:50 +03:00
commit be7c60e4dd
1854 changed files with 583428 additions and 0 deletions
+86
View File
@@ -0,0 +1,86 @@
#include "ESP_I2S.h"
#include "ESP_SR.h"
// I2S pins handed to i2s.setPins() in setup().
// NOTE(review): presumably bit clock, word select and data-in of the microphone — confirm against the board wiring.
#define I2S_PIN_BCK 17
#define I2S_PIN_WS 47
#define I2S_PIN_DIN 16
// GPIOs configured as outputs in setup() and switched by the voice commands handled in onSrEvent().
#define LIGHT_PIN 40
#define FAN_PIN 41
/**
* The input format:
* M to represent the microphone channel
* R to represent the playback reference channel
* N to represent an unknown or unused channel
*
* For example, input_format="MMNR" indicates that the input data consists of four channels,
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
*/
#define SR_INPUT_FORMAT "MM"
// I2S bus instance: configured in setup() and passed to ESP_SR.begin() as the audio source.
I2SClass i2s;
// Generated using the following command:
// python3 tools/gen_sr_commands.py "Turn on the light,Switch on the light;Turn off the light,Switch off the light,Go dark;Start fan;Stop fan"
// Command identifiers; the numeric values match the command_id column of sr_commands[].
enum {
  SR_CMD_TURN_ON_THE_LIGHT = 0,
  SR_CMD_TURN_OFF_THE_LIGHT = 1,
  SR_CMD_START_FAN = 2,
  SR_CMD_STOP_FAN = 3,
};
// Phrase table handed to ESP_SR.begin(). Each row is {command_id, display text, phoneme string}.
// Several rows may share one command_id so that alternative phrasings trigger the same SR_CMD_* value.
// onSrEvent() indexes this table with the phrase_id it receives to print the matched text.
static const sr_cmd_t sr_commands[] = {
{0, "Turn on the light", "TkN nN jc LiT"},
{0, "Switch on the light", "SWgp nN jc LiT"},
{1, "Turn off the light", "TkN eF jc LiT"},
{1, "Switch off the light", "SWgp eF jc LiT"},
{1, "Go dark", "Gb DnRK"},
{2, "Start fan", "STnRT FaN"},
{3, "Stop fan", "STnP FaN"},
};
/**
 * Speech-recognition event callback registered with ESP_SR.onEvent().
 *
 * @param event       Kind of event being reported.
 * @param command_id  For SR_EVENT_COMMAND: the command id of the matched phrase.
 *                    For SR_EVENT_WAKEWORD_CHANNEL: the verified channel index.
 * @param phrase_id   For SR_EVENT_COMMAND: index of the matched phrase in sr_commands[].
 */
void onSrEvent(sr_event_t event, int command_id, int phrase_id) {
  switch (event) {
    case SR_EVENT_WAKEWORD: Serial.println("WakeWord Detected!"); break;
    case SR_EVENT_WAKEWORD_CHANNEL:
      Serial.printf("WakeWord Channel %d Verified!\n", command_id);
      ESP_SR.setMode(SR_MODE_COMMAND);  // Switch to Command detection
      break;
    case SR_EVENT_TIMEOUT:
      Serial.println("Timeout Detected!");
      ESP_SR.setMode(SR_MODE_WAKEWORD);  // Switch back to WakeWord detection
      break;
    case SR_EVENT_COMMAND:
    {
      // phrase_id comes from the recognition engine; never index the table with an
      // out-of-range value, fall back to a placeholder text instead.
      const int phrase_count = static_cast<int>(sizeof(sr_commands) / sizeof(sr_commands[0]));
      const bool phrase_known = (phrase_id >= 0) && (phrase_id < phrase_count);
      const char *phrase_text = phrase_known ? sr_commands[phrase_id].str : "<unknown phrase>";
      Serial.printf("Command %d Detected! %s\n", command_id, phrase_text);
      switch (command_id) {
        case SR_CMD_TURN_ON_THE_LIGHT:  digitalWrite(LIGHT_PIN, HIGH); break;
        case SR_CMD_TURN_OFF_THE_LIGHT: digitalWrite(LIGHT_PIN, LOW); break;
        case SR_CMD_START_FAN:          digitalWrite(FAN_PIN, HIGH); break;
        case SR_CMD_STOP_FAN:           digitalWrite(FAN_PIN, LOW); break;
        default:                        Serial.println("Unknown Command!"); break;
      }
      ESP_SR.setMode(SR_MODE_COMMAND);  // Allow for more commands to be given, before timeout
      // ESP_SR.setMode(SR_MODE_WAKEWORD); // Switch back to WakeWord detection
      break;
    }
    default: Serial.println("Unknown Event!"); break;
  }
}
/**
 * One-time initialisation: serial console, output pins, the I2S bus and the
 * speech-recognition engine (started in wake-word mode).
 * Failures of the I2S bus or of ESP_SR are reported on the serial console
 * instead of being silently ignored.
 */
void setup() {
  Serial.begin(115200);

  // Both outputs start switched off.
  pinMode(LIGHT_PIN, OUTPUT);
  digitalWrite(LIGHT_PIN, LOW);
  pinMode(FAN_PIN, OUTPUT);
  digitalWrite(FAN_PIN, LOW);

  i2s.setPins(I2S_PIN_BCK, I2S_PIN_WS, -1, I2S_PIN_DIN);
  i2s.setTimeout(1000);
  if (!i2s.begin(I2S_MODE_STD, 16000, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO)) {
    // Without an audio source the recognizer has nothing to process.
    Serial.println("Failed to initialize I2S!");
    return;
  }

  ESP_SR.onEvent(onSrEvent);
  if (!ESP_SR.begin(i2s, sr_commands, sizeof(sr_commands) / sizeof(sr_cmd_t), SR_CHANNELS_STEREO, SR_MODE_WAKEWORD, SR_INPUT_FORMAT)) {
    Serial.println("Failed to start ESP_SR!");
  }
}
// Intentionally empty: this sketch reacts to recognition events in onSrEvent() only.
void loop() {}
+16
View File
@@ -0,0 +1,16 @@
fqbn:
esp32s3:
- espressif:esp32:esp32s3:USBMode=default,PartitionScheme=esp_sr_16,FlashSize=16M,FlashMode=dio
esp32p4:
- espressif:esp32:esp32p4:USBMode=default,ChipVariant=postv3,PartitionScheme=esp_sr_16,FlashSize=16M,FlashMode=qio
requires:
- CONFIG_SOC_I2S_SUPPORTED=y
targets:
esp32: false
esp32c3: false
esp32c6: false
esp32h2: false
esp32s2: false
esp32c5: false
+40
View File
@@ -0,0 +1,40 @@
#######################################
# Syntax Coloring Map For ESP_SR
#######################################
#######################################
# Datatypes (KEYWORD1)
#######################################
ESP_SR KEYWORD1
ESP_SR_Class KEYWORD1
sr_cmd_t KEYWORD1
sr_event_t KEYWORD1
sr_mode_t KEYWORD1
sr_channels_t KEYWORD1
sr_cb KEYWORD1
#######################################
# Methods and Functions (KEYWORD2)
#######################################
onEvent KEYWORD2
setMode KEYWORD2
pause KEYWORD2
resume KEYWORD2
#######################################
# Constants (LITERAL1)
#######################################
SR_EVENT_WAKEWORD LITERAL1
SR_EVENT_WAKEWORD_CHANNEL LITERAL1
SR_EVENT_COMMAND LITERAL1
SR_EVENT_TIMEOUT LITERAL1
SR_MODE_OFF LITERAL1
SR_MODE_WAKEWORD LITERAL1
SR_MODE_COMMAND LITERAL1
SR_MODE_MAX LITERAL1
SR_CHANNELS_MONO LITERAL1
SR_CHANNELS_STEREO LITERAL1
SR_CHANNELS_MAX LITERAL1
+9
View File
@@ -0,0 +1,9 @@
name=ESP_SR
version=3.3.7
author=me-no-dev
maintainer=me-no-dev
sentence=Library for ESP Sound Recognition
paragraph=Supports ESP32 Arduino platforms.
category=Sound
url=https://github.com/espressif/arduino-esp32/
architectures=esp32
+67
View File
@@ -0,0 +1,67 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
#include "sdkconfig.h"
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
#include "ESP_SR.h"
// C-style trampoline given to sr_start(): `arg` is the ESP_SR_Class instance,
// the audio request is forwarded to its _fill() method.
static esp_err_t on_sr_fill(void *arg, void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms) {
  ESP_SR_Class *self = (ESP_SR_Class *)arg;
  return self->_fill(out, len, bytes_read, timeout_ms);
}
static void on_sr_event(void *arg, sr_event_t event, int command_id, int phrase_id) {
((ESP_SR_Class *)arg)->_sr_event(event, command_id, phrase_id);
}
// A fresh object has no event callback and no I2S source attached.
ESP_SR_Class::ESP_SR_Class()
  : cb(nullptr),
    i2s(nullptr) {
}
// Stops the recognition engine when the wrapper goes away; the result of end() is not needed here.
ESP_SR_Class::~ESP_SR_Class() {
  (void)end();
}
// Stores the user callback that _sr_event() invokes for every recognition event.
void ESP_SR_Class::onEvent(sr_cb event_cb) {
  this->cb = event_cb;
}
// Remembers the I2S source and starts the recognition engine through sr_start(),
// wiring the two file-local trampolines back to this object.
// Returns true when sr_start() reports ESP_OK.
bool ESP_SR_Class::begin(I2SClass &_i2s, const sr_cmd_t *sr_commands, size_t sr_commands_len, sr_channels_t rx_chan, sr_mode_t mode, const char *input_format) {
  i2s = &_i2s;
  return sr_start(on_sr_fill, this, rx_chan, mode, input_format, sr_commands, sr_commands_len, on_sr_event, this) == ESP_OK;
}
bool ESP_SR_Class::end(void) {
return sr_stop() == ESP_OK;
}
// Switches the detection mode; true when sr_set_mode() reports ESP_OK.
bool ESP_SR_Class::setMode(sr_mode_t mode) {
  const esp_err_t status = sr_set_mode(mode);
  return status == ESP_OK;
}
bool ESP_SR_Class::pause(void) {
return sr_pause() == ESP_OK;
}
bool ESP_SR_Class::resume(void) {
return sr_resume() == ESP_OK;
}
// Internal entry point used by the on_sr_event trampoline: hands the event to the
// user callback, or drops it when no callback has been registered.
void ESP_SR_Class::_sr_event(sr_event_t event, int command_id, int phrase_id) {
  if (cb == NULL) {
    return;
  }
  cb(event, command_id, phrase_id);
}
/**
 * Internal entry point used by the on_sr_fill trampoline: reads raw audio from
 * the attached I2S source.
 *
 * @param out         Destination buffer for the audio bytes.
 * @param len         Number of bytes requested.
 * @param bytes_read  Receives the number of bytes actually read (0 on every failure
 *                    that happens before the read).
 * @param timeout_ms  Value forwarded to I2SClass::setTimeout() before reading.
 * @return ESP_FAIL when begin() has not attached an I2S source,
 *         ESP_ERR_INVALID_ARG when out or bytes_read is NULL,
 *         otherwise the I2S object's lastError() value.
 */
esp_err_t ESP_SR_Class::_fill(void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms) {
  if (bytes_read != NULL) {
    *bytes_read = 0;  // never leave the caller's counter uninitialised on early exits
  }
  if (i2s == NULL) {
    return ESP_FAIL;
  }
  if (out == NULL || bytes_read == NULL) {
    return ESP_ERR_INVALID_ARG;
  }
  i2s->setTimeout(timeout_ms);
  *bytes_read = i2s->readBytes((char *)out, len);
  return (esp_err_t)i2s->lastError();
}
// Global instance used by sketches (declared `extern` in ESP_SR.h).
// NOTE(review): the header hides the extern declaration when NO_GLOBAL_INSTANCES or NO_GLOBAL_ESP_SR
// is defined, but this definition is unconditional — confirm whether it should be guarded as well.
ESP_SR_Class ESP_SR;
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
+52
View File
@@ -0,0 +1,52 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
#pragma once
#include "sdkconfig.h"
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
#include "ESP_I2S.h"
#include "esp32-hal-sr.h"
typedef void (*sr_cb)(sr_event_t event, int command_id, int phrase_id);
/**
 * Arduino-style front end of the speech-recognition HAL (esp32-hal-sr.h).
 * Audio is pulled from an I2SClass instance and recognition events are
 * delivered to a user supplied sr_cb callback.
 */
class ESP_SR_Class {
  // Members declared before `public:` are private (class default).
  sr_cb cb;       // user event callback, NULL until onEvent() is called
  I2SClass *i2s;  // audio source, NULL until begin() is called

public:
  ESP_SR_Class();
  ~ESP_SR_Class();

  // Registers the callback that receives recognition events.
  void onEvent(sr_cb cb);

  /**
   * Starts recognition.
   *
   * The input format string describes the channel layout of the incoming audio:
   *   M - microphone channel
   *   R - playback reference channel
   *   N - unknown or unused channel
   *
   * Example: input_format="MMNR" means four channels in this order:
   * microphone, microphone, unused, playback reference.
   */
  bool begin(
    I2SClass &i2s,
    const sr_cmd_t *sr_commands,
    size_t sr_commands_len,
    sr_channels_t rx_chan = SR_CHANNELS_STEREO,
    sr_mode_t mode = SR_MODE_WAKEWORD,
    const char *input_format = "MN"
  );
  bool end(void);
  bool setMode(sr_mode_t mode);
  bool pause(void);
  bool resume(void);

  // Internal hooks called by the C trampolines in ESP_SR.cpp; not meant for sketches.
  void _sr_event(sr_event_t event, int command_id, int phrase_id);
  esp_err_t _fill(void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms);
};
// Declaration of the global ESP_SR object; define NO_GLOBAL_INSTANCES or NO_GLOBAL_ESP_SR to hide it.
#if !defined(NO_GLOBAL_INSTANCES) && !defined(NO_GLOBAL_ESP_SR)
extern ESP_SR_Class ESP_SR;
#endif
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
+440
View File
@@ -0,0 +1,440 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
#include "sdkconfig.h"
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
#if !defined(ARDUINO_PARTITION_esp_sr_32) && !defined(ARDUINO_PARTITION_esp_sr_16) && !defined(ARDUINO_PARTITION_esp_sr_8)
#warning Compatible partition must be selected for ESP_SR to work
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/queue.h>
#include "freertos/FreeRTOS.h"
#include "freertos/queue.h"
#include "freertos/event_groups.h"
#include "freertos/task.h"
#include "esp_task_wdt.h"
#include "esp_check.h"
#include "esp_err.h"
#include "esp_log.h"
#include "esp_mn_speech_commands.h"
#include "esp_process_sdkconfig.h"
#include "esp_afe_sr_models.h"
#include "esp_mn_models.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "esp_afe_sr_iface.h"
#include "esp_mn_iface.h"
#include "model_path.h"
#include "driver/i2s_common.h"
#include "esp32-hal-sr.h"
#include "esp32-hal-log.h"
// Replaces any earlier ESP_GOTO_ON_FALSE definition (presumably the one from esp_check.h)
// with a variant that reports through log_e().
// When `a` is false it logs the message, stores err_code in a variable named `ret`
// (the calling function must declare it) and jumps to the label goto_tag.
#undef ESP_GOTO_ON_FALSE
#define ESP_GOTO_ON_FALSE(a, err_code, goto_tag, format, ...) \
do { \
if (unlikely(!(a))) { \
log_e(format, ##__VA_ARGS__); \
ret = err_code; \
goto goto_tag; \
} \
} while (0)
// Replaces any earlier ESP_RETURN_ON_FALSE definition (presumably the one from esp_check.h)
// with a variant that reports through log_e().
// When `a` is false it logs the message and returns err_code from the calling function.
#undef ESP_RETURN_ON_FALSE
#define ESP_RETURN_ON_FALSE(a, err_code, format, ...) \
do { \
if (unlikely(!(a))) { \
log_e(format, ##__VA_ARGS__); \
return err_code; \
} \
} while (0)
// Bits of g_sr_data->event_group used to coordinate the worker tasks.
#define NEED_DELETE BIT0     // set by sr_stop(): both worker tasks must exit
#define FEED_DELETED BIT1    // set by audio_feed_task just before it exits
#define DETECT_DELETED BIT2  // set by audio_detect_task just before it exits
#define PAUSE_FEED BIT3      // set by sr_pause(): audio_feed_task blocks until RESUME_FEED is also set
#define PAUSE_DETECT BIT4    // set by sr_pause(): audio_detect_task blocks until RESUME_DETECT is also set
#define RESUME_FEED BIT5     // set by sr_resume(): releases a paused audio_feed_task
#define RESUME_DETECT BIT6   // set by sr_resume(): releases a paused audio_detect_task
// Message sent from audio_detect_task to sr_handler_task through g_sr_data->result_que.
typedef struct {
wakenet_state_t wakenet_mode;  // wake-word state; checked first by sr_handler_task
esp_mn_state_t state;          // command-recognition state (detected / timeout)
int command_id;                // matched command id, or the trigger channel index for WAKENET_CHANNEL_VERIFIED
int phrase_id;                 // matched phrase id (only filled for detected commands)
} sr_result_t;
// Complete runtime state of the engine; one instance is allocated by sr_start() and freed by sr_stop().
typedef struct {
model_iface_data_t *model_data;        // command-recognition model instance created via multinet->create()
const esp_mn_iface_t *multinet;        // command-recognition interface obtained from the model name
const esp_afe_sr_iface_t *afe_handle;  // audio front-end interface
esp_afe_sr_data_t *afe_data;           // audio front-end instance created via afe_handle->create_from_config()
int16_t *afe_in_buffer;                // buffer allocated by audio_feed_task, released in sr_stop()
sr_mode_t mode;                        // current detection mode, changed through sr_set_mode()
uint8_t i2s_rx_chan_num;               // number of input channels (rx_chan + 1)
sr_event_cb user_cb;                   // event callback given to sr_start()
void *user_cb_arg;                     // opaque argument passed back to user_cb
sr_fill_cb fill_cb;                    // audio source callback given to sr_start()
void *fill_cb_arg;                     // opaque argument passed back to fill_cb
TaskHandle_t feed_task;                // handle of audio_feed_task
TaskHandle_t detect_task;              // handle of audio_detect_task
TaskHandle_t handle_task;              // handle of sr_handler_task
QueueHandle_t result_que;              // queue of sr_result_t (detect task -> handler task)
EventGroupHandle_t event_group;        // stop/pause/resume flags, see NEED_DELETE and friends
} sr_data_t;
// Number of interleaved int16 samples per frame in the buffer passed to the AFE feed() call:
// audio_feed_task sizes its buffer with it and uses it as the per-frame stride.
static int SR_CHANNEL_NUM = 3;
// Model list returned by esp_srmodel_init() in sr_start().
static srmodel_list_t *models = NULL;
// Singleton runtime state; NULL while the engine is not running.
static sr_data_t *g_sr_data = NULL;
// Forward declaration: audio_detect_task() calls sr_set_mode() before its definition.
esp_err_t sr_set_mode(sr_mode_t mode);
void sr_handler_task(void *pvParam) {
while (true) {
sr_result_t result;
if (xQueueReceive(g_sr_data->result_que, &result, portMAX_DELAY) != pdTRUE) {
continue;
}
if (WAKENET_DETECTED == result.wakenet_mode) {
if (g_sr_data->user_cb) {
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_WAKEWORD, -1, -1);
}
continue;
}
if (WAKENET_CHANNEL_VERIFIED == result.wakenet_mode) {
if (g_sr_data->user_cb) {
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_WAKEWORD_CHANNEL, result.command_id, -1);
}
continue;
}
if (ESP_MN_STATE_DETECTED == result.state) {
if (g_sr_data->user_cb) {
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_COMMAND, result.command_id, result.phrase_id);
}
continue;
}
if (ESP_MN_STATE_TIMEOUT == result.state) {
if (g_sr_data->user_cb) {
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_TIMEOUT, -1, -1);
}
continue;
}
}
vTaskDelete(NULL);
}
/*
 * Task body that pulls raw audio through fill_cb and feeds it to the audio front end (AFE).
 * Every cycle it reads one chunk with i2s_rx_chan_num interleaved channels, expands it in place
 * to SR_CHANNEL_NUM samples per frame and hands it to afe_handle->feed().
 * The task exits when NEED_DELETE is set and blocks while it is paused.
 */
static void audio_feed_task(void *arg) {
size_t bytes_read = 0;
int audio_chunksize = g_sr_data->afe_handle->get_feed_chunksize(g_sr_data->afe_data);
log_i("audio_chunksize=%d, feed_channel=%d", audio_chunksize, SR_CHANNEL_NUM);
/* Allocate a buffer large enough for the expanded (SR_CHANNEL_NUM per frame) chunk; abort the system if that fails */
int16_t *audio_buffer = heap_caps_malloc(audio_chunksize * sizeof(int16_t) * SR_CHANNEL_NUM, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
if (NULL == audio_buffer) {
esp_system_abort("No mem for audio buffer");
}
// Published so that sr_stop() can release the buffer.
g_sr_data->afe_in_buffer = audio_buffer;
while (true) {
EventBits_t bits = xEventGroupGetBits(g_sr_data->event_group);
// Stop request: acknowledge with FEED_DELETED and leave the loop.
if (NEED_DELETE & bits) {
xEventGroupSetBits(g_sr_data->event_group, FEED_DELETED);
break;
}
// Pause request: block until PAUSE_FEED and RESUME_FEED are both set; both bits are cleared on wake-up.
if (PAUSE_FEED & bits) {
xEventGroupWaitBits(g_sr_data->event_group, PAUSE_FEED | RESUME_FEED, 1, 1, portMAX_DELAY);
}
/* Read audio data through the fill callback (the I2S bus in the Arduino wrapper) */
//ToDo: handle error
if (g_sr_data->fill_cb == NULL) {
vTaskDelay(100);
continue;
}
// NOTE(review): portMAX_DELAY is passed for a parameter named timeout_ms, and bytes_read is never checked — confirm intent.
esp_err_t err = g_sr_data->fill_cb(
g_sr_data->fill_cb_arg, (char *)audio_buffer, audio_chunksize * g_sr_data->i2s_rx_chan_num * sizeof(int16_t), &bytes_read, portMAX_DELAY
);
if (err != ESP_OK) {
vTaskDelay(100);
continue;
}
/* Channel Adjust: expand in place to SR_CHANNEL_NUM samples per frame.
 * The loops run from the last frame to the first so that no source sample is overwritten before it has been moved. */
if (g_sr_data->i2s_rx_chan_num == 1) {
// Mono input: the sample goes to slot 0, slots 1 and 2 are zeroed.
for (int i = audio_chunksize - 1; i >= 0; i--) {
audio_buffer[i * SR_CHANNEL_NUM + 2] = 0;
audio_buffer[i * SR_CHANNEL_NUM + 1] = 0;
audio_buffer[i * SR_CHANNEL_NUM + 0] = audio_buffer[i];
}
} else if (g_sr_data->i2s_rx_chan_num == 2) {
// Stereo input: the two samples go to slots 0 and 1, slot 2 is zeroed.
for (int i = audio_chunksize - 1; i >= 0; i--) {
audio_buffer[i * SR_CHANNEL_NUM + 2] = 0;
audio_buffer[i * SR_CHANNEL_NUM + 1] = audio_buffer[i * 2 + 1];
audio_buffer[i * SR_CHANNEL_NUM + 0] = audio_buffer[i * 2 + 0];
}
} else {
// Unsupported channel count: nothing is fed.
vTaskDelay(100);
continue;
}
/* Feed samples of an audio stream to the AFE_SR */
g_sr_data->afe_handle->feed(g_sr_data->afe_data, audio_buffer);
vTaskDelay(2);
}
vTaskDelete(NULL);
}
/*
 * Task body that fetches processed audio from the AFE and runs detection on it.
 * In SR_MODE_WAKEWORD it reports the wake-word states found in the fetch result; in SR_MODE_COMMAND
 * it runs the command model on the fetched data. Results are posted to g_sr_data->result_que
 * without waiting, so a result is dropped when the queue is full.
 * The task exits when NEED_DELETE is set and blocks while it is paused.
 */
static void audio_detect_task(void *arg) {
int afe_chunksize = g_sr_data->afe_handle->get_fetch_chunksize(g_sr_data->afe_data);
int mu_chunksize = g_sr_data->multinet->get_samp_chunksize(g_sr_data->model_data);
// The command model consumes the AFE output directly, so both chunk sizes have to match.
assert(mu_chunksize == afe_chunksize);
log_i("------------detect start------------");
while (true) {
EventBits_t bits = xEventGroupGetBits(g_sr_data->event_group);
// Stop request: acknowledge with DETECT_DELETED and leave the loop.
if (NEED_DELETE & bits) {
xEventGroupSetBits(g_sr_data->event_group, DETECT_DELETED);
break;
}
// Pause request: block until PAUSE_DETECT and RESUME_DETECT are both set; both bits are cleared on wake-up.
if (PAUSE_DETECT & bits) {
xEventGroupWaitBits(g_sr_data->event_group, PAUSE_DETECT | RESUME_DETECT, 1, 1, portMAX_DELAY);
}
afe_fetch_result_t *res = g_sr_data->afe_handle->fetch(g_sr_data->afe_data);
if (!res || res->ret_value == ESP_FAIL) {
continue;
}
if (g_sr_data->mode == SR_MODE_WAKEWORD) {
if (res->wakeup_state == WAKENET_DETECTED) {
log_d("wakeword detected");
sr_result_t result = {
.wakenet_mode = WAKENET_DETECTED,
.state = ESP_MN_STATE_DETECTING,
.command_id = 0,
.phrase_id = 0,
};
xQueueSend(g_sr_data->result_que, &result, 0);
} else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED) {
// Detection is switched off before the event is queued; presumably the user callback selects the next mode.
sr_set_mode(SR_MODE_OFF);
log_d("AFE_FETCH_CHANNEL_VERIFIED, channel index: %d", res->trigger_channel_id);
// command_id carries the verified channel index for this event.
sr_result_t result = {
.wakenet_mode = WAKENET_CHANNEL_VERIFIED,
.state = ESP_MN_STATE_DETECTING,
.command_id = res->trigger_channel_id,
.phrase_id = 0,
};
xQueueSend(g_sr_data->result_que, &result, 0);
}
}
if (g_sr_data->mode == SR_MODE_COMMAND) {
esp_mn_state_t mn_state = ESP_MN_STATE_DETECTING;
mn_state = g_sr_data->multinet->detect(g_sr_data->model_data, res->data);
// Still listening: nothing to report for this chunk.
if (ESP_MN_STATE_DETECTING == mn_state) {
continue;
}
if (ESP_MN_STATE_TIMEOUT == mn_state) {
sr_set_mode(SR_MODE_OFF);
log_d("Time out");
sr_result_t result = {
.wakenet_mode = WAKENET_NO_DETECT,
.state = mn_state,
.command_id = 0,
.phrase_id = 0,
};
xQueueSend(g_sr_data->result_que, &result, 0);
continue;
}
if (ESP_MN_STATE_DETECTED == mn_state) {
sr_set_mode(SR_MODE_OFF);
esp_mn_results_t *mn_result = g_sr_data->multinet->get_results(g_sr_data->model_data);
for (int i = 0; i < mn_result->num; i++) {
log_d("TOP %d, command_id: %d, phrase_id: %d, prob: %f", i + 1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->prob[i]);
}
// Only the first entry of the result list is reported to the user.
int sr_command_id = mn_result->command_id[0];
int sr_phrase_id = mn_result->phrase_id[0];
log_d("Detected command : %d, phrase: %d", sr_command_id, sr_phrase_id);
sr_result_t result = {
.wakenet_mode = WAKENET_NO_DETECT,
.state = mn_state,
.command_id = sr_command_id,
.phrase_id = sr_phrase_id,
};
xQueueSend(g_sr_data->result_que, &result, 0);
continue;
}
// Reached only for a model state that is none of DETECTING / TIMEOUT / DETECTED.
log_e("Exception unhandled");
}
}
vTaskDelete(NULL);
}
/**
 * Change the detection mode of the running engine.
 * The wake-word net is enabled when entering SR_MODE_WAKEWORD and disabled when leaving it.
 *
 * @param mode  SR_MODE_OFF, SR_MODE_WAKEWORD or SR_MODE_COMMAND.
 * @return ESP_OK on success, ESP_ERR_INVALID_STATE when the engine is not running,
 *         ESP_FAIL for any other mode value (the current mode is left untouched).
 */
esp_err_t sr_set_mode(sr_mode_t mode) {
  ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");

  if (mode != SR_MODE_OFF && mode != SR_MODE_WAKEWORD && mode != SR_MODE_COMMAND) {
    return ESP_FAIL;
  }

  const bool wakenet_running = (g_sr_data->mode == SR_MODE_WAKEWORD);
  const bool wakenet_wanted = (mode == SR_MODE_WAKEWORD);

  if (wakenet_wanted && !wakenet_running) {
    g_sr_data->afe_handle->enable_wakenet(g_sr_data->afe_data);
  } else if (!wakenet_wanted && wakenet_running) {
    g_sr_data->afe_handle->disable_wakenet(g_sr_data->afe_data);
  }

  g_sr_data->mode = mode;
  return ESP_OK;
}
/**
 * Start the speech-recognition engine: allocate the runtime state, load the models,
 * register the command phrases and spawn the feed / detect / handler tasks.
 *
 * @param fill_cb       Callback that supplies raw audio.
 * @param fill_cb_arg   Opaque argument passed back to fill_cb.
 * @param rx_chan       Input channel layout (SR_CHANNELS_MONO or SR_CHANNELS_STEREO).
 * @param mode          Initial detection mode.
 * @param input_format  AFE input format string (e.g. "MM", "MN").
 * @param sr_commands   Table of command phrases; may be NULL only when cmd_number is 0.
 * @param cmd_number    Number of entries in sr_commands.
 * @param cb            Callback that receives recognition events.
 * @param cb_arg        Opaque argument passed back to cb.
 * @return ESP_OK on success; ESP_ERR_INVALID_STATE when already running; ESP_ERR_INVALID_ARG for
 *         bad arguments; ESP_ERR_NO_MEM / ESP_ERR_NOT_FOUND / ESP_FAIL when a resource, a model or
 *         a task could not be created. After the state object exists, failures are cleaned up
 *         through sr_stop().
 */
esp_err_t sr_start(
  sr_fill_cb fill_cb, void *fill_cb_arg, sr_channels_t rx_chan, sr_mode_t mode, const char *input_format, const sr_cmd_t sr_commands[], size_t cmd_number,
  sr_event_cb cb, void *cb_arg
) {
  esp_err_t ret = ESP_OK;
  ESP_RETURN_ON_FALSE(NULL == g_sr_data, ESP_ERR_INVALID_STATE, "SR already running");

  // Reject arguments that would otherwise be dereferenced, or leave the feed task idling forever.
  ESP_RETURN_ON_FALSE(NULL != input_format, ESP_ERR_INVALID_ARG, "input_format is NULL");
  ESP_RETURN_ON_FALSE(NULL != sr_commands || 0 == cmd_number, ESP_ERR_INVALID_ARG, "sr_commands is NULL");
  ESP_RETURN_ON_FALSE(SR_CHANNELS_MONO == rx_chan || SR_CHANNELS_STEREO == rx_chan, ESP_ERR_INVALID_ARG, "Unsupported channel layout");

  g_sr_data = heap_caps_calloc(1, sizeof(sr_data_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
  ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_NO_MEM, "Failed create sr data");

  g_sr_data->result_que = xQueueCreate(3, sizeof(sr_result_t));
  ESP_GOTO_ON_FALSE(NULL != g_sr_data->result_que, ESP_ERR_NO_MEM, err, "Failed create result queue");

  g_sr_data->event_group = xEventGroupCreate();
  ESP_GOTO_ON_FALSE(NULL != g_sr_data->event_group, ESP_ERR_NO_MEM, err, "Failed create event_group");

  BaseType_t ret_val;
  g_sr_data->user_cb = cb;
  g_sr_data->user_cb_arg = cb_arg;
  g_sr_data->fill_cb = fill_cb;
  g_sr_data->fill_cb_arg = fill_cb_arg;
  g_sr_data->i2s_rx_chan_num = rx_chan + 1;  // MONO -> 1 channel, STEREO -> 2 channels
  g_sr_data->mode = mode;

  // Init Model
  log_d("init model");
  models = esp_srmodel_init("model");
  ESP_GOTO_ON_FALSE(NULL != models, ESP_ERR_NOT_FOUND, err, "Failed to load models from 'model'");

  // Load WakeWord Detection
  afe_config_t *afe_config = afe_config_init(input_format, models, AFE_TYPE_SR, AFE_MODE_LOW_COST);
  ESP_GOTO_ON_FALSE(NULL != afe_config, ESP_FAIL, err, "Failed to create AFE config");
  g_sr_data->afe_handle = esp_afe_handle_from_config(afe_config);
  if (g_sr_data->afe_handle) {
    log_d("load wakenet '%s'", afe_config->wakenet_model_name ? afe_config->wakenet_model_name : "(none)");
    g_sr_data->afe_data = g_sr_data->afe_handle->create_from_config(afe_config);
  }
  // The config is only needed during creation; release it before any error exit.
  afe_config_free(afe_config);
  ESP_GOTO_ON_FALSE(NULL != g_sr_data->afe_handle, ESP_FAIL, err, "Failed to get AFE handle");
  ESP_GOTO_ON_FALSE(NULL != g_sr_data->afe_data, ESP_FAIL, err, "Failed to create AFE instance");

  // Load Custom Command Detection
  char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_ENGLISH);
  ESP_GOTO_ON_FALSE(NULL != mn_name, ESP_ERR_NOT_FOUND, err, "No English multinet model found");
  log_d("load multinet '%s'", mn_name);
  g_sr_data->multinet = esp_mn_handle_from_name(mn_name);
  ESP_GOTO_ON_FALSE(NULL != g_sr_data->multinet, ESP_FAIL, err, "Failed to get multinet handle");
  log_d("load model_data '%s'", mn_name);
  g_sr_data->model_data = g_sr_data->multinet->create(mn_name, 5760);
  ESP_GOTO_ON_FALSE(NULL != g_sr_data->model_data, ESP_FAIL, err, "Failed to create multinet instance");

  // Add commands
  esp_mn_commands_alloc((esp_mn_iface_t *)g_sr_data->multinet, (model_iface_data_t *)g_sr_data->model_data);
  log_i("add %u commands", (unsigned)cmd_number);
  for (size_t i = 0; i < cmd_number; i++) {
    esp_mn_commands_add(sr_commands[i].command_id, (char *)(sr_commands[i].phoneme));
    log_i(" cmd[%d] phrase[%u]:'%s'", sr_commands[i].command_id, (unsigned)i, sr_commands[i].str);
  }

  // Load commands; phrases the model rejected are only reported, not treated as fatal.
  esp_mn_error_t *err_id = esp_mn_commands_update();
  if (err_id) {
    for (int i = 0; i < err_id->num; i++) {
      log_e("err cmd id:%d", err_id->phrases[i]->command_id);
    }
  }

  //Start tasks
  log_d("start tasks");
  ret_val = xTaskCreatePinnedToCore(&audio_feed_task, "SR Feed Task", 4 * 1024, NULL, 5, &g_sr_data->feed_task, 0);
  ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, "Failed create audio feed task");
  vTaskDelay(10);
  ret_val = xTaskCreatePinnedToCore(&audio_detect_task, "SR Detect Task", 8 * 1024, NULL, 5, &g_sr_data->detect_task, 1);
  ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, "Failed create audio detect task");
  ret_val = xTaskCreatePinnedToCore(&sr_handler_task, "SR Handler Task", 6 * 1024, NULL, configMAX_PRIORITIES - 1, &g_sr_data->handle_task, 1);
  //ret_val = xTaskCreatePinnedToCore(&sr_handler_task, "SR Handler Task", 6 * 1024, NULL, configMAX_PRIORITIES - 1, &g_sr_data->handle_task, 0);
  ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, "Failed create audio handler task");
  return ESP_OK;

err:
  sr_stop();
  return ret;
}
esp_err_t sr_stop(void) {
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");
/**
* Waiting for all task stopped
* TODO: A task creation failure cannot be handled correctly now
* */
vTaskDelete(g_sr_data->handle_task);
xEventGroupSetBits(g_sr_data->event_group, NEED_DELETE);
xEventGroupWaitBits(g_sr_data->event_group, NEED_DELETE | FEED_DELETED | DETECT_DELETED, 1, 1, portMAX_DELAY);
if (g_sr_data->result_que) {
vQueueDelete(g_sr_data->result_que);
g_sr_data->result_que = NULL;
}
if (g_sr_data->event_group) {
vEventGroupDelete(g_sr_data->event_group);
g_sr_data->event_group = NULL;
}
if (g_sr_data->model_data) {
g_sr_data->multinet->destroy(g_sr_data->model_data);
}
if (g_sr_data->afe_data) {
g_sr_data->afe_handle->destroy(g_sr_data->afe_data);
}
if (g_sr_data->afe_in_buffer) {
heap_caps_free(g_sr_data->afe_in_buffer);
}
heap_caps_free(g_sr_data);
g_sr_data = NULL;
return ESP_OK;
}
esp_err_t sr_pause(void) {
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");
xEventGroupSetBits(g_sr_data->event_group, PAUSE_FEED | PAUSE_DETECT);
return ESP_OK;
}
esp_err_t sr_resume(void) {
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");
xEventGroupSetBits(g_sr_data->event_group, RESUME_FEED | RESUME_DETECT);
return ESP_OK;
}
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
+79
View File
@@ -0,0 +1,79 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
#pragma once
#include "sdkconfig.h"
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
#include "driver/i2s_types.h"
#include "esp_err.h"
#ifdef __cplusplus
extern "C" {
#endif
#define SR_CMD_STR_LEN_MAX 64
#define SR_CMD_PHONEME_LEN_MAX 64
// One phrase of the command table passed to sr_start().
typedef struct sr_cmd_t {
int command_id;                        // id reported to the event callback; several phrases may share one id
char str[SR_CMD_STR_LEN_MAX];          // human readable text of the phrase (used for logging)
char phoneme[SR_CMD_PHONEME_LEN_MAX];  // phoneme string registered with the command model
} sr_cmd_t;
// Events delivered to the sr_event_cb callback.
typedef enum {
  SR_EVENT_WAKEWORD = 0,          // WakeWord Detected
  SR_EVENT_WAKEWORD_CHANNEL = 1,  // WakeWord Channel Verified
  SR_EVENT_COMMAND = 2,           // Command Detected
  SR_EVENT_TIMEOUT = 3,           // Command Timeout
  SR_EVENT_MAX = 4                // Number of events, not an event itself
} sr_event_t;
// Detection modes selectable through sr_start() and sr_set_mode().
typedef enum {
  SR_MODE_OFF = 0,       // Detection Off
  SR_MODE_WAKEWORD = 1,  // WakeWord Detection
  SR_MODE_COMMAND = 2,   // Command Detection
  SR_MODE_MAX = 3        // Number of modes, not a mode itself
} sr_mode_t;
// Channel layout of the incoming audio; the numeric value plus one is the channel count.
typedef enum {
  SR_CHANNELS_MONO = 0,
  SR_CHANNELS_STEREO = 1,
  SR_CHANNELS_MAX = 2  // Number of layouts, not a layout itself
} sr_channels_t;
// Event callback: `arg` is the cb_arg given to sr_start(); arguments that do not apply to an event are -1.
typedef void (*sr_event_cb)(void *arg, sr_event_t event, int command_id, int phrase_id);
// Audio source callback: `arg` is the fill_cb_arg given to sr_start(). It must write up to `len` bytes
// to `out`, store the byte count in *bytes_read and return ESP_OK on success.
typedef esp_err_t (*sr_fill_cb)(void *arg, void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms);
// Starts the engine: loads the models, registers the cmd_number phrases of sr_commands and spawns the
// worker tasks. Audio is pulled through fill_cb, events are delivered through cb.
// Returns ESP_ERR_INVALID_STATE when the engine is already running.
esp_err_t sr_start(
sr_fill_cb fill_cb, void *fill_cb_arg, sr_channels_t rx_chan, sr_mode_t mode, const char *input_format, const sr_cmd_t *sr_commands, size_t cmd_number,
sr_event_cb cb, void *cb_arg
);
// Stops the worker tasks and frees all engine resources.
esp_err_t sr_stop(void);
// Requests the feed and detect tasks to pause.
esp_err_t sr_pause(void);
// Releases the feed and detect tasks from a pause.
esp_err_t sr_resume(void);
// Switches between SR_MODE_OFF, SR_MODE_WAKEWORD and SR_MODE_COMMAND.
// All four functions above return ESP_ERR_INVALID_STATE when the engine is not running.
esp_err_t sr_set_mode(sr_mode_t mode);
// static const sr_cmd_t sr_commands[] = {
// {0, "Turn On the Light", "TkN nN jc LiT"},
// {0, "Switch On the Light", "SWgp nN jc LiT"},
// {1, "Switch Off the Light", "SWgp eF jc LiT"},
// {1, "Turn Off the Light", "TkN eF jc LiT"},
// {2, "Turn Red", "TkN RfD"},
// {3, "Turn Green", "TkN GRmN"},
// {4, "Turn Blue", "TkN BLo"},
// {5, "Customize Color", "KcSTcMiZ KcLk"},
// {6, "Sing a song", "Sgl c Sel"},
// {7, "Play Music", "PLd MYoZgK"},
// {8, "Next Song", "NfKST Sel"},
// {9, "Pause Playing", "PeZ PLdgl"},
// };
#ifdef __cplusplus
}
#endif
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
+146
View File
@@ -0,0 +1,146 @@
# pip3 install g2p_en
from g2p_en import G2p
import argparse
# python3 gen_sr_commands.py "Turn on the light,Switch on the light;Turn off the light,Switch off the light,Go dark;\
# Start fan;Stop fan;Volume down,Turn down;Mute sound;Next song;Pause playback"
# enum {
# SR_CMD_TURN_ON_THE_LIGHT,
# SR_CMD_TURN_OFF_THE_LIGHT,
# SR_CMD_START_FAN,
# SR_CMD_STOP_FAN,
# SR_CMD_VOLUME_DOWN,
# SR_CMD_MUTE_SOUND,
# SR_CMD_NEXT_SONG,
# SR_CMD_PAUSE_PLAYBACK,
# };
# static const sr_cmd_t sr_commands[] = {
# { 0, "Turn on the light", "TkN nN jc LiT"},
# { 0, "Switch on the light", "SWgp nN jc LiT"},
# { 1, "Turn off the light", "TkN eF jc LiT"},
# { 1, "Switch off the light", "SWgp eF jc LiT"},
# { 1, "Go dark", "Gb DnRK"},
# { 2, "Start fan", "STnRT FaN"},
# { 3, "Stop fan", "STnP FaN"},
# { 4, "Volume down", "VnLYoM DtN"},
# { 4, "Turn down", "TkN DtN"},
# { 5, "Mute sound", "MYoT StND"},
# { 6, "Next song", "NfKST Sel"},
# { 7, "Pause playback", "PeZ PLdBaK"},
# };
def english_g2p(text, g2p=None):
    """Generate the C enum and ``sr_commands`` table for a list of voice commands.

    ``text`` holds the commands separated by ``;``. Each command may list several
    alternative phrases separated by ``,``; all phrases of one command share the
    same command id, and the first phrase names the enum entry.
    Surrounding whitespace of a phrase is ignored and empty phrases/commands are
    skipped, so they cannot produce invalid C output.

    Args:
        text: Command specification, e.g. ``"Turn on the light,Switch on the light;Stop fan"``.
        g2p: Optional callable mapping a phrase to its list of phoneme labels.
            Defaults to a new ``G2p()`` instance.

    Returns:
        The generated ``sr_commands`` table as a string. The enum and the table
        are also printed to stdout.
    """
    if g2p is None:
        g2p = G2p()

    # Maps the phoneme labels produced by g2p to the single-character codes used in the table.
    alphabet = {
        "AE1": "a",
        "N": "N",
        " ": " ",
        "OW1": "b",
        "V": "V",
        "AH0": "c",
        "L": "L",
        "F": "F",
        "EY1": "d",
        "S": "S",
        "B": "B",
        "R": "R",
        "AO1": "e",
        "D": "D",
        "AH1": "c",
        "EH1": "f",
        "OW0": "b",
        "IH0": "g",
        "G": "G",
        "HH": "h",
        "K": "K",
        "IH1": "g",
        "W": "W",
        "AY1": "i",
        "T": "T",
        "M": "M",
        "Z": "Z",
        "DH": "j",
        "ER0": "k",
        "P": "P",
        "NG": "l",
        "IY1": "m",
        "AA1": "n",
        "Y": "Y",
        "UW1": "o",
        "IY0": "m",
        "EH2": "f",
        "CH": "p",
        "AE0": "a",
        "JH": "q",
        "ZH": "r",
        "AA2": "n",
        "SH": "s",
        "AW1": "t",
        "OY1": "u",
        "AW2": "t",
        "IH2": "g",
        "AE2": "a",
        "EY2": "d",
        "ER1": "k",
        "TH": "v",
        "UH1": "w",
        "UW2": "o",
        "OW2": "b",
        "AY2": "i",
        "UW0": "o",
        "AH2": "c",
        "EH0": "f",
        "AW0": "t",
        "AO2": "e",
        "AO0": "e",
        "UH0": "w",
        "UH2": "w",
        "AA0": "n",
        "AY0": "i",
        "IY2": "m",
        "EY0": "d",
        "ER2": "k",
        "OY2": "u",
        "OY0": "u",
    }

    out = "static const sr_cmd_t sr_commands[] = {\n"
    enum = "enum {\n"
    cmd_id = 0
    for item in text.split(";"):
        phrases = [phrase.strip() for phrase in item.split(",")]
        phrases = [phrase for phrase in phrases if phrase]
        if not phrases:
            # e.g. a trailing ';' — nothing to emit and no command id to consume
            continue
        for phrase_id, phrase in enumerate(phrases):
            phoneme = ""
            for char in g2p(phrase):
                if char not in alphabet:
                    print("skip %s, not found in alphabet" % char)
                    continue
                phoneme += alphabet[char]
            out += " { " + str(cmd_id) + ', "' + phrase + '", "' + phoneme + '"},\n'
            if phrase_id == 0:
                # Only the first phrase of a command names the enum entry.
                enum += " SR_CMD_" + phrase.upper().replace(" ", "_") + ",\n"
        cmd_id += 1
    out += "};"
    enum += "};"
    # print(text)
    print(enum)
    print(out)
    return out
def _main():
    """Command-line entry point: read the command list and print the generated C code."""
    cli = argparse.ArgumentParser(prog="English Speech Commands G2P")
    cli.add_argument("text", type=str, default=None, help="input text")
    parsed = cli.parse_args()
    if parsed.text is None:
        return
    english_g2p(parsed.text)


if __name__ == "__main__":
    _main()