3.3.7
This commit is contained in:
@@ -0,0 +1,86 @@
|
||||
|
||||
#include "ESP_I2S.h"
|
||||
#include "ESP_SR.h"
|
||||
|
||||
#define I2S_PIN_BCK 17
|
||||
#define I2S_PIN_WS 47
|
||||
#define I2S_PIN_DIN 16
|
||||
|
||||
#define LIGHT_PIN 40
|
||||
#define FAN_PIN 41
|
||||
|
||||
/**
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*/
|
||||
#define SR_INPUT_FORMAT "MM"
|
||||
|
||||
I2SClass i2s;
|
||||
|
||||
// Generated using the following command:
|
||||
// python3 tools/gen_sr_commands.py "Turn on the light,Switch on the light;Turn off the light,Switch off the light,Go dark;Start fan;Stop fan"
|
||||
// Command identifiers handled in onSrEvent().
// Each value equals the command id stored in the matching sr_commands[] entries.
enum {
  SR_CMD_TURN_ON_THE_LIGHT = 0,
  SR_CMD_TURN_OFF_THE_LIGHT = 1,
  SR_CMD_START_FAN = 2,
  SR_CMD_STOP_FAN = 3,
};
|
||||
// Phrase table passed to ESP_SR.begin(): {command id, spoken text, phoneme string}.
// Several phrases may map to the same command id.
static const sr_cmd_t sr_commands[] = {
  {SR_CMD_TURN_ON_THE_LIGHT, "Turn on the light", "TkN nN jc LiT"},
  {SR_CMD_TURN_ON_THE_LIGHT, "Switch on the light", "SWgp nN jc LiT"},
  {SR_CMD_TURN_OFF_THE_LIGHT, "Turn off the light", "TkN eF jc LiT"},
  {SR_CMD_TURN_OFF_THE_LIGHT, "Switch off the light", "SWgp eF jc LiT"},
  {SR_CMD_TURN_OFF_THE_LIGHT, "Go dark", "Gb DnRK"},
  {SR_CMD_START_FAN, "Start fan", "STnRT FaN"},
  {SR_CMD_STOP_FAN, "Stop fan", "STnP FaN"},
};
|
||||
|
||||
/**
 * Speech-recognition event callback registered with ESP_SR.onEvent().
 *
 * @param event       what happened (wake word, channel verified, command, timeout)
 * @param command_id  recognized command id for SR_EVENT_COMMAND; the verified channel
 *                    index for SR_EVENT_WAKEWORD_CHANNEL
 * @param phrase_id   index of the recognized phrase in sr_commands[] for SR_EVENT_COMMAND
 *
 * The HAL switches recognition off before it reports a verified channel, a command or a
 * timeout, so each of those branches selects the mode to continue with.
 */
void onSrEvent(sr_event_t event, int command_id, int phrase_id) {
  // Guard the table lookup: an out-of-range phrase_id must not index past sr_commands[].
  const int phrase_count = (int)(sizeof(sr_commands) / sizeof(sr_commands[0]));
  const bool phrase_known = (phrase_id >= 0) && (phrase_id < phrase_count);

  switch (event) {
    case SR_EVENT_WAKEWORD: Serial.println("WakeWord Detected!"); break;

    case SR_EVENT_WAKEWORD_CHANNEL:
      Serial.printf("WakeWord Channel %d Verified!\n", command_id);
      ESP_SR.setMode(SR_MODE_COMMAND);  // Switch to Command detection
      break;

    case SR_EVENT_TIMEOUT:
      Serial.println("Timeout Detected!");
      ESP_SR.setMode(SR_MODE_WAKEWORD);  // Switch back to WakeWord detection
      break;

    case SR_EVENT_COMMAND:
      Serial.printf("Command %d Detected! %s\n", command_id, phrase_known ? sr_commands[phrase_id].str : "<unknown phrase>");
      switch (command_id) {
        case SR_CMD_TURN_ON_THE_LIGHT:  digitalWrite(LIGHT_PIN, HIGH); break;
        case SR_CMD_TURN_OFF_THE_LIGHT: digitalWrite(LIGHT_PIN, LOW); break;
        case SR_CMD_START_FAN:          digitalWrite(FAN_PIN, HIGH); break;
        case SR_CMD_STOP_FAN:           digitalWrite(FAN_PIN, LOW); break;
        default:                        Serial.println("Unknown Command!"); break;
      }
      ESP_SR.setMode(SR_MODE_COMMAND);  // Allow for more commands to be given, before timeout
      // ESP_SR.setMode(SR_MODE_WAKEWORD); // Switch back to WakeWord detection
      break;

    default: Serial.println("Unknown Event!"); break;
  }
}
|
||||
|
||||
void setup() {
|
||||
Serial.begin(115200);
|
||||
|
||||
pinMode(LIGHT_PIN, OUTPUT);
|
||||
digitalWrite(LIGHT_PIN, LOW);
|
||||
pinMode(FAN_PIN, OUTPUT);
|
||||
digitalWrite(FAN_PIN, LOW);
|
||||
|
||||
i2s.setPins(I2S_PIN_BCK, I2S_PIN_WS, -1, I2S_PIN_DIN);
|
||||
i2s.setTimeout(1000);
|
||||
i2s.begin(I2S_MODE_STD, 16000, I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_STEREO);
|
||||
|
||||
ESP_SR.onEvent(onSrEvent);
|
||||
ESP_SR.begin(i2s, sr_commands, sizeof(sr_commands) / sizeof(sr_cmd_t), SR_CHANNELS_STEREO, SR_MODE_WAKEWORD, SR_INPUT_FORMAT);
|
||||
}
|
||||
|
||||
// Intentionally empty: the sketch reacts to speech events in onSrEvent().
void loop() {
}
|
||||
@@ -0,0 +1,16 @@
|
||||
fqbn:
|
||||
esp32s3:
|
||||
- espressif:esp32:esp32s3:USBMode=default,PartitionScheme=esp_sr_16,FlashSize=16M,FlashMode=dio
|
||||
esp32p4:
|
||||
- espressif:esp32:esp32p4:USBMode=default,ChipVariant=postv3,PartitionScheme=esp_sr_16,FlashSize=16M,FlashMode=qio
|
||||
|
||||
requires:
|
||||
- CONFIG_SOC_I2S_SUPPORTED=y
|
||||
|
||||
targets:
|
||||
esp32: false
|
||||
esp32c3: false
|
||||
esp32c6: false
|
||||
esp32h2: false
|
||||
esp32s2: false
|
||||
esp32c5: false
|
||||
@@ -0,0 +1,40 @@
|
||||
#######################################
|
||||
# Syntax Coloring Map For ESP_SR
|
||||
#######################################
|
||||
|
||||
#######################################
|
||||
# Datatypes (KEYWORD1)
|
||||
#######################################
|
||||
|
||||
ESP_SR KEYWORD1
|
||||
ESP_SR_Class KEYWORD1
|
||||
sr_cmd_t KEYWORD1
|
||||
sr_event_t KEYWORD1
|
||||
sr_mode_t KEYWORD1
|
||||
sr_channels_t KEYWORD1
|
||||
sr_cb KEYWORD1
|
||||
|
||||
#######################################
|
||||
# Methods and Functions (KEYWORD2)
|
||||
#######################################
|
||||
|
||||
onEvent KEYWORD2
|
||||
setMode KEYWORD2
|
||||
pause KEYWORD2
|
||||
resume KEYWORD2
|
||||
|
||||
#######################################
|
||||
# Constants (LITERAL1)
|
||||
#######################################
|
||||
|
||||
SR_EVENT_WAKEWORD LITERAL1
|
||||
SR_EVENT_WAKEWORD_CHANNEL LITERAL1
|
||||
SR_EVENT_COMMAND LITERAL1
|
||||
SR_EVENT_TIMEOUT LITERAL1
|
||||
SR_MODE_OFF LITERAL1
|
||||
SR_MODE_WAKEWORD LITERAL1
|
||||
SR_MODE_COMMAND LITERAL1
|
||||
SR_MODE_MAX LITERAL1
|
||||
SR_CHANNELS_MONO LITERAL1
|
||||
SR_CHANNELS_STEREO LITERAL1
|
||||
SR_CHANNELS_MAX LITERAL1
|
||||
@@ -0,0 +1,9 @@
|
||||
name=ESP_SR
|
||||
version=3.3.7
|
||||
author=me-no-dev
|
||||
maintainer=me-no-dev
|
||||
sentence=Library for ESP Sound Recognition
|
||||
paragraph=Supports ESP32 Arduino platforms.
|
||||
category=Sound
|
||||
url=https://github.com/espressif/arduino-esp32/
|
||||
architectures=esp32
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Unlicense OR CC0-1.0
|
||||
*/
|
||||
#include "sdkconfig.h"
|
||||
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
#include "ESP_SR.h"
|
||||
|
||||
// C-style trampoline for the HAL audio-fill callback.
// `arg` is the ESP_SR_Class instance that begin() handed to sr_start().
static esp_err_t on_sr_fill(void *arg, void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms) {
  ESP_SR_Class *self = (ESP_SR_Class *)arg;
  return self->_fill(out, len, bytes_read, timeout_ms);
}
|
||||
|
||||
static void on_sr_event(void *arg, sr_event_t event, int command_id, int phrase_id) {
|
||||
((ESP_SR_Class *)arg)->_sr_event(event, command_id, phrase_id);
|
||||
}
|
||||
|
||||
// Starts with no user callback and no audio source attached.
ESP_SR_Class::ESP_SR_Class()
  : cb(NULL),
    i2s(NULL) {
}
|
||||
|
||||
// Stops speech recognition when the object goes away; the result of end() is ignored.
ESP_SR_Class::~ESP_SR_Class() {
  (void)end();
}
|
||||
|
||||
// Installs (or, with NULL, removes) the function invoked for every speech-recognition event.
void ESP_SR_Class::onEvent(sr_cb event_cb) {
  this->cb = event_cb;
}
|
||||
|
||||
// Remembers the I2S source and starts the HAL, routing its fill and event callbacks
// back to this object. Returns true when sr_start() reports ESP_OK.
bool ESP_SR_Class::begin(I2SClass &_i2s, const sr_cmd_t *sr_commands, size_t sr_commands_len, sr_channels_t rx_chan, sr_mode_t mode, const char *input_format) {
  i2s = &_i2s;
  const esp_err_t status = sr_start(on_sr_fill, this, rx_chan, mode, input_format, sr_commands, sr_commands_len, on_sr_event, this);
  return ESP_OK == status;
}
|
||||
|
||||
bool ESP_SR_Class::end(void) {
|
||||
return sr_stop() == ESP_OK;
|
||||
}
|
||||
|
||||
// Selects off / wake-word / command detection. Returns true when sr_set_mode() reports ESP_OK.
bool ESP_SR_Class::setMode(sr_mode_t mode) {
  const esp_err_t status = sr_set_mode(mode);
  return ESP_OK == status;
}
|
||||
|
||||
bool ESP_SR_Class::pause(void) {
|
||||
return sr_pause() == ESP_OK;
|
||||
}
|
||||
|
||||
bool ESP_SR_Class::resume(void) {
|
||||
return sr_resume() == ESP_OK;
|
||||
}
|
||||
|
||||
// Forwards a HAL event to the user callback; does nothing when no callback is installed.
void ESP_SR_Class::_sr_event(sr_event_t event, int command_id, int phrase_id) {
  if (cb == NULL) {
    return;
  }
  cb(event, command_id, phrase_id);
}
|
||||
|
||||
// Reads up to `len` bytes of audio from the attached I2S source into `out`.
// Stores the number of bytes read in *bytes_read and returns the I2S object's last error,
// or ESP_FAIL when no I2S source has been attached.
esp_err_t ESP_SR_Class::_fill(void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms) {
  if (!i2s) {
    return ESP_FAIL;
  }

  i2s->setTimeout(timeout_ms);
  const size_t received = i2s->readBytes((char *)out, len);
  *bytes_read = received;

  return (esp_err_t)i2s->lastError();
}
|
||||
|
||||
ESP_SR_Class ESP_SR;
|
||||
|
||||
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Unlicense OR CC0-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "sdkconfig.h"
|
||||
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
|
||||
#include "ESP_I2S.h"
|
||||
#include "esp32-hal-sr.h"
|
||||
|
||||
typedef void (*sr_cb)(sr_event_t event, int command_id, int phrase_id);
|
||||
|
||||
class ESP_SR_Class {
|
||||
private:
|
||||
sr_cb cb;
|
||||
I2SClass *i2s;
|
||||
|
||||
public:
|
||||
ESP_SR_Class();
|
||||
~ESP_SR_Class();
|
||||
|
||||
void onEvent(sr_cb cb);
|
||||
/**
|
||||
* The input format:
|
||||
* M to represent the microphone channel
|
||||
* R to represent the playback reference channel
|
||||
* N to represent an unknown or unused channel
|
||||
*
|
||||
* For example, input_format="MMNR" indicates that the input data consists of four channels,
|
||||
* which are the microphone channel, the microphone channel, an unused channel, and the playback channel
|
||||
*/
|
||||
bool begin(
|
||||
I2SClass &i2s, const sr_cmd_t *sr_commands, size_t sr_commands_len, sr_channels_t rx_chan = SR_CHANNELS_STEREO, sr_mode_t mode = SR_MODE_WAKEWORD,
|
||||
const char *input_format = "MN"
|
||||
);
|
||||
bool end(void);
|
||||
bool setMode(sr_mode_t mode);
|
||||
bool pause(void);
|
||||
bool resume(void);
|
||||
|
||||
void _sr_event(sr_event_t event, int command_id, int phrase_id);
|
||||
esp_err_t _fill(void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms);
|
||||
};
|
||||
|
||||
#if !defined(NO_GLOBAL_INSTANCES) && !defined(NO_GLOBAL_ESP_SR)
|
||||
extern ESP_SR_Class ESP_SR;
|
||||
#endif
|
||||
|
||||
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
@@ -0,0 +1,440 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Unlicense OR CC0-1.0
|
||||
*/
|
||||
#include "sdkconfig.h"
|
||||
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
|
||||
#if !defined(ARDUINO_PARTITION_esp_sr_32) && !defined(ARDUINO_PARTITION_esp_sr_16) && !defined(ARDUINO_PARTITION_esp_sr_8)
|
||||
#warning Compatible partition must be selected for ESP_SR to work
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/queue.h>
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/queue.h"
|
||||
#include "freertos/event_groups.h"
|
||||
#include "freertos/task.h"
|
||||
#include "esp_task_wdt.h"
|
||||
#include "esp_check.h"
|
||||
#include "esp_err.h"
|
||||
#include "esp_log.h"
|
||||
#include "esp_mn_speech_commands.h"
|
||||
#include "esp_process_sdkconfig.h"
|
||||
#include "esp_afe_sr_models.h"
|
||||
#include "esp_mn_models.h"
|
||||
#include "esp_wn_iface.h"
|
||||
#include "esp_wn_models.h"
|
||||
#include "esp_afe_sr_iface.h"
|
||||
#include "esp_mn_iface.h"
|
||||
#include "model_path.h"
|
||||
|
||||
#include "driver/i2s_common.h"
|
||||
#include "esp32-hal-sr.h"
|
||||
#include "esp32-hal-log.h"
|
||||
|
||||
/*
 * Local replacements for ESP_GOTO_ON_FALSE / ESP_RETURN_ON_FALSE that report
 * failures through log_e().
 *
 * ESP_GOTO_ON_FALSE expects a variable named `ret` in the calling scope; it
 * stores err_code there before jumping to goto_tag.
 */
#undef ESP_GOTO_ON_FALSE
#define ESP_GOTO_ON_FALSE(a, err_code, goto_tag, format, ...) \
  do {                                                        \
    if (unlikely(!(a))) {                                     \
      log_e(format, ##__VA_ARGS__);                           \
      ret = err_code;                                         \
      goto goto_tag;                                          \
    }                                                         \
  } while (0)

#undef ESP_RETURN_ON_FALSE
#define ESP_RETURN_ON_FALSE(a, err_code, format, ...) \
  do {                                                \
    if (unlikely(!(a))) {                             \
      log_e(format, ##__VA_ARGS__);                   \
      return err_code;                                \
    }                                                 \
  } while (0)
|
||||
|
||||
#define NEED_DELETE BIT0
|
||||
#define FEED_DELETED BIT1
|
||||
#define DETECT_DELETED BIT2
|
||||
#define PAUSE_FEED BIT3
|
||||
#define PAUSE_DETECT BIT4
|
||||
#define RESUME_FEED BIT5
|
||||
#define RESUME_DETECT BIT6
|
||||
|
||||
typedef struct {
|
||||
wakenet_state_t wakenet_mode;
|
||||
esp_mn_state_t state;
|
||||
int command_id;
|
||||
int phrase_id;
|
||||
} sr_result_t;
|
||||
|
||||
typedef struct {
|
||||
model_iface_data_t *model_data;
|
||||
const esp_mn_iface_t *multinet;
|
||||
const esp_afe_sr_iface_t *afe_handle;
|
||||
esp_afe_sr_data_t *afe_data;
|
||||
int16_t *afe_in_buffer;
|
||||
sr_mode_t mode;
|
||||
uint8_t i2s_rx_chan_num;
|
||||
sr_event_cb user_cb;
|
||||
void *user_cb_arg;
|
||||
sr_fill_cb fill_cb;
|
||||
void *fill_cb_arg;
|
||||
TaskHandle_t feed_task;
|
||||
TaskHandle_t detect_task;
|
||||
TaskHandle_t handle_task;
|
||||
QueueHandle_t result_que;
|
||||
EventGroupHandle_t event_group;
|
||||
} sr_data_t;
|
||||
|
||||
static int SR_CHANNEL_NUM = 3;
|
||||
|
||||
static srmodel_list_t *models = NULL;
|
||||
static sr_data_t *g_sr_data = NULL;
|
||||
|
||||
esp_err_t sr_set_mode(sr_mode_t mode);
|
||||
|
||||
void sr_handler_task(void *pvParam) {
|
||||
while (true) {
|
||||
sr_result_t result;
|
||||
if (xQueueReceive(g_sr_data->result_que, &result, portMAX_DELAY) != pdTRUE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (WAKENET_DETECTED == result.wakenet_mode) {
|
||||
if (g_sr_data->user_cb) {
|
||||
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_WAKEWORD, -1, -1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (WAKENET_CHANNEL_VERIFIED == result.wakenet_mode) {
|
||||
if (g_sr_data->user_cb) {
|
||||
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_WAKEWORD_CHANNEL, result.command_id, -1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ESP_MN_STATE_DETECTED == result.state) {
|
||||
if (g_sr_data->user_cb) {
|
||||
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_COMMAND, result.command_id, result.phrase_id);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ESP_MN_STATE_TIMEOUT == result.state) {
|
||||
if (g_sr_data->user_cb) {
|
||||
g_sr_data->user_cb(g_sr_data->user_cb_arg, SR_EVENT_TIMEOUT, -1, -1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
static void audio_feed_task(void *arg) {
|
||||
size_t bytes_read = 0;
|
||||
int audio_chunksize = g_sr_data->afe_handle->get_feed_chunksize(g_sr_data->afe_data);
|
||||
log_i("audio_chunksize=%d, feed_channel=%d", audio_chunksize, SR_CHANNEL_NUM);
|
||||
|
||||
/* Allocate audio buffer and check for result */
|
||||
int16_t *audio_buffer = heap_caps_malloc(audio_chunksize * sizeof(int16_t) * SR_CHANNEL_NUM, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
|
||||
if (NULL == audio_buffer) {
|
||||
esp_system_abort("No mem for audio buffer");
|
||||
}
|
||||
g_sr_data->afe_in_buffer = audio_buffer;
|
||||
|
||||
while (true) {
|
||||
EventBits_t bits = xEventGroupGetBits(g_sr_data->event_group);
|
||||
if (NEED_DELETE & bits) {
|
||||
xEventGroupSetBits(g_sr_data->event_group, FEED_DELETED);
|
||||
break;
|
||||
}
|
||||
if (PAUSE_FEED & bits) {
|
||||
xEventGroupWaitBits(g_sr_data->event_group, PAUSE_FEED | RESUME_FEED, 1, 1, portMAX_DELAY);
|
||||
}
|
||||
|
||||
/* Read audio data from I2S bus */
|
||||
//ToDo: handle error
|
||||
if (g_sr_data->fill_cb == NULL) {
|
||||
vTaskDelay(100);
|
||||
continue;
|
||||
}
|
||||
esp_err_t err = g_sr_data->fill_cb(
|
||||
g_sr_data->fill_cb_arg, (char *)audio_buffer, audio_chunksize * g_sr_data->i2s_rx_chan_num * sizeof(int16_t), &bytes_read, portMAX_DELAY
|
||||
);
|
||||
if (err != ESP_OK) {
|
||||
vTaskDelay(100);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Channel Adjust */
|
||||
if (g_sr_data->i2s_rx_chan_num == 1) {
|
||||
for (int i = audio_chunksize - 1; i >= 0; i--) {
|
||||
audio_buffer[i * SR_CHANNEL_NUM + 2] = 0;
|
||||
audio_buffer[i * SR_CHANNEL_NUM + 1] = 0;
|
||||
audio_buffer[i * SR_CHANNEL_NUM + 0] = audio_buffer[i];
|
||||
}
|
||||
} else if (g_sr_data->i2s_rx_chan_num == 2) {
|
||||
for (int i = audio_chunksize - 1; i >= 0; i--) {
|
||||
audio_buffer[i * SR_CHANNEL_NUM + 2] = 0;
|
||||
audio_buffer[i * SR_CHANNEL_NUM + 1] = audio_buffer[i * 2 + 1];
|
||||
audio_buffer[i * SR_CHANNEL_NUM + 0] = audio_buffer[i * 2 + 0];
|
||||
}
|
||||
} else {
|
||||
vTaskDelay(100);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Feed samples of an audio stream to the AFE_SR */
|
||||
g_sr_data->afe_handle->feed(g_sr_data->afe_data, audio_buffer);
|
||||
vTaskDelay(2);
|
||||
}
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
static void audio_detect_task(void *arg) {
|
||||
int afe_chunksize = g_sr_data->afe_handle->get_fetch_chunksize(g_sr_data->afe_data);
|
||||
int mu_chunksize = g_sr_data->multinet->get_samp_chunksize(g_sr_data->model_data);
|
||||
assert(mu_chunksize == afe_chunksize);
|
||||
log_i("------------detect start------------");
|
||||
|
||||
while (true) {
|
||||
EventBits_t bits = xEventGroupGetBits(g_sr_data->event_group);
|
||||
if (NEED_DELETE & bits) {
|
||||
xEventGroupSetBits(g_sr_data->event_group, DETECT_DELETED);
|
||||
break;
|
||||
}
|
||||
if (PAUSE_DETECT & bits) {
|
||||
xEventGroupWaitBits(g_sr_data->event_group, PAUSE_DETECT | RESUME_DETECT, 1, 1, portMAX_DELAY);
|
||||
}
|
||||
|
||||
afe_fetch_result_t *res = g_sr_data->afe_handle->fetch(g_sr_data->afe_data);
|
||||
if (!res || res->ret_value == ESP_FAIL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g_sr_data->mode == SR_MODE_WAKEWORD) {
|
||||
if (res->wakeup_state == WAKENET_DETECTED) {
|
||||
log_d("wakeword detected");
|
||||
sr_result_t result = {
|
||||
.wakenet_mode = WAKENET_DETECTED,
|
||||
.state = ESP_MN_STATE_DETECTING,
|
||||
.command_id = 0,
|
||||
.phrase_id = 0,
|
||||
};
|
||||
xQueueSend(g_sr_data->result_que, &result, 0);
|
||||
} else if (res->wakeup_state == WAKENET_CHANNEL_VERIFIED) {
|
||||
sr_set_mode(SR_MODE_OFF);
|
||||
log_d("AFE_FETCH_CHANNEL_VERIFIED, channel index: %d", res->trigger_channel_id);
|
||||
sr_result_t result = {
|
||||
.wakenet_mode = WAKENET_CHANNEL_VERIFIED,
|
||||
.state = ESP_MN_STATE_DETECTING,
|
||||
.command_id = res->trigger_channel_id,
|
||||
.phrase_id = 0,
|
||||
};
|
||||
xQueueSend(g_sr_data->result_que, &result, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (g_sr_data->mode == SR_MODE_COMMAND) {
|
||||
|
||||
esp_mn_state_t mn_state = ESP_MN_STATE_DETECTING;
|
||||
mn_state = g_sr_data->multinet->detect(g_sr_data->model_data, res->data);
|
||||
|
||||
if (ESP_MN_STATE_DETECTING == mn_state) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ESP_MN_STATE_TIMEOUT == mn_state) {
|
||||
sr_set_mode(SR_MODE_OFF);
|
||||
log_d("Time out");
|
||||
sr_result_t result = {
|
||||
.wakenet_mode = WAKENET_NO_DETECT,
|
||||
.state = mn_state,
|
||||
.command_id = 0,
|
||||
.phrase_id = 0,
|
||||
};
|
||||
xQueueSend(g_sr_data->result_que, &result, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ESP_MN_STATE_DETECTED == mn_state) {
|
||||
sr_set_mode(SR_MODE_OFF);
|
||||
esp_mn_results_t *mn_result = g_sr_data->multinet->get_results(g_sr_data->model_data);
|
||||
for (int i = 0; i < mn_result->num; i++) {
|
||||
log_d("TOP %d, command_id: %d, phrase_id: %d, prob: %f", i + 1, mn_result->command_id[i], mn_result->phrase_id[i], mn_result->prob[i]);
|
||||
}
|
||||
|
||||
int sr_command_id = mn_result->command_id[0];
|
||||
int sr_phrase_id = mn_result->phrase_id[0];
|
||||
log_d("Detected command : %d, phrase: %d", sr_command_id, sr_phrase_id);
|
||||
sr_result_t result = {
|
||||
.wakenet_mode = WAKENET_NO_DETECT,
|
||||
.state = mn_state,
|
||||
.command_id = sr_command_id,
|
||||
.phrase_id = sr_phrase_id,
|
||||
};
|
||||
xQueueSend(g_sr_data->result_que, &result, 0);
|
||||
continue;
|
||||
}
|
||||
log_e("Exception unhandled");
|
||||
}
|
||||
}
|
||||
vTaskDelete(NULL);
|
||||
}
|
||||
|
||||
/**
 * Selects the detection mode and enables or disables wake-word detection in the
 * audio front-end to match it.
 *
 * @return ESP_OK on success, ESP_ERR_INVALID_STATE when SR is not running,
 *         ESP_FAIL for a mode other than OFF / WAKEWORD / COMMAND.
 */
esp_err_t sr_set_mode(sr_mode_t mode) {
  ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");

  if (mode != SR_MODE_OFF && mode != SR_MODE_WAKEWORD && mode != SR_MODE_COMMAND) {
    return ESP_FAIL;
  }

  const bool was_wakeword = (SR_MODE_WAKEWORD == g_sr_data->mode);
  if (SR_MODE_WAKEWORD == mode) {
    /* Entering wake-word mode from any other mode turns the wake-word engine on. */
    if (!was_wakeword) {
      g_sr_data->afe_handle->enable_wakenet(g_sr_data->afe_data);
    }
  } else if (was_wakeword) {
    /* Leaving wake-word mode (to OFF or COMMAND) turns it off. */
    g_sr_data->afe_handle->disable_wakenet(g_sr_data->afe_data);
  }

  g_sr_data->mode = mode;
  return ESP_OK;
}
|
||||
|
||||
esp_err_t sr_start(
|
||||
sr_fill_cb fill_cb, void *fill_cb_arg, sr_channels_t rx_chan, sr_mode_t mode, const char *input_format, const sr_cmd_t sr_commands[], size_t cmd_number,
|
||||
sr_event_cb cb, void *cb_arg
|
||||
) {
|
||||
esp_err_t ret = ESP_OK;
|
||||
ESP_RETURN_ON_FALSE(NULL == g_sr_data, ESP_ERR_INVALID_STATE, "SR already running");
|
||||
|
||||
g_sr_data = heap_caps_calloc(1, sizeof(sr_data_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
|
||||
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_NO_MEM, "Failed create sr data");
|
||||
|
||||
g_sr_data->result_que = xQueueCreate(3, sizeof(sr_result_t));
|
||||
ESP_GOTO_ON_FALSE(NULL != g_sr_data->result_que, ESP_ERR_NO_MEM, err, "Failed create result queue");
|
||||
|
||||
g_sr_data->event_group = xEventGroupCreate();
|
||||
ESP_GOTO_ON_FALSE(NULL != g_sr_data->event_group, ESP_ERR_NO_MEM, err, "Failed create event_group");
|
||||
|
||||
BaseType_t ret_val;
|
||||
g_sr_data->user_cb = cb;
|
||||
g_sr_data->user_cb_arg = cb_arg;
|
||||
g_sr_data->fill_cb = fill_cb;
|
||||
g_sr_data->fill_cb_arg = fill_cb_arg;
|
||||
g_sr_data->i2s_rx_chan_num = rx_chan + 1;
|
||||
g_sr_data->mode = mode;
|
||||
|
||||
// Init Model
|
||||
log_d("init model");
|
||||
models = esp_srmodel_init("model");
|
||||
|
||||
// Load WakeWord Detection
|
||||
afe_config_t *afe_config = afe_config_init(input_format, models, AFE_TYPE_SR, AFE_MODE_LOW_COST);
|
||||
g_sr_data->afe_handle = esp_afe_handle_from_config(afe_config);
|
||||
log_d("load wakenet '%s'", afe_config->wakenet_model_name);
|
||||
g_sr_data->afe_data = g_sr_data->afe_handle->create_from_config(afe_config);
|
||||
afe_config_free(afe_config);
|
||||
|
||||
// Load Custom Command Detection
|
||||
char *mn_name = esp_srmodel_filter(models, ESP_MN_PREFIX, ESP_MN_ENGLISH);
|
||||
log_d("load multinet '%s'", mn_name);
|
||||
g_sr_data->multinet = esp_mn_handle_from_name(mn_name);
|
||||
log_d("load model_data '%s'", mn_name);
|
||||
g_sr_data->model_data = g_sr_data->multinet->create(mn_name, 5760);
|
||||
|
||||
// Add commands
|
||||
esp_mn_commands_alloc((esp_mn_iface_t *)g_sr_data->multinet, (model_iface_data_t *)g_sr_data->model_data);
|
||||
log_i("add %d commands", cmd_number);
|
||||
for (size_t i = 0; i < cmd_number; i++) {
|
||||
esp_mn_commands_add(sr_commands[i].command_id, (char *)(sr_commands[i].phoneme));
|
||||
log_i(" cmd[%d] phrase[%d]:'%s'", sr_commands[i].command_id, i, sr_commands[i].str);
|
||||
}
|
||||
|
||||
// Load commands
|
||||
esp_mn_error_t *err_id = esp_mn_commands_update();
|
||||
if (err_id) {
|
||||
for (int i = 0; i < err_id->num; i++) {
|
||||
log_e("err cmd id:%d", err_id->phrases[i]->command_id);
|
||||
}
|
||||
}
|
||||
|
||||
//Start tasks
|
||||
log_d("start tasks");
|
||||
ret_val = xTaskCreatePinnedToCore(&audio_feed_task, "SR Feed Task", 4 * 1024, NULL, 5, &g_sr_data->feed_task, 0);
|
||||
ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, "Failed create audio feed task");
|
||||
vTaskDelay(10);
|
||||
ret_val = xTaskCreatePinnedToCore(&audio_detect_task, "SR Detect Task", 8 * 1024, NULL, 5, &g_sr_data->detect_task, 1);
|
||||
ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, "Failed create audio detect task");
|
||||
ret_val = xTaskCreatePinnedToCore(&sr_handler_task, "SR Handler Task", 6 * 1024, NULL, configMAX_PRIORITIES - 1, &g_sr_data->handle_task, 1);
|
||||
//ret_val = xTaskCreatePinnedToCore(&sr_handler_task, "SR Handler Task", 6 * 1024, NULL, configMAX_PRIORITIES - 1, &g_sr_data->handle_task, 0);
|
||||
ESP_GOTO_ON_FALSE(pdPASS == ret_val, ESP_FAIL, err, "Failed create audio handler task");
|
||||
|
||||
return ESP_OK;
|
||||
err:
|
||||
sr_stop();
|
||||
return ret;
|
||||
}
|
||||
|
||||
esp_err_t sr_stop(void) {
|
||||
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");
|
||||
|
||||
/**
|
||||
* Waiting for all task stopped
|
||||
* TODO: A task creation failure cannot be handled correctly now
|
||||
* */
|
||||
vTaskDelete(g_sr_data->handle_task);
|
||||
xEventGroupSetBits(g_sr_data->event_group, NEED_DELETE);
|
||||
xEventGroupWaitBits(g_sr_data->event_group, NEED_DELETE | FEED_DELETED | DETECT_DELETED, 1, 1, portMAX_DELAY);
|
||||
|
||||
if (g_sr_data->result_que) {
|
||||
vQueueDelete(g_sr_data->result_que);
|
||||
g_sr_data->result_que = NULL;
|
||||
}
|
||||
|
||||
if (g_sr_data->event_group) {
|
||||
vEventGroupDelete(g_sr_data->event_group);
|
||||
g_sr_data->event_group = NULL;
|
||||
}
|
||||
|
||||
if (g_sr_data->model_data) {
|
||||
g_sr_data->multinet->destroy(g_sr_data->model_data);
|
||||
}
|
||||
|
||||
if (g_sr_data->afe_data) {
|
||||
g_sr_data->afe_handle->destroy(g_sr_data->afe_data);
|
||||
}
|
||||
|
||||
if (g_sr_data->afe_in_buffer) {
|
||||
heap_caps_free(g_sr_data->afe_in_buffer);
|
||||
}
|
||||
|
||||
heap_caps_free(g_sr_data);
|
||||
g_sr_data = NULL;
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t sr_pause(void) {
|
||||
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");
|
||||
xEventGroupSetBits(g_sr_data->event_group, PAUSE_FEED | PAUSE_DETECT);
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t sr_resume(void) {
|
||||
ESP_RETURN_ON_FALSE(NULL != g_sr_data, ESP_ERR_INVALID_STATE, "SR is not running");
|
||||
xEventGroupSetBits(g_sr_data->event_group, RESUME_FEED | RESUME_DETECT);
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Unlicense OR CC0-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "sdkconfig.h"
|
||||
#if (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
|
||||
#include "driver/i2s_types.h"
|
||||
#include "esp_err.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Sizes, in bytes, of the two text fields of sr_cmd_t. */
#define SR_CMD_STR_LEN_MAX     64
#define SR_CMD_PHONEME_LEN_MAX 64

/* One phrase the recognizer should listen for. */
typedef struct sr_cmd_t {
  int command_id;                        /* command this phrase belongs to; phrases may share an id */
  char str[SR_CMD_STR_LEN_MAX];          /* human-readable phrase text */
  char phoneme[SR_CMD_PHONEME_LEN_MAX];  /* phoneme string registered with the command model */
} sr_cmd_t;
|
||||
|
||||
/* Events delivered to the sr_event_cb callback. */
typedef enum {
  SR_EVENT_WAKEWORD = 0,          /* WakeWord Detected */
  SR_EVENT_WAKEWORD_CHANNEL = 1,  /* WakeWord Channel Verified */
  SR_EVENT_COMMAND = 2,           /* Command Detected */
  SR_EVENT_TIMEOUT = 3,           /* Command Timeout */
  SR_EVENT_MAX = 4                /* number of events; not an event itself */
} sr_event_t;
|
||||
|
||||
/* Detection modes accepted by sr_start() and sr_set_mode(). */
typedef enum {
  SR_MODE_OFF = 0,       /* Detection Off */
  SR_MODE_WAKEWORD = 1,  /* WakeWord Detection */
  SR_MODE_COMMAND = 2,   /* Command Detection */
  SR_MODE_MAX = 3        /* number of modes; rejected by sr_set_mode() */
} sr_mode_t;
|
||||
|
||||
/* Channel layout of the audio delivered by the fill callback (channel count = value + 1). */
typedef enum {
  SR_CHANNELS_MONO = 0,
  SR_CHANNELS_STEREO = 1,
  SR_CHANNELS_MAX = 2
} sr_channels_t;
|
||||
|
||||
typedef void (*sr_event_cb)(void *arg, sr_event_t event, int command_id, int phrase_id);
|
||||
typedef esp_err_t (*sr_fill_cb)(void *arg, void *out, size_t len, size_t *bytes_read, uint32_t timeout_ms);
|
||||
|
||||
esp_err_t sr_start(
|
||||
sr_fill_cb fill_cb, void *fill_cb_arg, sr_channels_t rx_chan, sr_mode_t mode, const char *input_format, const sr_cmd_t *sr_commands, size_t cmd_number,
|
||||
sr_event_cb cb, void *cb_arg
|
||||
);
|
||||
esp_err_t sr_stop(void);
|
||||
esp_err_t sr_pause(void);
|
||||
esp_err_t sr_resume(void);
|
||||
esp_err_t sr_set_mode(sr_mode_t mode);
|
||||
|
||||
// static const sr_cmd_t sr_commands[] = {
|
||||
// {0, "Turn On the Light", "TkN nN jc LiT"},
|
||||
// {0, "Switch On the Light", "SWgp nN jc LiT"},
|
||||
// {1, "Switch Off the Light", "SWgp eF jc LiT"},
|
||||
// {1, "Turn Off the Light", "TkN eF jc LiT"},
|
||||
// {2, "Turn Red", "TkN RfD"},
|
||||
// {3, "Turn Green", "TkN GRmN"},
|
||||
// {4, "Turn Blue", "TkN BLo"},
|
||||
// {5, "Customize Color", "KcSTcMiZ KcLk"},
|
||||
// {6, "Sing a song", "Sgl c Sel"},
|
||||
// {7, "Play Music", "PLd MYoZgK"},
|
||||
// {8, "Next Song", "NfKST Sel"},
|
||||
// {9, "Pause Playing", "PeZ PLdgl"},
|
||||
// };
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif  // (CONFIG_IDF_TARGET_ESP32S3 || CONFIG_IDF_TARGET_ESP32P4) && (CONFIG_MODEL_IN_FLASH || CONFIG_MODEL_IN_SDCARD)
|
||||
@@ -0,0 +1,146 @@
|
||||
# pip3 install g2p_en
|
||||
from g2p_en import G2p
|
||||
import argparse
|
||||
|
||||
# python3 gen_sr_commands.py "Turn on the light,Switch on the light;Turn off the light,Switch off the light,Go dark;\
|
||||
# Start fan;Stop fan;Volume down,Turn down;Mute sound;Next song;Pause playback"
|
||||
# enum {
|
||||
# SR_CMD_TURN_ON_THE_LIGHT,
|
||||
# SR_CMD_TURN_OFF_THE_LIGHT,
|
||||
# SR_CMD_START_FAN,
|
||||
# SR_CMD_STOP_FAN,
|
||||
# SR_CMD_VOLUME_DOWN,
|
||||
# SR_CMD_MUTE_SOUND,
|
||||
# SR_CMD_NEXT_SONG,
|
||||
# SR_CMD_PAUSE_PLAYBACK,
|
||||
# };
|
||||
# static const sr_cmd_t sr_commands[] = {
|
||||
# { 0, "Turn on the light", "TkN nN jc LiT"},
|
||||
# { 0, "Switch on the light", "SWgp nN jc LiT"},
|
||||
# { 1, "Turn off the light", "TkN eF jc LiT"},
|
||||
# { 1, "Switch off the light", "SWgp eF jc LiT"},
|
||||
# { 1, "Go dark", "Gb DnRK"},
|
||||
# { 2, "Start fan", "STnRT FaN"},
|
||||
# { 3, "Stop fan", "STnP FaN"},
|
||||
# { 4, "Volume down", "VnLYoM DtN"},
|
||||
# { 4, "Turn down", "TkN DtN"},
|
||||
# { 5, "Mute sound", "MYoT StND"},
|
||||
# { 6, "Next song", "NfKST Sel"},
|
||||
# { 7, "Pause playback", "PeZ PLdBaK"},
|
||||
# };
|
||||
|
||||
|
||||
# Maps each label returned by the grapheme-to-phoneme converter to the
# single-character code used in the generated phoneme strings.
_PHONEME_ALPHABET = {
    "AE1": "a", "N": "N", " ": " ", "OW1": "b", "V": "V", "AH0": "c",
    "L": "L", "F": "F", "EY1": "d", "S": "S", "B": "B", "R": "R",
    "AO1": "e", "D": "D", "AH1": "c", "EH1": "f", "OW0": "b", "IH0": "g",
    "G": "G", "HH": "h", "K": "K", "IH1": "g", "W": "W", "AY1": "i",
    "T": "T", "M": "M", "Z": "Z", "DH": "j", "ER0": "k", "P": "P",
    "NG": "l", "IY1": "m", "AA1": "n", "Y": "Y", "UW1": "o", "IY0": "m",
    "EH2": "f", "CH": "p", "AE0": "a", "JH": "q", "ZH": "r", "AA2": "n",
    "SH": "s", "AW1": "t", "OY1": "u", "AW2": "t", "IH2": "g", "AE2": "a",
    "EY2": "d", "ER1": "k", "TH": "v", "UH1": "w", "UW2": "o", "OW2": "b",
    "AY2": "i", "UW0": "o", "AH2": "c", "EH0": "f", "AW0": "t", "AO2": "e",
    "AO0": "e", "UH0": "w", "UH2": "w", "AA0": "n", "AY0": "i", "IY2": "m",
    "EY0": "d", "ER2": "k", "OY2": "u", "OY0": "u",
}


def english_g2p(text, g2p=None):
    """Print and return the C tables for a list of English voice commands.

    ``text`` holds the commands separated by ``;``. Each command may list
    several alternative phrases separated by ``,``; all phrases of one command
    share the same command id. Surrounding whitespace is stripped from every
    phrase, and empty phrases or commands are ignored.

    ``g2p`` is an optional callable that converts a phrase into a list of
    phoneme labels. When omitted, a ``g2p_en.G2p`` instance is created.

    Prints the ``enum`` of command ids followed by the ``sr_commands`` table and
    returns the table text. Labels missing from the alphabet are reported and
    left out of the phoneme string.
    """
    if g2p is None:
        g2p = G2p()

    out = "static const sr_cmd_t sr_commands[] = {\n"
    enum = "enum {\n"

    cmd_id = 0
    for group in text.split(";"):
        phrases = [phrase.strip() for phrase in group.split(",")]
        phrases = [phrase for phrase in phrases if phrase]
        if not phrases:
            # e.g. a trailing ';' - do not emit an empty command or consume an id
            continue

        for phrase_id, phrase in enumerate(phrases):
            phoneme = ""
            for label in g2p(phrase):
                if label not in _PHONEME_ALPHABET:
                    print("skip %s, not found in alphabet" % label)
                    continue
                phoneme += _PHONEME_ALPHABET[label]

            out += " { " + str(cmd_id) + ', "' + phrase + '", "' + phoneme + '"},\n'
            if phrase_id == 0:
                # the first phrase of a command names its enum constant
                enum += " SR_CMD_" + phrase.upper().replace(" ", "_") + ",\n"
        cmd_id += 1

    out += "};"
    enum += "};"
    print(enum)
    print(out)

    return out
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: the single positional argument is the command list.
    cli = argparse.ArgumentParser(prog="English Speech Commands G2P")
    cli.add_argument("text", type=str, default=None, help="input text")
    parsed = cli.parse_args()

    if parsed.text is not None:
        english_g2p(parsed.text)
|
||||
Reference in New Issue
Block a user