From 6c3710df4dc14751b60ba8f8ed4780e0f6288ec8 Mon Sep 17 00:00:00 2001 From: Matthew Grooms Date: Thu, 16 Jun 2016 15:34:37 -0500 Subject: [PATCH] FS-9264: Introduce two new api calls named detect_audio and detect_audio_silence. The existing wait_for_silence call never actually waits for silence until it first detects non-silence. There is also no way to set an independent timeout for detecting both the non-silence and then silence. This causes problems when wait_for_silence is called on an already quiet channel. Splitting the function up into two separate calls with separate timeouts offers more flexibility. --- src/include/switch_ivr.h | 6 + .../applications/mod_dptools/mod_dptools.c | 60 +++++ src/switch_ivr_play_say.c | 253 ++++++++++++++++++ 3 files changed, 319 insertions(+) diff --git a/src/include/switch_ivr.h b/src/include/switch_ivr.h index 7d6d6ac1db..06fa8c137a 100644 --- a/src/include/switch_ivr.h +++ b/src/include/switch_ivr.h @@ -414,6 +414,12 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_tone_detect_session(switch_core_sessi SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *session, switch_file_handle_t *fh, const char *file, switch_input_args_t *args); +SWITCH_DECLARE(switch_status_t) switch_ivr_detect_audio(switch_core_session_t *session, uint32_t thresh, uint32_t audio_hits, + uint32_t timeout_ms, const char *file); + +SWITCH_DECLARE(switch_status_t) switch_ivr_detect_silence(switch_core_session_t *session, uint32_t thresh, uint32_t silence_hits, + uint32_t timeout_ms, const char *file); + SWITCH_DECLARE(switch_status_t) switch_ivr_wait_for_silence(switch_core_session_t *session, uint32_t thresh, uint32_t silence_hits, uint32_t listen_hits, uint32_t timeout_ms, const char *file); diff --git a/src/mod/applications/mod_dptools/mod_dptools.c b/src/mod/applications/mod_dptools/mod_dptools.c index d071321ddb..669fa6583c 100644 --- a/src/mod/applications/mod_dptools/mod_dptools.c +++ b/src/mod/applications/mod_dptools/mod_dptools.c @@ -4356,6 +4356,62 @@ SWITCH_STANDARD_APP(wait_for_silence_function) switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Usage: %s\n", WAIT_FOR_SILENCE_SYNTAX); } +#define DETECT_AUDIO_SYNTAX " []" +SWITCH_STANDARD_APP(detect_audio_function) +{ + char *argv[5] = { 0 }; + uint32_t thresh, audio_hits, timeout_ms = 0; + int argc; + char *lbuf = NULL; + + if (!zstr(data) && (lbuf = switch_core_session_strdup(session, data)) + && (argc = switch_separate_string(lbuf, ' ', argv, (sizeof(argv) / sizeof(argv[0])))) >= 3) { + thresh = atoi(argv[0]); + audio_hits = atoi(argv[1]); + timeout_ms = atoi(argv[2]); + + if (argv[3]) { + timeout_ms = switch_atoui(argv[3]); + } + + if (thresh > 0 && audio_hits > 0) { + switch_ivr_detect_audio(session, thresh, audio_hits, timeout_ms, argv[4]); + return; + } + + } + + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Usage: %s\n", DETECT_AUDIO_SYNTAX); +} + +#define DETECT_SILENCE_SYNTAX " []" +SWITCH_STANDARD_APP(detect_silence_function) +{ + char *argv[5] = { 0 }; + uint32_t thresh, silence_hits, timeout_ms = 0; + int argc; + char *lbuf = NULL; + + if (!zstr(data) && (lbuf = switch_core_session_strdup(session, data)) + && (argc = switch_separate_string(lbuf, ' ', argv, (sizeof(argv) / sizeof(argv[0])))) >= 3) { + thresh = atoi(argv[0]); + silence_hits = atoi(argv[1]); + timeout_ms = atoi(argv[2]); + + if (argv[3]) { + timeout_ms = switch_atoui(argv[3]); + } + + if (thresh > 0 && silence_hits > 0) { + switch_ivr_detect_silence(session, thresh, silence_hits, timeout_ms, argv[4]); + return; + } + + } + + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Usage: %s\n", DETECT_SILENCE_SYNTAX); +} + static switch_status_t event_chat_send(switch_event_t *message_event) { @@ -6270,6 +6326,10 @@ SWITCH_MODULE_LOAD_FUNCTION(mod_dptools_load) SAF_SUPPORT_NOMEDIA | SAF_ZOMBIE_EXEC); SWITCH_ADD_APP(app_interface, "say", "say", "say", say_function, SAY_SYNTAX, SAF_NONE); + SWITCH_ADD_APP(app_interface, "detect_audio", "detect_audio", "detect_audio", detect_audio_function, DETECT_AUDIO_SYNTAX, + SAF_NONE); + SWITCH_ADD_APP(app_interface, "detect_silence", "detect_silence", "detect_silence", detect_silence_function, DETECT_SILENCE_SYNTAX, + SAF_NONE); SWITCH_ADD_APP(app_interface, "wait_for_silence", "wait_for_silence", "wait_for_silence", wait_for_silence_function, WAIT_FOR_SILENCE_SYNTAX, SAF_NONE); SWITCH_ADD_APP(app_interface, "session_loglevel", "session_loglevel", "session_loglevel", session_loglevel_function, SESSION_LOGLEVEL_SYNTAX, diff --git a/src/switch_ivr_play_say.c b/src/switch_ivr_play_say.c index 4a1d6da68c..5da20b25ae 100644 --- a/src/switch_ivr_play_say.c +++ b/src/switch_ivr_play_say.c @@ -2111,6 +2111,259 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_wait_for_silence(switch_core_session_ return status; } +SWITCH_DECLARE(switch_status_t) switch_ivr_detect_audio(switch_core_session_t *session, uint32_t thresh, + uint32_t audio_hits, uint32_t timeout_ms, const char *file) +{ + uint32_t score, count = 0, j = 0; + double energy = 0; + switch_channel_t *channel = switch_core_session_get_channel(session); + int divisor = 0; + uint32_t channels; + switch_frame_t *read_frame; + switch_status_t status = SWITCH_STATUS_FALSE; + int16_t *data; + uint32_t hits = 0; + switch_codec_t raw_codec = { 0 }; + int16_t *abuf = NULL; + switch_frame_t write_frame = { 0 }; + switch_file_handle_t fh = { 0 }; + int32_t sample_count = 0; + switch_codec_implementation_t read_impl = { 0 }; + switch_core_session_get_read_impl(session, &read_impl); + + if (timeout_ms) { + sample_count = (read_impl.actual_samples_per_second / 1000) * timeout_ms; + } + + if (file) { + if (switch_core_file_open(&fh, + file, + read_impl.number_of_channels, + read_impl.actual_samples_per_second, SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT, NULL) != SWITCH_STATUS_SUCCESS) { + switch_core_session_reset(session, SWITCH_TRUE, SWITCH_FALSE); + return SWITCH_STATUS_NOTFOUND; + } + switch_zmalloc(abuf, SWITCH_RECOMMENDED_BUFFER_SIZE); + write_frame.data = abuf; + write_frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE; + } + + + if (switch_core_codec_init(&raw_codec, + "L16", + NULL, + NULL, + read_impl.actual_samples_per_second, + read_impl.microseconds_per_packet / 1000, + 1, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, + NULL, switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) { + + status = SWITCH_STATUS_FALSE; + goto end; + } + + write_frame.codec = &raw_codec; + + divisor = read_impl.actual_samples_per_second / 8000; + channels = read_impl.number_of_channels; + + switch_core_session_set_read_codec(session, &raw_codec); + + while (switch_channel_ready(channel)) { + + status = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0); + + if (!SWITCH_READ_ACCEPTABLE(status)) { + break; + } + + if (sample_count) { + sample_count -= raw_codec.implementation->samples_per_packet; + if (sample_count <= 0) { + switch_channel_set_variable(channel, "detect_audio_timeout", "true"); + switch_channel_set_variable_printf(channel, "detect_audio_hits", "%d", hits); + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "switch_ivr_detect_audio: TIMEOUT %d hits\n", hits); + break; + } + } + + if (abuf) { + switch_size_t olen = raw_codec.implementation->samples_per_packet; + + if (switch_core_file_read(&fh, abuf, &olen) != SWITCH_STATUS_SUCCESS) { + break; + } + + write_frame.samples = (uint32_t) olen; + write_frame.datalen = (uint32_t) (olen * sizeof(int16_t) * fh.channels); + if ((status = switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0)) != SWITCH_STATUS_SUCCESS) { + break; + } + } + + data = (int16_t *) read_frame->data; + + for (energy = 0, j = 0, count = 0; count < read_frame->samples; count++) { + energy += abs(data[j++]); + j += channels; + } + + score = (uint32_t) (energy / (read_frame->samples / divisor)); + + if (score >= thresh) { + hits++; + } else { + hits=0; + } + + if (hits > audio_hits) { + switch_channel_set_variable(channel, "detect_audio_timeout", "false"); + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "switch_ivr_detect_audio: AUDIO DETECTED\n"); + break; + } + } + + switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE); + switch_core_codec_destroy(&raw_codec); + + end: + + if (abuf) { + + switch_core_file_close(&fh); + free(abuf); + } + + return status; +} + +SWITCH_DECLARE(switch_status_t) switch_ivr_detect_silence(switch_core_session_t *session, uint32_t thresh, + uint32_t silence_hits, uint32_t timeout_ms, const char *file) +{ + uint32_t score, count = 0, j = 0; + double energy = 0; + switch_channel_t *channel = switch_core_session_get_channel(session); + int divisor = 0; + uint32_t channels; + switch_frame_t *read_frame; + switch_status_t status = SWITCH_STATUS_FALSE; + int16_t *data; + uint32_t hits = 0; + switch_codec_t raw_codec = { 0 }; + int16_t *abuf = NULL; + switch_frame_t write_frame = { 0 }; + switch_file_handle_t fh = { 0 }; + int32_t sample_count = 0; + switch_codec_implementation_t read_impl = { 0 }; + switch_core_session_get_read_impl(session, &read_impl); + + + if (timeout_ms) { + sample_count = (read_impl.actual_samples_per_second / 1000) * timeout_ms; + } + + if (file) { + if (switch_core_file_open(&fh, + file, + read_impl.number_of_channels, + read_impl.actual_samples_per_second, SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT, NULL) != SWITCH_STATUS_SUCCESS) { + switch_core_session_reset(session, SWITCH_TRUE, SWITCH_FALSE); + return SWITCH_STATUS_NOTFOUND; + } + switch_zmalloc(abuf, SWITCH_RECOMMENDED_BUFFER_SIZE); + write_frame.data = abuf; + write_frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE; + } + + + if (switch_core_codec_init(&raw_codec, + "L16", + NULL, + NULL, + read_impl.actual_samples_per_second, + read_impl.microseconds_per_packet / 1000, + 1, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, + NULL, switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) { + + status = SWITCH_STATUS_FALSE; + goto end; + } + + write_frame.codec = &raw_codec; + + divisor = read_impl.actual_samples_per_second / 8000; + channels = read_impl.number_of_channels; + + switch_core_session_set_read_codec(session, &raw_codec); + + while (switch_channel_ready(channel)) { + + status = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0); + + if (!SWITCH_READ_ACCEPTABLE(status)) { + break; + } + + if (sample_count) { + sample_count -= raw_codec.implementation->samples_per_packet; + if (sample_count <= 0) { + switch_channel_set_variable(channel, "detect_silence_timeout", "true"); + switch_channel_set_variable_printf(channel, "detect_silence_hits", "%d", hits); + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "switch_ivr_detect_silence: TIMEOUT %d hits\n", hits); + break; + } + } + + if (abuf) { + switch_size_t olen = raw_codec.implementation->samples_per_packet; + + if (switch_core_file_read(&fh, abuf, &olen) != SWITCH_STATUS_SUCCESS) { + break; + } + + write_frame.samples = (uint32_t) olen; + write_frame.datalen = (uint32_t) (olen * sizeof(int16_t) * fh.channels); + if ((status = switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0)) != SWITCH_STATUS_SUCCESS) { + break; + } + } + + data = (int16_t *) read_frame->data; + + for (energy = 0, j = 0, count = 0; count < read_frame->samples; count++) { + energy += abs(data[j++]); + j += channels; + } + + score = (uint32_t) (energy / (read_frame->samples / divisor)); + + if (score <= thresh) { + hits++; + } else { + hits=0; + } + + if (hits > silence_hits) { + switch_channel_set_variable(channel, "detect_silence_timeout", "false"); + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "switch_ivr_detect_silence: SILENCE DETECTED\n"); + break; + } + } + + switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE); + switch_core_codec_destroy(&raw_codec); + + end: + + if (abuf) { + + switch_core_file_close(&fh); + free(abuf); + } + + return status; +} + SWITCH_DECLARE(switch_status_t) switch_ivr_read(switch_core_session_t *session, uint32_t min_digits, uint32_t max_digits,