From da507c5f1edeef4759bc2b7820636d11599f2e7d Mon Sep 17 00:00:00 2001 From: Anthony Minessale Date: Wed, 18 Feb 2009 18:53:28 +0000 Subject: [PATCH] add resampler to speech handles git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@12141 d0543943-73ff-0310-b7d9-9358b9ac24b2 --- src/include/switch_core.h | 2 +- src/include/switch_module_interfaces.h | 12 ++- src/include/switch_types.h | 4 +- .../mod_conference/mod_conference.c | 8 +- src/mod/asr_tts/mod_cepstral/mod_cepstral.c | 2 +- src/switch_core_speech.c | 90 ++++++++++++++++++- src/switch_ivr_play_say.c | 3 +- 7 files changed, 107 insertions(+), 14 deletions(-) diff --git a/src/include/switch_core.h b/src/include/switch_core.h index 1f501dd77a..49d80088e4 100644 --- a/src/include/switch_core.h +++ b/src/include/switch_core.h @@ -1432,7 +1432,7 @@ SWITCH_DECLARE(void) switch_core_speech_float_param_tts(switch_speech_handle_t * \return SWITCH_STATUS_SUCCESS with len adjusted to the bytes written if successful */ SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh, - void *data, switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags); + void *data, switch_size_t *datalen, switch_speech_flag_t *flags); /*! \brief Close an open speech handle \param sh the speech handle to close diff --git a/src/include/switch_module_interfaces.h b/src/include/switch_module_interfaces.h index 67d1f31839..e1e2f510f1 100644 --- a/src/include/switch_module_interfaces.h +++ b/src/include/switch_module_interfaces.h @@ -389,7 +389,7 @@ struct switch_speech_interface { /*! function to feed audio to the ASR */ switch_status_t (*speech_feed_tts) (switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags); /*! function to read audio from the TTS */ - switch_status_t (*speech_read_tts) (switch_speech_handle_t *sh, void *data, switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags); + switch_status_t (*speech_read_tts) (switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags); void (*speech_flush_tts) (switch_speech_handle_t *sh); void (*speech_text_param_tts) (switch_speech_handle_t *sh, char *param, const char *val); void (*speech_numeric_param_tts) (switch_speech_handle_t *sh, char *param, int val); @@ -420,6 +420,16 @@ struct switch_speech_handle { char *param; /*! the handle's memory pool */ switch_memory_pool_t *memory_pool; + switch_audio_resampler_t *resampler; + switch_buffer_t *buffer; + switch_byte_t *dbuf; + switch_size_t dbuflen; + /*! the current samplerate */ + uint32_t samplerate; + /*! the current native samplerate */ + uint32_t native_rate; + /*! the number of channels */ + /*! private data for the format module to store handle specific info */ void *private_info; }; diff --git a/src/include/switch_types.h b/src/include/switch_types.h index dce378e277..c91b6be004 100644 --- a/src/include/switch_types.h +++ b/src/include/switch_types.h @@ -944,7 +944,9 @@ typedef enum { SWITCH_SPEECH_FLAG_PEEK = (1 << 1), SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 2), SWITCH_SPEECH_FLAG_BLOCKING = (1 << 3), - SWITCH_SPEECH_FLAG_PAUSE = (1 << 4) + SWITCH_SPEECH_FLAG_PAUSE = (1 << 4), + SWITCH_SPEECH_FLAG_OPEN = (1 << 5), + SWITCH_SPEECH_FLAG_DONE = (1 << 6) } switch_speech_flag_enum_t; typedef uint32_t switch_speech_flag_t; diff --git a/src/mod/applications/mod_conference/mod_conference.c b/src/mod/applications/mod_conference/mod_conference.c index 7dd67566ce..b707304521 100644 --- a/src/mod/applications/mod_conference/mod_conference.c +++ b/src/mod/applications/mod_conference/mod_conference.c @@ -983,9 +983,8 @@ static void *SWITCH_THREAD_FUNC conference_thread_run(switch_thread_t *thread, v file_sample_len = samples; if (conference->fnode->type == NODE_TYPE_SPEECH) { switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING; - uint32_t rate = conference->rate; - if (switch_core_speech_read_tts(conference->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) { + if (switch_core_speech_read_tts(conference->fnode->sh, file_frame, &file_data_len, &flags) == SWITCH_STATUS_SUCCESS) { file_sample_len = file_data_len / 2; } else { file_sample_len = file_data_len = 0; @@ -1997,9 +1996,8 @@ static void conference_loop_output(conference_member_t *member) } else { /* send the node frame instead of the conference frame to the call leg */ if (member->fnode->type == NODE_TYPE_SPEECH) { switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING; - uint32_t rate = member->conference->rate; - - if (switch_core_speech_read_tts(member->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) { + + if (switch_core_speech_read_tts(member->fnode->sh, file_frame, &file_data_len, &flags) == SWITCH_STATUS_SUCCESS) { file_sample_len = file_data_len / 2; } else { file_sample_len = file_data_len = 0; diff --git a/src/mod/asr_tts/mod_cepstral/mod_cepstral.c b/src/mod/asr_tts/mod_cepstral/mod_cepstral.c index 6df1617595..196a0aa029 100644 --- a/src/mod/asr_tts/mod_cepstral/mod_cepstral.c +++ b/src/mod/asr_tts/mod_cepstral/mod_cepstral.c @@ -256,7 +256,7 @@ static void cepstral_speech_flush_tts(switch_speech_handle_t *sh) swift_port_stop(cepstral->port, SWIFT_ASYNC_ANY, SWIFT_EVENT_NOW); } -static switch_status_t cepstral_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags) +static switch_status_t cepstral_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, switch_speech_flag_t *flags) { cepstral_t *cepstral; size_t desired = *datalen; diff --git a/src/switch_core_speech.c b/src/switch_core_speech.c index 0c10e1af49..2497123e75 100644 --- a/src/switch_core_speech.c +++ b/src/switch_core_speech.c @@ -80,8 +80,14 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t * sh->rate = rate; sh->name = switch_core_strdup(pool, module_name); sh->samples = switch_samples_per_packet(rate, interval); + sh->samplerate = rate; + sh->native_rate = rate; - return sh->speech_interface->speech_open(sh, voice_name, rate, flags); + if ((status = sh->speech_interface->speech_open(sh, voice_name, rate, flags)) == SWITCH_STATUS_SUCCESS) { + switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN); + } + + return status; } SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) @@ -128,17 +134,93 @@ SWITCH_DECLARE(void) switch_core_speech_float_param_tts(switch_speech_handle_t * } SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh, - void *data, switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags) + void *data, switch_size_t *datalen, switch_speech_flag_t *flags) { + switch_status_t status; + switch_size_t want, orig_len = *datalen; + switch_assert(sh != NULL); + + want = *datalen; + + top: + + if (sh->buffer && (switch_buffer_inuse(sh->buffer) >= orig_len || switch_test_flag(sh, SWITCH_SPEECH_FLAG_DONE))) { + if ((*datalen = switch_buffer_read(sh->buffer, data, orig_len))) { + return SWITCH_STATUS_SUCCESS; + } + } + + if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_DONE)) { + switch_clear_flag(sh, SWITCH_SPEECH_FLAG_DONE); + *datalen = 0; + return SWITCH_STATUS_FALSE; + } + + more: + + if ((status = sh->speech_interface->speech_read_tts(sh, data, datalen, flags)) != SWITCH_STATUS_SUCCESS) { + switch_set_flag(sh, SWITCH_SPEECH_FLAG_DONE); + goto top; + } + + + if (sh->native_rate && sh->samplerate && sh->native_rate != sh->samplerate) { + if (!sh->resampler) { + if (switch_resample_create(&sh->resampler, + sh->native_rate, sh->samplerate, (uint32_t) orig_len, SWITCH_RESAMPLE_QUALITY) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Unable to create resampler!\n"); + return SWITCH_STATUS_GENERR; + } + } + + switch_resample_process(sh->resampler, data, *datalen / 2); + if (sh->resampler->to_len < want / 2 || sh->resampler->to_len > orig_len / 2) { + if (!sh->buffer) { + int factor = sh->resampler->to_len * sh->samplerate / 1000; + switch_buffer_create_dynamic(&sh->buffer, factor, factor, 0); + switch_assert(sh->buffer); + } + if (!sh->dbuf || sh->dbuflen < sh->resampler->to_len * 2) { + sh->dbuflen = sh->resampler->to_len * 2; + sh->dbuf = switch_core_alloc(sh->memory_pool, sh->dbuflen); + } + switch_assert(sh->resampler->to_len <= sh->dbuflen); + + memcpy((int16_t *) sh->dbuf, sh->resampler->to, sh->resampler->to_len * 2); + switch_buffer_write(sh->buffer, sh->dbuf, sh->resampler->to_len * 2); + + if (switch_buffer_inuse(sh->buffer) < want) { + *datalen = want; + goto more; + } + *datalen = switch_buffer_read(sh->buffer, data, orig_len); + status = SWITCH_STATUS_SUCCESS; + } else { + memcpy(data, sh->resampler->to, sh->resampler->to_len * 2); + *datalen = sh->resampler->to_len * 2; + status = SWITCH_STATUS_SUCCESS; + } + } + + return status; - return sh->speech_interface->speech_read_tts(sh, data, datalen, rate, flags); } SWITCH_DECLARE(switch_status_t) switch_core_speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { switch_status_t status = sh->speech_interface->speech_close(sh, flags); + + if (!switch_test_flag(sh, SWITCH_SPEECH_FLAG_OPEN)) { + return SWITCH_STATUS_FALSE; + } + + if (sh->buffer) { + switch_buffer_destroy(&sh->buffer); + } + + switch_resample_destroy(&sh->resampler); UNPROTECT_INTERFACE(sh->speech_interface); @@ -146,6 +228,8 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_close(switch_speech_handle_t switch_core_destroy_memory_pool(&sh->memory_pool); } + switch_clear_flag(sh, SWITCH_SPEECH_FLAG_OPEN); + return status; } diff --git a/src/switch_ivr_play_say.c b/src/switch_ivr_play_say.c index e554522c28..a406dc89c7 100644 --- a/src/switch_ivr_play_say.c +++ b/src/switch_ivr_play_say.c @@ -1582,7 +1582,6 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session int lead_in_out = 10; switch_status_t status = SWITCH_STATUS_SUCCESS; switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE; - uint32_t rate = 0; switch_size_t extra = 0; char *p, *tmp = NULL; const char *star, *pound; @@ -1753,7 +1752,7 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session } flags = SWITCH_SPEECH_FLAG_BLOCKING; - status = switch_core_speech_read_tts(sh, abuf, &ilen, &rate, &flags); + status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags); if (status != SWITCH_STATUS_SUCCESS) { for (x = 0; !done && x < lead_in_out; x++) {