mirror of
https://github.com/signalwire/freeswitch.git
synced 2025-06-01 11:10:24 +00:00
add resampler to speech handles
git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@12141 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
parent
1b5656f3a8
commit
da507c5f1e
@ -1432,7 +1432,7 @@ SWITCH_DECLARE(void) switch_core_speech_float_param_tts(switch_speech_handle_t *
|
|||||||
\return SWITCH_STATUS_SUCCESS with len adjusted to the bytes written if successful
|
\return SWITCH_STATUS_SUCCESS with len adjusted to the bytes written if successful
|
||||||
*/
|
*/
|
||||||
SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh,
|
SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh,
|
||||||
void *data, switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags);
|
void *data, switch_size_t *datalen, switch_speech_flag_t *flags);
|
||||||
/*!
|
/*!
|
||||||
\brief Close an open speech handle
|
\brief Close an open speech handle
|
||||||
\param sh the speech handle to close
|
\param sh the speech handle to close
|
||||||
|
@ -389,7 +389,7 @@ struct switch_speech_interface {
|
|||||||
/*! function to feed audio to the ASR */
|
/*! function to feed audio to the ASR */
|
||||||
switch_status_t (*speech_feed_tts) (switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags);
|
switch_status_t (*speech_feed_tts) (switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags);
|
||||||
/*! function to read audio from the TTS */
|
/*! function to read audio from the TTS */
|
||||||
switch_status_t (*speech_read_tts) (switch_speech_handle_t *sh, void *data, switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags);
|
switch_status_t (*speech_read_tts) (switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags);
|
||||||
void (*speech_flush_tts) (switch_speech_handle_t *sh);
|
void (*speech_flush_tts) (switch_speech_handle_t *sh);
|
||||||
void (*speech_text_param_tts) (switch_speech_handle_t *sh, char *param, const char *val);
|
void (*speech_text_param_tts) (switch_speech_handle_t *sh, char *param, const char *val);
|
||||||
void (*speech_numeric_param_tts) (switch_speech_handle_t *sh, char *param, int val);
|
void (*speech_numeric_param_tts) (switch_speech_handle_t *sh, char *param, int val);
|
||||||
@ -420,6 +420,16 @@ struct switch_speech_handle {
|
|||||||
char *param;
|
char *param;
|
||||||
/*! the handle's memory pool */
|
/*! the handle's memory pool */
|
||||||
switch_memory_pool_t *memory_pool;
|
switch_memory_pool_t *memory_pool;
|
||||||
|
switch_audio_resampler_t *resampler;
|
||||||
|
switch_buffer_t *buffer;
|
||||||
|
switch_byte_t *dbuf;
|
||||||
|
switch_size_t dbuflen;
|
||||||
|
/*! the current samplerate */
|
||||||
|
uint32_t samplerate;
|
||||||
|
/*! the current native samplerate */
|
||||||
|
uint32_t native_rate;
|
||||||
|
/*! the number of channels */
|
||||||
|
|
||||||
/*! private data for the format module to store handle specific info */
|
/*! private data for the format module to store handle specific info */
|
||||||
void *private_info;
|
void *private_info;
|
||||||
};
|
};
|
||||||
|
@ -944,7 +944,9 @@ typedef enum {
|
|||||||
SWITCH_SPEECH_FLAG_PEEK = (1 << 1),
|
SWITCH_SPEECH_FLAG_PEEK = (1 << 1),
|
||||||
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 2),
|
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 2),
|
||||||
SWITCH_SPEECH_FLAG_BLOCKING = (1 << 3),
|
SWITCH_SPEECH_FLAG_BLOCKING = (1 << 3),
|
||||||
SWITCH_SPEECH_FLAG_PAUSE = (1 << 4)
|
SWITCH_SPEECH_FLAG_PAUSE = (1 << 4),
|
||||||
|
SWITCH_SPEECH_FLAG_OPEN = (1 << 5),
|
||||||
|
SWITCH_SPEECH_FLAG_DONE = (1 << 6)
|
||||||
} switch_speech_flag_enum_t;
|
} switch_speech_flag_enum_t;
|
||||||
typedef uint32_t switch_speech_flag_t;
|
typedef uint32_t switch_speech_flag_t;
|
||||||
|
|
||||||
|
@ -983,9 +983,8 @@ static void *SWITCH_THREAD_FUNC conference_thread_run(switch_thread_t *thread, v
|
|||||||
file_sample_len = samples;
|
file_sample_len = samples;
|
||||||
if (conference->fnode->type == NODE_TYPE_SPEECH) {
|
if (conference->fnode->type == NODE_TYPE_SPEECH) {
|
||||||
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING;
|
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING;
|
||||||
uint32_t rate = conference->rate;
|
|
||||||
|
|
||||||
if (switch_core_speech_read_tts(conference->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) {
|
if (switch_core_speech_read_tts(conference->fnode->sh, file_frame, &file_data_len, &flags) == SWITCH_STATUS_SUCCESS) {
|
||||||
file_sample_len = file_data_len / 2;
|
file_sample_len = file_data_len / 2;
|
||||||
} else {
|
} else {
|
||||||
file_sample_len = file_data_len = 0;
|
file_sample_len = file_data_len = 0;
|
||||||
@ -1997,9 +1996,8 @@ static void conference_loop_output(conference_member_t *member)
|
|||||||
} else { /* send the node frame instead of the conference frame to the call leg */
|
} else { /* send the node frame instead of the conference frame to the call leg */
|
||||||
if (member->fnode->type == NODE_TYPE_SPEECH) {
|
if (member->fnode->type == NODE_TYPE_SPEECH) {
|
||||||
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING;
|
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING;
|
||||||
uint32_t rate = member->conference->rate;
|
|
||||||
|
if (switch_core_speech_read_tts(member->fnode->sh, file_frame, &file_data_len, &flags) == SWITCH_STATUS_SUCCESS) {
|
||||||
if (switch_core_speech_read_tts(member->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) {
|
|
||||||
file_sample_len = file_data_len / 2;
|
file_sample_len = file_data_len / 2;
|
||||||
} else {
|
} else {
|
||||||
file_sample_len = file_data_len = 0;
|
file_sample_len = file_data_len = 0;
|
||||||
|
@ -256,7 +256,7 @@ static void cepstral_speech_flush_tts(switch_speech_handle_t *sh)
|
|||||||
swift_port_stop(cepstral->port, SWIFT_ASYNC_ANY, SWIFT_EVENT_NOW);
|
swift_port_stop(cepstral->port, SWIFT_ASYNC_ANY, SWIFT_EVENT_NOW);
|
||||||
}
|
}
|
||||||
|
|
||||||
static switch_status_t cepstral_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags)
|
static switch_status_t cepstral_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, switch_speech_flag_t *flags)
|
||||||
{
|
{
|
||||||
cepstral_t *cepstral;
|
cepstral_t *cepstral;
|
||||||
size_t desired = *datalen;
|
size_t desired = *datalen;
|
||||||
|
@ -80,8 +80,14 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *
|
|||||||
sh->rate = rate;
|
sh->rate = rate;
|
||||||
sh->name = switch_core_strdup(pool, module_name);
|
sh->name = switch_core_strdup(pool, module_name);
|
||||||
sh->samples = switch_samples_per_packet(rate, interval);
|
sh->samples = switch_samples_per_packet(rate, interval);
|
||||||
|
sh->samplerate = rate;
|
||||||
|
sh->native_rate = rate;
|
||||||
|
|
||||||
return sh->speech_interface->speech_open(sh, voice_name, rate, flags);
|
if ((status = sh->speech_interface->speech_open(sh, voice_name, rate, flags)) == SWITCH_STATUS_SUCCESS) {
|
||||||
|
switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
|
SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
|
||||||
@ -128,17 +134,93 @@ SWITCH_DECLARE(void) switch_core_speech_float_param_tts(switch_speech_handle_t *
|
|||||||
}
|
}
|
||||||
|
|
||||||
SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh,
|
SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh,
|
||||||
void *data, switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags)
|
void *data, switch_size_t *datalen, switch_speech_flag_t *flags)
|
||||||
{
|
{
|
||||||
|
switch_status_t status;
|
||||||
|
switch_size_t want, orig_len = *datalen;
|
||||||
|
|
||||||
switch_assert(sh != NULL);
|
switch_assert(sh != NULL);
|
||||||
|
|
||||||
|
want = *datalen;
|
||||||
|
|
||||||
|
top:
|
||||||
|
|
||||||
|
if (sh->buffer && (switch_buffer_inuse(sh->buffer) >= orig_len || switch_test_flag(sh, SWITCH_SPEECH_FLAG_DONE))) {
|
||||||
|
if ((*datalen = switch_buffer_read(sh->buffer, data, orig_len))) {
|
||||||
|
return SWITCH_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_DONE)) {
|
||||||
|
switch_clear_flag(sh, SWITCH_SPEECH_FLAG_DONE);
|
||||||
|
*datalen = 0;
|
||||||
|
return SWITCH_STATUS_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
more:
|
||||||
|
|
||||||
|
if ((status = sh->speech_interface->speech_read_tts(sh, data, datalen, flags)) != SWITCH_STATUS_SUCCESS) {
|
||||||
|
switch_set_flag(sh, SWITCH_SPEECH_FLAG_DONE);
|
||||||
|
goto top;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (sh->native_rate && sh->samplerate && sh->native_rate != sh->samplerate) {
|
||||||
|
if (!sh->resampler) {
|
||||||
|
if (switch_resample_create(&sh->resampler,
|
||||||
|
sh->native_rate, sh->samplerate, (uint32_t) orig_len, SWITCH_RESAMPLE_QUALITY) != SWITCH_STATUS_SUCCESS) {
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Unable to create resampler!\n");
|
||||||
|
return SWITCH_STATUS_GENERR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch_resample_process(sh->resampler, data, *datalen / 2);
|
||||||
|
if (sh->resampler->to_len < want / 2 || sh->resampler->to_len > orig_len / 2) {
|
||||||
|
if (!sh->buffer) {
|
||||||
|
int factor = sh->resampler->to_len * sh->samplerate / 1000;
|
||||||
|
switch_buffer_create_dynamic(&sh->buffer, factor, factor, 0);
|
||||||
|
switch_assert(sh->buffer);
|
||||||
|
}
|
||||||
|
if (!sh->dbuf || sh->dbuflen < sh->resampler->to_len * 2) {
|
||||||
|
sh->dbuflen = sh->resampler->to_len * 2;
|
||||||
|
sh->dbuf = switch_core_alloc(sh->memory_pool, sh->dbuflen);
|
||||||
|
}
|
||||||
|
switch_assert(sh->resampler->to_len <= sh->dbuflen);
|
||||||
|
|
||||||
|
memcpy((int16_t *) sh->dbuf, sh->resampler->to, sh->resampler->to_len * 2);
|
||||||
|
switch_buffer_write(sh->buffer, sh->dbuf, sh->resampler->to_len * 2);
|
||||||
|
|
||||||
|
if (switch_buffer_inuse(sh->buffer) < want) {
|
||||||
|
*datalen = want;
|
||||||
|
goto more;
|
||||||
|
}
|
||||||
|
*datalen = switch_buffer_read(sh->buffer, data, orig_len);
|
||||||
|
status = SWITCH_STATUS_SUCCESS;
|
||||||
|
} else {
|
||||||
|
memcpy(data, sh->resampler->to, sh->resampler->to_len * 2);
|
||||||
|
*datalen = sh->resampler->to_len * 2;
|
||||||
|
status = SWITCH_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
|
||||||
return sh->speech_interface->speech_read_tts(sh, data, datalen, rate, flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SWITCH_DECLARE(switch_status_t) switch_core_speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags)
|
SWITCH_DECLARE(switch_status_t) switch_core_speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags)
|
||||||
{
|
{
|
||||||
switch_status_t status = sh->speech_interface->speech_close(sh, flags);
|
switch_status_t status = sh->speech_interface->speech_close(sh, flags);
|
||||||
|
|
||||||
|
if (!switch_test_flag(sh, SWITCH_SPEECH_FLAG_OPEN)) {
|
||||||
|
return SWITCH_STATUS_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sh->buffer) {
|
||||||
|
switch_buffer_destroy(&sh->buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
switch_resample_destroy(&sh->resampler);
|
||||||
|
|
||||||
UNPROTECT_INTERFACE(sh->speech_interface);
|
UNPROTECT_INTERFACE(sh->speech_interface);
|
||||||
|
|
||||||
@ -146,6 +228,8 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_close(switch_speech_handle_t
|
|||||||
switch_core_destroy_memory_pool(&sh->memory_pool);
|
switch_core_destroy_memory_pool(&sh->memory_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch_clear_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1582,7 +1582,6 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session
|
|||||||
int lead_in_out = 10;
|
int lead_in_out = 10;
|
||||||
switch_status_t status = SWITCH_STATUS_SUCCESS;
|
switch_status_t status = SWITCH_STATUS_SUCCESS;
|
||||||
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
|
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
|
||||||
uint32_t rate = 0;
|
|
||||||
switch_size_t extra = 0;
|
switch_size_t extra = 0;
|
||||||
char *p, *tmp = NULL;
|
char *p, *tmp = NULL;
|
||||||
const char *star, *pound;
|
const char *star, *pound;
|
||||||
@ -1753,7 +1752,7 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session
|
|||||||
}
|
}
|
||||||
|
|
||||||
flags = SWITCH_SPEECH_FLAG_BLOCKING;
|
flags = SWITCH_SPEECH_FLAG_BLOCKING;
|
||||||
status = switch_core_speech_read_tts(sh, abuf, &ilen, &rate, &flags);
|
status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags);
|
||||||
|
|
||||||
if (status != SWITCH_STATUS_SUCCESS) {
|
if (status != SWITCH_STATUS_SUCCESS) {
|
||||||
for (x = 0; !done && x < lead_in_out; x++) {
|
for (x = 0; !done && x < lead_in_out; x++) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user