Change javascript and conference to use cached speech handles

This is for better performance and to take advantage of MRCP connections.
There should be no apparent change in usage.
The conference now supports changing the voice via the text string
by starting a text blurb with #voice#, e.g.:

#william#This will sound like William.




git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@5664 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
Anthony Minessale 2007-08-25 21:33:26 +00:00
parent 32e44d9a1a
commit 1a091c1ad2
8 changed files with 206 additions and 64 deletions

View File

@ -1173,13 +1173,17 @@ SWITCH_DECLARE(switch_status_t) switch_core_file_close(switch_file_handle_t *fh)
\param module_name the speech module to use
\param voice_name the desired voice name
\param rate the sampling rate
\param interval the sampling interval
\param flags tts flags
\param pool the pool to use (NULL for new pool)
\return SWITCH_STATUS_SUCCESS if the handle is opened
*/
SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
char *module_name,
char *voice_name, unsigned int rate, switch_speech_flag_t *flags, switch_memory_pool_t *pool);
char *voice_name,
unsigned int rate,
unsigned int interval,
switch_speech_flag_t *flags, switch_memory_pool_t *pool);
/*!
\brief Feed text to the TTS module
\param sh the speech handle to feed

View File

@ -155,7 +155,7 @@ typedef enum {
typedef struct conference_file_node {
switch_file_handle_t fh;
switch_speech_handle_t sh;
switch_speech_handle_t *sh;
node_type_t type;
uint8_t done;
uint8_t async;
@ -218,6 +218,8 @@ typedef struct conference_obj {
uint32_t count;
int32_t energy_level;
uint8_t min;
switch_speech_handle_t lsh;
switch_speech_handle_t *sh;
} conference_obj_t;
/* Relationship with another member */
@ -257,6 +259,8 @@ struct conference_member {
conference_file_node_t *fnode;
conference_relationship_t *relationships;
switch_ivr_digit_stream_t *digit_stream;
switch_speech_handle_t lsh;
switch_speech_handle_t *sh;
struct conference_member *next;
};
@ -607,8 +611,8 @@ static switch_status_t conference_del_member(conference_obj_t * conference, conf
fnode = fnode->next;
if (cur->type == NODE_TYPE_SPEECH) {
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
switch_core_speech_close(&cur->sh, &flags);
//switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
//switch_core_speech_close(&cur->sh, &flags);
} else {
switch_core_file_close(&cur->fh);
}
@ -618,6 +622,12 @@ static switch_status_t conference_del_member(conference_obj_t * conference, conf
}
}
if (member->sh) {
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
switch_core_speech_close(member->sh, &flags);
member->sh = NULL;
}
member->conference = NULL;
if (!switch_test_flag(member, MFLAG_NOCHANNEL)) {
@ -763,7 +773,7 @@ static void *SWITCH_THREAD_FUNC conference_thread_run(switch_thread_t * thread,
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING;
uint32_t rate = conference->rate;
if (switch_core_speech_read_tts(&conference->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) {
if (switch_core_speech_read_tts(conference->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) {
file_sample_len = file_data_len / 2;
} else {
file_sample_len = file_data_len = 0;
@ -897,8 +907,8 @@ static void *SWITCH_THREAD_FUNC conference_thread_run(switch_thread_t * thread,
switch_memory_pool_t *pool;
if (conference->fnode->type == NODE_TYPE_SPEECH) {
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
switch_core_speech_close(&conference->fnode->sh, &flags);
//switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
//switch_core_speech_close(conference->fnode->sh, &flags);
} else {
switch_core_file_close(&conference->fnode->fh);
}
@ -943,8 +953,8 @@ static void *SWITCH_THREAD_FUNC conference_thread_run(switch_thread_t * thread,
fnode = fnode->next;
if (cur->type == NODE_TYPE_SPEECH) {
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
switch_core_speech_close(&cur->sh, &flags);
//switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
//switch_core_speech_close(&cur->sh, &flags);
} else {
switch_core_file_close(&cur->fh);
}
@ -991,6 +1001,12 @@ static void *SWITCH_THREAD_FUNC conference_thread_run(switch_thread_t * thread,
switch_ivr_digit_stream_parser_destroy(conference->dtmf_parser);
if (conference->sh) {
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
switch_core_speech_close(conference->sh, &flags);
conference->sh = NULL;
}
if (conference->pool) {
switch_memory_pool_t *pool = conference->pool;
switch_core_destroy_memory_pool(&pool);
@ -1658,8 +1674,8 @@ static void conference_loop_output(conference_member_t * member)
switch_memory_pool_t *pool;
if (member->fnode->type == NODE_TYPE_SPEECH) {
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
switch_core_speech_close(&member->fnode->sh, &flags);
//switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
//switch_core_speech_close(&member->fnode->sh, &flags);
} else {
switch_core_file_close(&member->fnode->fh);
}
@ -1680,7 +1696,7 @@ static void conference_loop_output(conference_member_t * member)
switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_BLOCKING;
uint32_t rate = member->conference->rate;
if (switch_core_speech_read_tts(&member->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) {
if (switch_core_speech_read_tts(member->fnode->sh, file_frame, &file_data_len, &rate, &flags) == SWITCH_STATUS_SUCCESS) {
file_sample_len = file_data_len / 2;
} else {
file_sample_len = file_data_len = 0;
@ -2238,11 +2254,15 @@ static switch_status_t conference_member_say(conference_member_t * member, char
fnode->leadin = leadin;
fnode->pool = pool;
memset(&fnode->sh, 0, sizeof(fnode->sh));
if (switch_core_speech_open(&fnode->sh, conference->tts_engine, conference->tts_voice, conference->rate, &flags, fnode->pool) !=
SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module [%s]!\n", conference->tts_engine);
return SWITCH_STATUS_FALSE;
if (!member->sh) {
memset(&member->lsh, 0, sizeof(member->lsh));
if (switch_core_speech_open(&member->lsh, conference->tts_engine, conference->tts_voice,
conference->rate, conference->interval, &flags, fnode->pool) !=
SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module [%s]!\n", conference->tts_engine);
return SWITCH_STATUS_FALSE;
}
member->sh = &member->lsh;
}
/* Queue the node */
@ -2254,10 +2274,24 @@ static switch_status_t conference_member_say(conference_member_t * member, char
} else {
member->fnode = fnode;
}
fnode->sh = member->sh;
/* Begin Generation */
switch_sleep(200000);
switch_core_speech_feed_tts(&fnode->sh, text, &flags);
if (*text == '#') {
char *tmp = (char *)text + 1;
char *vp = tmp, voice[128] = "";
if ((tmp = strchr(tmp, '#'))) {
text = tmp + 1;
switch_copy_string(voice, vp, (tmp - vp) + 1);
switch_core_speech_text_param_tts(fnode->sh, "voice", voice);
}
} else {
switch_core_speech_text_param_tts(fnode->sh, "voice", conference->tts_voice);
}
switch_core_speech_feed_tts(fnode->sh, text, &flags);
switch_mutex_unlock(member->flag_mutex);
status = SWITCH_STATUS_SUCCESS;
@ -2309,12 +2343,16 @@ static switch_status_t conference_say(conference_obj_t * conference, const char
fnode->type = NODE_TYPE_SPEECH;
fnode->leadin = leadin;
memset(&fnode->sh, 0, sizeof(fnode->sh));
if (switch_core_speech_open(&fnode->sh, conference->tts_engine, conference->tts_voice, conference->rate, &flags, conference->pool) !=
SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module [%s]!\n", conference->tts_engine);
return SWITCH_STATUS_FALSE;
if (!conference->sh) {
memset(&conference->lsh, 0, sizeof(conference->lsh));
if (switch_core_speech_open(&conference->lsh, conference->tts_engine, conference->tts_voice,
conference->rate, conference->interval, &flags, conference->pool) !=
SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module [%s]!\n", conference->tts_engine);
return SWITCH_STATUS_FALSE;
}
conference->sh = &conference->lsh;
}
fnode->pool = pool;
@ -2329,9 +2367,22 @@ static switch_status_t conference_say(conference_obj_t * conference, const char
conference->fnode = fnode;
}
fnode->sh = conference->sh;
if (*text == '#') {
char *tmp = (char *)text + 1;
char *vp = tmp, voice[128] = "";
if ((tmp = strchr(tmp, '#'))) {
text = tmp + 1;
switch_copy_string(voice, vp, (tmp - vp) + 1);
switch_core_speech_text_param_tts(fnode->sh, "voice", voice);
}
} else {
switch_core_speech_text_param_tts(fnode->sh, "voice", conference->tts_voice);
}
/* Begin Generation */
switch_sleep(200000);
switch_core_speech_feed_tts(&fnode->sh, (char *) text, &flags);
switch_core_speech_feed_tts(fnode->sh, (char *) text, &flags);
switch_mutex_unlock(conference->mutex);
status = SWITCH_STATUS_SUCCESS;

View File

@ -243,7 +243,7 @@ SWITCH_STANDARD_APP(rss_function)
}
memset(&sh, 0, sizeof(sh));
if (switch_core_speech_open(&sh, engine, voice, rate, &flags, switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) {
if (switch_core_speech_open(&sh, engine, voice, rate, interval, &flags, switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module!\n");
return;
}

View File

@ -1506,6 +1506,53 @@ static JSBool session_get_variable(JSContext * cx, JSObject * obj, uintN argc, j
return JS_TRUE;
}
/*
 * Tear down the cached speech state attached to a JS session, if there is any.
 *
 * The codec is destroyed first, then the speech handle is closed, and finally
 * jss->speech is cleared so that a repeated call does nothing. The pointer is
 * only reset here; this function does not free the structure itself.
 */
static void destroy_speech_engine(struct js_session *jss)
{
	switch_speech_flag_t close_flags = SWITCH_SPEECH_FLAG_NONE;

	/* Nothing cached: nothing to release. */
	if (!jss->speech) {
		return;
	}

	switch_core_codec_destroy(&jss->speech->codec);
	switch_core_speech_close(&jss->speech->sh, &close_flags);
	jss->speech = NULL;
}
/*
 * Prepare the speech state stored in jss->speech for the given engine/voice.
 *
 * jss->speech is dereferenced unconditionally, so it must already point at
 * storage owned by the caller. The sample rate and frame interval are taken
 * from the session's current read codec; an L16 codec and the speech handle
 * are both set up using the session's memory pool.
 *
 * Returns SWITCH_STATUS_SUCCESS when both the codec and the speech handle were
 * set up, SWITCH_STATUS_FALSE otherwise. If the speech handle cannot be opened
 * the codec initialised here is destroyed again before returning.
 */
static switch_status_t init_speech_engine(struct js_session *jss, char *engine, char *voice)
{
	switch_speech_flag_t tts_flags = SWITCH_SPEECH_FLAG_NONE;
	switch_codec_t *session_codec = switch_core_session_get_read_codec(jss->session);
	uint32_t sample_rate = session_codec->implementation->samples_per_second;
	/* microseconds per frame -> milliseconds per frame */
	int frame_ms = session_codec->implementation->microseconds_per_frame / 1000;

	if (switch_core_codec_init(&jss->speech->codec,
							   "L16",
							   NULL,
							   sample_rate,
							   frame_ms,
							   1, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, NULL,
							   switch_core_session_get_pool(jss->session)) != SWITCH_STATUS_SUCCESS) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Raw Codec Activation Failed L16@%uhz 1 channel %dms\n", sample_rate, frame_ms);
		return SWITCH_STATUS_FALSE;
	}

	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Raw Codec Activation Success L16@%uhz 1 channel %dms\n", sample_rate, frame_ms);

	if (switch_core_speech_open(&jss->speech->sh, engine, voice, sample_rate, frame_ms,
								&tts_flags, switch_core_session_get_pool(jss->session)) == SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_SUCCESS;
	}

	/* The engine could not be opened: undo the codec setup from above. */
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module!\n");
	switch_core_codec_destroy(&jss->speech->codec);
	return SWITCH_STATUS_FALSE;
}
static JSBool session_speak(JSContext * cx, JSObject * obj, uintN argc, jsval * argv, jsval * rval)
{
@ -1524,13 +1571,13 @@ static JSBool session_speak(JSContext * cx, JSObject * obj, uintN argc, jsval *
METHOD_SANITY_CHECK();
*rval = BOOLEAN_TO_JSVAL(JS_FALSE);
channel = switch_core_session_get_channel(jss->session);
assert(channel != NULL);
CHANNEL_SANITY_CHECK();
if (argc < 3) {
*rval = BOOLEAN_TO_JSVAL(JS_FALSE);
return JS_FALSE;
}
@ -1538,6 +1585,34 @@ static JSBool session_speak(JSContext * cx, JSObject * obj, uintN argc, jsval *
voice_name = JS_GetStringBytes(JS_ValueToString(cx, argv[1]));
text = JS_GetStringBytes(JS_ValueToString(cx, argv[2]));
if (switch_strlen_zero(tts_name)) {
eval_some_js("~throw new Error(\"Invalid TTS Name\");", cx, obj, rval);
return JS_TRUE;
}
if (switch_strlen_zero(text)) {
eval_some_js("~throw new Error(\"Invalid Text\");", cx, obj, rval);
return JS_TRUE;
}
if (jss->speech && strcasecmp(jss->speech->sh.name, tts_name)) {
destroy_speech_engine(jss);
}
if (jss->speech) {
switch_core_speech_text_param_tts(&jss->speech->sh, "voice", voice_name);
} else {
jss->speech = switch_core_session_alloc(jss->session, sizeof(*jss->speech));
assert(jss->speech != NULL);
if (init_speech_engine(jss, tts_name, voice_name) != SWITCH_STATUS_SUCCESS) {
eval_some_js("~throw new Error(\"Cannot allocate speech engine!\");", cx, obj, rval);
jss->speech = NULL;
return JS_TRUE;
}
}
if (argc > 3) {
if ((function = JS_ValueToFunction(cx, argv[3]))) {
memset(&cb_state, 0, sizeof(cb_state));
@ -1555,9 +1630,6 @@ static JSBool session_speak(JSContext * cx, JSObject * obj, uintN argc, jsval *
}
}
if (!tts_name && text) {
return JS_FALSE;
}
codec = switch_core_session_get_read_codec(jss->session);
cb_state.ret = BOOLEAN_TO_JSVAL(JS_FALSE);
@ -1565,8 +1637,9 @@ static JSBool session_speak(JSContext * cx, JSObject * obj, uintN argc, jsval *
args.input_callback = dtmf_func;
args.buf = bp;
args.buflen = len;
switch_ivr_speak_text(jss->session, tts_name, voice_name
&& strlen(voice_name) ? voice_name : NULL, codec->implementation->samples_per_second, text, &args);
switch_core_speech_flush_tts(&jss->speech->sh);
switch_ivr_speak_text_handle(jss->session, &jss->speech->sh, &jss->speech->codec, NULL, text, &args);
JS_ResumeRequest(cx, cb_state.saveDepth);
*rval = cb_state.ret;
@ -2389,6 +2462,8 @@ static void session_destroy(JSContext * cx, JSObject * obj)
if (cx && obj) {
if ((jss = JS_GetPrivate(cx, obj))) {
destroy_speech_engine(jss);
if (jss->session) {
channel = switch_core_session_get_channel(jss->session);
switch_channel_set_private(channel, "jss", NULL);

View File

@ -122,6 +122,11 @@ struct sm_module_interface {
typedef struct sm_module_interface sm_module_interface_t;
typedef switch_status_t (*spidermonkey_init_t) (const sm_module_interface_t ** module_interface);
/* Speech (TTS) state that a js_session keeps between calls: a speech handle plus a codec. */
struct js_session_speech {
/* speech handle handed to the switch_core_speech_* functions */
switch_speech_handle_t sh;
/* codec kept alongside the speech handle */
switch_codec_t codec;
};
struct js_session {
switch_core_session_t *session;
JSContext *cx;
@ -141,6 +146,7 @@ struct js_session {
char *rdnis;
char *context;
char *username;
struct js_session_speech *speech;
};
JSBool DEFAULT_SET_PROPERTY(JSContext * cx, JSObject *obj, jsval id, jsval *vp)

View File

@ -76,28 +76,3 @@ SWITCH_DECLARE(switch_status_t) switch_core_directory_close(switch_directory_han
return dh->directory_interface->directory_close(dh);
}
/*!
  \brief Open a speech handle on the named speech module
  \param sh the speech handle to initialise
  \param module_name the speech module to use
  \param voice_name the desired voice name (passed through to the module)
  \param rate the sampling rate
  \param flags tts flags (copied into the handle and passed to the module)
  \param pool the pool to use (NULL to have a new pool created for the handle)
  \return SWITCH_STATUS_GENERR if the module cannot be found, the pool creation
          status if a new pool could not be created, otherwise whatever the
          module's speech_open callback returns
*/
SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
														char *module_name,
														char *voice_name, unsigned int rate, switch_speech_flag_t *flags, switch_memory_pool_t *pool)
{
	switch_status_t status;

	if ((sh->speech_interface = switch_loadable_module_get_speech_interface(module_name)) == 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "invalid speech module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	switch_copy_string(sh->engine, module_name, sizeof(sh->engine));
	sh->flags = *flags;

	if (pool) {
		sh->memory_pool = pool;
	} else {
		if ((status = switch_core_new_memory_pool(&sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
			return status;
		}
		/* Remember that the pool belongs to the handle. */
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
	}

	sh->rate = rate;
	/* Allocate from the handle's pool: the caller's `pool` is NULL when a
	   private pool was created above. */
	sh->name = switch_core_strdup(sh->memory_pool, module_name);

	return sh->speech_interface->speech_open(sh, voice_name, rate, flags);
}

View File

@ -34,6 +34,40 @@
#include <switch.h>
#include "private/switch_core_pvt.h"
/*!
  \brief Open a speech handle on the named speech module
  \param sh the speech handle to initialise
  \param module_name the speech module to use
  \param voice_name the desired voice name (passed through to the module)
  \param rate the sampling rate
  \param interval the sampling interval, used with rate to compute sh->samples
  \param flags tts flags (copied into the handle and passed to the module)
  \param pool the pool to use (NULL to have a new pool created for the handle)
  \return SWITCH_STATUS_GENERR if the module cannot be found, the pool creation
          status if a new pool could not be created, otherwise whatever the
          module's speech_open callback returns
*/
SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
														char *module_name,
														char *voice_name,
														unsigned int rate,
														unsigned int interval,
														switch_speech_flag_t *flags,
														switch_memory_pool_t *pool)
{
	switch_status_t status;

	if ((sh->speech_interface = switch_loadable_module_get_speech_interface(module_name)) == 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "invalid speech module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	switch_copy_string(sh->engine, module_name, sizeof(sh->engine));
	sh->flags = *flags;

	if (pool) {
		sh->memory_pool = pool;
	} else {
		if ((status = switch_core_new_memory_pool(&sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
			return status;
		}
		/* Remember that the pool belongs to the handle. */
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
	}

	sh->rate = rate;
	/* Allocate from the handle's pool: the caller's `pool` is NULL when a
	   private pool was created above. */
	sh->name = switch_core_strdup(sh->memory_pool, module_name);
	sh->samples = switch_bytes_per_frame(rate, interval);

	return sh->speech_interface->speech_open(sh, voice_name, rate, flags);
}
SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
{
assert(sh != NULL);

View File

@ -1344,7 +1344,6 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *ses
{
switch_channel_t *channel;
int interval = 0;
uint32_t len = 0;
switch_frame_t write_frame = { 0 };
switch_timer_t timer;
switch_core_thread_session_t thread_session;
@ -1371,7 +1370,8 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *ses
}
memset(&sh, 0, sizeof(sh));
if (switch_core_speech_open(&sh, tts_name, voice_name, (uint32_t) rate, &flags, switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) {
if (switch_core_speech_open(&sh, tts_name, voice_name, (uint32_t) rate, interval,
&flags, switch_core_session_get_pool(session)) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid TTS module!\n");
switch_core_session_reset(session);
return SWITCH_STATUS_FALSE;
@ -1381,9 +1381,6 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *ses
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name);
interval = read_codec->implementation->microseconds_per_frame / 1000;
sh.samples = switch_bytes_per_frame(rate, interval);
len = sh.samples * 2;
codec_name = "L16";
if (switch_core_codec_init(&codec,
@ -1409,7 +1406,7 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *ses
switch_core_session_reset(session);
return SWITCH_STATUS_GENERR;
}
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "setup timer success %u bytes per %d ms!\n", len, interval);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "setup timer success %u bytes per %d ms!\n", sh.samples * 2, interval);
/* start a thread to absorb incoming audio */
for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) {