diff --git a/libs/jitterbuffer/src/jb_speakup.c b/libs/jitterbuffer/src/jb_speakup.c new file mode 100644 index 0000000000..a0084a4141 --- /dev/null +++ b/libs/jitterbuffer/src/jb_speakup.c @@ -0,0 +1,1048 @@ +/******************************************************* + * jb_speakup: + * an application-independent jitterbuffer, which tries + * to achieve the maximum user perception during a call. + * For more information look at: + * http://www.speakup.nl/opensource/jitterbuffer/ + * + * Copyright on this file is held by: + * - Jesse Kaijen + * - SpeakUp + * + * Contributors: + * Jesse Kaijen + * + * Version: 1.2 (2006-04-20) + * + * Changelog: + * 1.1 => 1.2 (2006-04-20) + * - renamed files to jb_speakup + * - renamed the struct to jb_speakup to avoid namespace collisions + * - renamed all the functions from jb_ to jb_speakup_ to avoid namespace collisions + * - the codecs are defined (where possible) through iana codes instead of arbitrary values + * + * 1.0 => 1.1 (2006-03-24) (thanks to Micheal Jerris, freeswitch.org) + * - added MSVC 2005 project files + * - removed compile warnings (forced floating point) + * - fixed minor bug in setting jb->target + * - added JB_NOJB as return value + * - added version numbering + * + * This program is free software, distributed under the terms of: + * - the GNU Lesser (Library) General Public License + * - the Mozilla Public License + * + * if you are interested in an different licence type, please contact us. + * + * How to use the jitterbuffer, please look at the comments + * in the headerfile. + * + * Further details on specific implementations, + * please look at the comments in the code file. + */ + +#include "jb_speakup.h" +#include +#include +#include + +#define jb_warn(...) (warnf ? warnf(__VA_ARGS__) : (void)0) +#define jb_err(...) (errf ? errf(__VA_ARGS__) : (void)0) +#define jb_dbg(...) (dbgf ? 
dbgf(__VA_ARGS__) : (void)0) + + +//public functions +jb_speakup *jb_speakup_new(); +void jb_speakup_reset(jb_speakup *jb); +void jb_speakup_reset_all(jb_speakup *jb); +void jb_speakup_destroy(jb_speakup *jb); +void jb_speakup_set_settings(jb_speakup *jb, jb_speakup_settings *settings); + +void jb_speakup_get_info(jb_speakup *jb, jb_speakup_info *stats); +void jb_speakup_get_settings(jb_speakup *jb, jb_speakup_settings *settings); +float jb_speakup_guess_mos(float p, long d, int codec); +int jb_speakup_has_frames(jb_speakup *jb); + +void jb_speakup_put(jb_speakup *jb, void *data, int type, long ms, long ts, long now, int codec); +int jb_speakup_get(jb_speakup *jb, void **data, long now, long interpl); + + + +//private functions +static void set_default_settings(jb_speakup *jb); +static void reset(jb_speakup *jb); +static long find_pointer(long *array, long max_index, long value); static void frame_free(jb_speakup_frame *frame); + +static void put_control(jb_speakup *jb, void *data, int type, long ts); +static void put_voice(jb_speakup *jb, void *data, int type, long ms, long ts, int codec); +static void put_history(jb_speakup *jb, long ts, long now, long ms, int codec); +static void calculate_info(jb_speakup *jb, long ts, long now, int codec); + +static int get_control(jb_speakup *jb, void **data); +static int get_voice(jb_speakup *jb, void **data, long now, long interpl); +static int get_voicecase(jb_speakup *jb, void **data, long now, long interpl, long diff); + +static int get_next_frametype(jb_speakup *jb, long ts); +static long get_next_framets(jb_speakup *jb); +static jb_speakup_frame *get_frame(jb_speakup *jb, long ts); +static jb_speakup_frame *get_all_frames(jb_speakup *jb); + +//debug... 
+static jb_output_function_t warnf, errf, dbgf; +void jb_speakup_setoutput(jb_output_function_t warn, jb_output_function_t err, jb_output_function_t dbg) { + warnf = warn; + errf = err; + dbgf = dbg; +} + + +/*********** + * create a new jitterbuffer + * return NULL if malloc doesn't work + * else return jb with default_settings. + */ +jb_speakup *jb_speakup_new() +{ + jb_speakup *jb; + + jb_dbg("N"); + jb = malloc(sizeof(jb_speakup)); + if (!jb) { + jb_err("cannot allocate jb_speakup\n"); + return NULL; + } + set_default_settings(jb); + reset(jb); + return jb; +} + + +/*********** + * empty voice messages + * reset statistics + * keep the settings + */ +void jb_speakup_reset(jb_speakup *jb) +{ + jb_speakup_frame *frame; + + jb_dbg("R"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_reset()\n"); + return; + } + + //free voice + while(jb->voiceframes) { + frame = get_all_frames(jb); + frame_free(frame); + } + //reset stats + memset(&(jb->info),0,sizeof(jb_speakup_info) ); + // set default settings + reset(jb); +} + + +/*********** + * empty nonvoice messages + * empty voice messages + * reset statistics + * reset settings to default + */ +void jb_speakup_reset_all(jb_speakup *jb) +{ + jb_speakup_frame *frame; + + jb_dbg("r"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_reset_all()\n"); + return; + } + + // free nonvoice + while(jb->controlframes) { + frame = jb->controlframes; + jb->controlframes = frame->next; + frame_free(frame); + } + // free voice and reset statistics is done by jb_reset + jb_speakup_reset(jb); + set_default_settings(jb); +} + + +/*********** + * destroy the jitterbuffer + * free all the [non]voice frames with reset_all + * free the jitterbuffer + */ +void jb_speakup_destroy(jb_speakup *jb) +{ + jb_dbg("D"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_destroy()\n"); + return; + } + + jb_speakup_reset_all(jb); + free(jb); +} + + +/*********** + * Set settings for the jitterbuffer. 
+ * Only if a setting is defined it will be written + * in the jb->settings. + * This means that no setting can be set to zero + */ +void jb_speakup_set_settings(jb_speakup *jb, jb_speakup_settings *settings) +{ + jb_dbg("S"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_set_settings()\n"); + return; + } + + if (settings->min_jb) { + jb->settings.min_jb = settings->min_jb; + } + if (settings->max_jb) { + jb->settings.max_jb = settings->max_jb; + } + if (settings->max_successive_interp) { + jb->settings.max_successive_interp = settings->max_successive_interp; + } + if (settings->extra_delay) { + jb->settings.extra_delay = settings->extra_delay; + } + if (settings->wait_grow) { + jb->settings.wait_grow = settings->wait_grow; + } + if (settings->wait_shrink) { + jb->settings.wait_shrink = settings->wait_shrink; + } + if (settings->max_diff) { + jb->settings.max_diff = settings->max_diff; + } +} + + +/*********** + * validates the statistics + * the losspct due the jitterbuffer will be calculated. + * delay and delay_target will be calculated + * *stats = info + */ +void jb_speakup_get_info(jb_speakup *jb, jb_speakup_info *stats) +{ + long max_index, pointer; + + jb_dbg("I"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_speakup_get_info()\n"); + return; + } + + jb->info.delay = jb->current - jb->min; + jb->info.delay_target = jb->target - jb->min; + + //calculate the losspct... + max_index = (jb->hist_pointer < JB_HISTORY_SIZE) ? 
+jb->hist_pointer : JB_HISTORY_SIZE-1; + if (max_index>1) { + pointer = find_pointer(&jb->hist_sorted_delay[0], max_index, +jb->current); + jb->info.losspct = ((max_index - pointer)*100/max_index); + if (jb->info.losspct < 0) { + jb->info.losspct = 0; + } + } else { + jb->info.losspct = 0; + } + + *stats = jb->info; +} + + +/*********** + * gives the settings for this jitterbuffer + * *settings = settings + */ +void jb_speakup_get_settings(jb_speakup *jb, jb_speakup_settings *settings) +{ + jb_dbg("S"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_speakup_get_settings()\n"); + return; + } + + *settings = jb->settings; +} + + +/*********** + * returns an estimate on the MOS with given loss, delay and codec + * if the formula is not present the default will be used + * please use the JB_CODEC_OTHER if you want to define your own formula + * + */ +float jb_speakup_guess_mos(float p, long d, int codec) +{ + float result; + + switch (codec) { + case JB_CODEC_GSM_EFR: + result = (4.31f - 0.23f*p - 0.0071f*d); + break; + + case JB_CODEC_G723: + result = (3.99f - 0.16f*p - 0.0071f*d); + break; + + case JB_CODEC_G729: + result = (4.13f - 0.14f*p - 0.0071f*d); + break; + + case JB_CODEC_PCMU_PLC: + case JB_CODEC_PCMA_PLC: + result = (4.42f - 0.087f*p - 0.0071f*d); + break; + + case JB_CODEC_PCMU: + case JB_CODEC_PCMA: + result = (4.42f - 0.63f*p - 0.0071f*d); + break; + + case JB_CODEC_OTHER: + default: + result = (4.42f - 0.63f*p - 0.0071f*d); + + } + return result; +} + + +/*********** + * if there are any frames left in JB returns JB_OK, otherwise returns JB_EMPTY + */ +int jb_speakup_has_frames(jb_speakup *jb) +{ + jb_dbg("H"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_speakup_has_frames()\n"); + return JB_NOJB; + } + + if(jb->controlframes || jb->voiceframes) { + return JB_OK; + } else { + return JB_EMPTY; + } +} + + +/*********** + * Put a packet into the jitterbuffers + * Only the timestamps of voicepackets are put in the history + * this because the 
jitterbuffer only works for voicepackets + * don't put packets twice in history and queue (e.g. transmitting every frame twice) + * keep track of statistics + */ +void jb_speakup_put(jb_speakup *jb, void *data, int type, long ms, long ts, long now, int codec) +{ + long pointer, max_index; + + if (jb == NULL) { + jb_err("no jb_speakup in jb_speakup_put()\n"); + return; + } + + jb->info.frames_received++; + + if (type == JB_TYPE_CONTROL) { + //put the packet into the contol-queue of the jitterbuffer + jb_dbg("pC"); + put_control(jb,data,type,ts); + + } else if (type == JB_TYPE_VOICE) { + // only add voice that aren't already in the buffer + max_index = (jb->hist_pointer < JB_HISTORY_SIZE) ? jb->hist_pointer : JB_HISTORY_SIZE-1; + pointer = find_pointer(&jb->hist_sorted_timestamp[0], max_index, ts); + if (jb->hist_sorted_timestamp[pointer]==ts) { //timestamp already in queue + jb_dbg("pT"); + free(data); + jb->info.frames_dropped_twice++; + } else { //add + jb_dbg("pV"); + /* add voicepacket to history */ + put_history(jb,ts,now,ms,codec); + /*calculate jitterbuffer size*/ + calculate_info(jb, ts, now, codec); + /*put the packet into the queue of the jitterbuffer*/ + put_voice(jb,data,type,ms,ts,codec); + } + + } else if (type == JB_TYPE_SILENCE){ //silence + jb_dbg("pS"); + put_voice(jb,data,type,ms,ts,codec); + + } else {//should NEVER happen + jb_err("jb_speakup_put(): type not known\n"); + free(data); + } +} + + +/*********** + * control frames have a higher priority then voice frames + * returns JB_OK if a frame is available and *data points to the packet + * returns JB_NOFRAME if it's no time to play voice and no control available + * returns JB_INTERP if interpolating is required + * returns JB_EMPTY if no voice frame is in the jitterbuffer (only during silence) + */ +int jb_speakup_get(jb_speakup *jb, void **data, long now, long interpl) +{ + int result; + + jb_dbg("A"); + if (jb == NULL) { + jb_err("no jb_speakup in jb_speakup_get()\n"); + return JB_NOJB; + } 
+ + result = get_control(jb, data); + if (result != JB_OK ) { //no control message available maybe there is voice... + result = get_voice(jb, data, now, interpl); + } + return result; +} + + +/*********** + * set all the settings to default + */ +static void set_default_settings(jb_speakup *jb) +{ + jb->settings.min_jb = JB_MIN_SIZE; + jb->settings.max_jb = JB_MAX_SIZE; + jb->settings.max_successive_interp = JB_MAX_SUCCESSIVE_INTERP; + jb->settings.extra_delay = JB_ALLOW_EXTRA_DELAY; + jb->settings.wait_grow = JB_WAIT_GROW; + jb->settings.wait_shrink = JB_WAIT_SHRINK; + jb->settings.max_diff = JB_MAX_DIFF; +} + + +/*********** + * reset the jitterbuffer so we can start in silence and + * we start with a new history + */ +static void reset(jb_speakup *jb) +{ + jb->hist_pointer = 0; //start over + jb->silence_begin_ts = 0; //no begin_ts defined + jb->info.silence =1; //we always start in silence +} + + +/*********** + * Search algorithm + * @REQUIRE max_index is within array + * + * Find the position of value in hist_sorted_delay + * if value doesn't exist return first pointer where array[low]>value + * int low; //the lowest index being examined + * int max_index; //the highest index being examined + * int mid; //the middle index between low and max_index. 
+ * mid ==(low+max_index)/2 + * at the end low is the position of value or where array[low]>value + */ +static long find_pointer(long *array, long max_index, long value) +{ + long low, mid, high; + low = 0; + high = max_index; + while (low<=high) { + mid= (low+high)/2; + if (array[mid] < value) { + low = mid+1; + } else { + high = mid-1; + } + } + while(low < max_index && (array[low]==array[(low+1)]) ) { + low++; + } + return low; +} + + +/*********** + * free the given frame, afterwards the framepointer is undefined + */ +static void frame_free(jb_speakup_frame *frame) +{ + if (frame->data) { + free(frame->data); + } + free(frame); +} + + +/*********** + * put a nonvoice frame into the nonvoice queue + */ +static void put_control(jb_speakup *jb, void *data, int type, long ts) +{ + jb_speakup_frame *frame, *p; + + frame = malloc(sizeof(jb_speakup_frame)); + if(!frame) { + jb_err("cannot allocate frame\n"); + return; + } + frame->data = data; + frame->ts = ts; + frame->type = type; + frame->next = NULL; + data = NULL;//to avoid stealing memory + + p = jb->controlframes; + if (p) { //there are already control messages + if (ts < p->ts) { + jb->controlframes = frame; + frame->next = p; + } else { + while (p->next && (ts >=p->next->ts)) {//sort on timestamps! so find place to put... 
+ p = p->next; + } + if (p->next) { + frame->next = p->next; + } + p->next = frame; + } + } else { + jb->controlframes = frame; + } +} + + +/*********** + * put a voice or silence frame into the jitterbuffer + */ +static void put_voice(jb_speakup *jb, void *data, int type, long ms, long ts, int codec) +{ + jb_speakup_frame *frame, *p; + frame = malloc(sizeof(jb_speakup_frame)); + if(!frame) { + jb_err("cannot allocate frame\n"); + return; + } + + frame->data = data; + frame->ts = ts; + frame->ms = ms; + frame->type = type; + frame->codec = codec; + + data = NULL; //to avoid stealing the memory location + /* + * frames are a circular list, jb->voiceframes points to to the lowest ts, + * jb->voiceframes->prev points to the highest ts + */ + if(!jb->voiceframes) { /* queue is empty */ + jb->voiceframes = frame; + frame->next = frame; + frame->prev = frame; + } else { + p = jb->voiceframes; + if(ts < p->prev->ts) { //frame is out of order + jb->info.frames_ooo++; + } + if (ts < p->ts) { //frame is lowest, let voiceframes point to it! + jb->voiceframes = frame; + } else { + while(ts < p->prev->ts ) { + p = p->prev; + } + } + frame->next = p; + frame->prev = p->prev; + frame->next->prev = frame; + frame->prev->next = frame; + } +} + + +/*********** + * puts the timestamps of a received packet in the history of *jb + * for later calculations of the size of jitterbuffer *jb. + * + * summary of function: + * - calculate delay difference + * - delete old value from hist & sorted_history_delay & sorted_history_timestamp if needed + * - add new value to history & sorted_history_delay & sorted_history_timestamp + * - we keep sorted_history_delay for calculations + * - we keep sorted_history_timestamp for ensuring each timestamp isn't put twice in the buffer. 
+ */ +static void put_history(jb_speakup *jb, long ts, long now, long ms, int codec) +{ + jb_speakup_hist_element out, in; + long max_index, pointer, location; + + // max_index is the highest possible index + max_index = (jb->hist_pointer < JB_HISTORY_SIZE) ? jb->hist_pointer : JB_HISTORY_SIZE-1; + location = (jb->hist_pointer % JB_HISTORY_SIZE); + + // we want to delete a value from the jitterbuffer + // only when we are through the history. + if (jb->hist_pointer > JB_HISTORY_SIZE-1) { + /* the value we need to delete from sorted histories */ + out = jb->hist[location]; + //delete delay from hist_sorted_delay + pointer = find_pointer(&jb->hist_sorted_delay[0], max_index, out.delay); + /* move over pointer is the position of kicked*/ + if (pointerhist_sorted_delay[pointer]), + &(jb->hist_sorted_delay[pointer+1]), + ((JB_HISTORY_SIZE-(pointer+1)) * sizeof(long)) ); + } + + //delete timestamp from hist_sorted_timestamp + pointer = find_pointer(&jb->hist_sorted_timestamp[0], max_index, out.ts); + /* move over pointer is the position of kicked*/ + if (pointerhist_sorted_timestamp[pointer]), + &(jb->hist_sorted_timestamp[pointer+1]), + ((JB_HISTORY_SIZE-(pointer+1)) * sizeof(long)) ); + } + } + + in.delay = now - ts; //delay of current packet + in.ts = ts; //timestamp of current packet + in.ms = ms; //length of current packet + in.codec = codec; //codec of current packet + + /* adding the new delay to the sorted history + * first special cases: + * - delay is the first history stamp + * - delay > highest history stamp + */ + if (max_index==0 || in.delay >= jb->hist_sorted_delay[max_index-1]) { + jb->hist_sorted_delay[max_index] = in.delay; + } else { + pointer = find_pointer(&jb->hist_sorted_delay[0], (max_index-1), in.delay); + /* move over and add delay */ + memmove( &(jb->hist_sorted_delay[pointer+1]), + &(jb->hist_sorted_delay[pointer]), + ((JB_HISTORY_SIZE-(pointer+1)) * sizeof(long)) ); + jb->hist_sorted_delay[pointer] = in.delay; + } + + /* adding the new 
timestamp to the sorted history + * first special cases: + * - timestamp is the first history stamp + * - timestamp > highest history stamp + */ + if (max_index==0 || in.ts >= jb->hist_sorted_timestamp[max_index-1]) { + jb->hist_sorted_timestamp[max_index] = in.ts; + } else { + + pointer = find_pointer(&jb->hist_sorted_timestamp[0], (max_index-1), in.ts); + /* move over and add timestamp */ + memmove( &(jb->hist_sorted_timestamp[pointer+1]), + &(jb->hist_sorted_timestamp[pointer]), + ((JB_HISTORY_SIZE-(pointer+1)) * sizeof(long)) ); + jb->hist_sorted_timestamp[pointer] = in.ts; + } + + /* put the jb_speakup_hist_element in the history + * then increase hist_pointer for next time + */ + jb->hist[location] = in; + jb->hist_pointer++; +} + + +/*********** + * this tries to make a jitterbuffer that behaves like + * the jitterbuffer proposed in this article: + * Adaptive Playout Buffer Algorithm for Enhancing Perceived Quality of Streaming Applications + * by: Kouhei Fujimoto & Shingo Ata & Masayuki Murata + * http://www.nal.ics.es.osaka-u.ac.jp/achievements/web2002/pdf/journal/k-fujimo02TSJ-AdaptivePlayoutBuffer.pdf + * + * it calculates jitter and minimum delay + * get the best delay for the specified codec + + */ +static void calculate_info(jb_speakup *jb, long ts, long now, int codec) +{ + long diff, size, max_index, d, d1, d2, n; + float p, p1, p2, A, B; + //size = how many items there in the history + size = (jb->hist_pointer < JB_HISTORY_SIZE) ? jb->hist_pointer : JB_HISTORY_SIZE; + max_index = size-1; + + /* + * the Inter-Quartile Range can be used for estimating jitter + * http://www.slac.stanford.edu/comp/net/wan-mon/tutorial.html#variable + * just take the square root of the iqr for jitter + */ + jb->info.iqr = jb->hist_sorted_delay[max_index*3/4] - jb->hist_sorted_delay[max_index/4]; + + + /* + * The RTP way of calculating jitter. + * This one is used at the moment, although it is not correct. + * But in this way the other side understands us. 
+ */ + diff = now - ts - jb->last_delay; + if (!jb->last_delay) { + diff = 0; //this to make sure we won't get odd jitter due first ts. + } + jb->last_delay = now - ts; + if (diff <0){ + diff = -diff; + } + jb->info.jitter = jb->info.jitter + (diff - jb->info.jitter)/16; + + /* jb->min is minimum delay in hist_sorted_delay, we don't look at the lowest 2% */ + /* because sometimes there are odd delays in there */ + jb->min = jb->hist_sorted_delay[(max_index*2/100)]; + + /* + * calculating the preferred size of the jitterbuffer: + * instead of calculating the optimum delay using the Pareto equation + * I use look at the array of sorted delays and choose my optimum from there + * always walk trough a percentage of the history this because imagine following tail: + * [...., 12, 300, 301 ,302] + * her we want to discard last three but that won't happen if we won't walk the array + * the number of frames we walk depends on how scattered the sorted delays are. + * For that we look at the iqr. The dependencies of the iqr are based on + * tests we've done here in the lab. But are not optimized. + */ + //init: + //the higest delay.. + d = d1= d2 = jb->hist_sorted_delay[max_index]- jb->min; + A=B=LONG_MIN; + p = p2 =0; + n=0; + p1 = 5; //always look at the top 5% + if (jb->info.iqr >200) { //with more jitter look at more delays + p1=25; + } else if (jb->info.iqr >100) { + p1=20; + } else if (jb->info.iqr >50){ + p1=11; + } + + //find the optimum delay.. 
+ while(max_index>10 && (B >= A ||p2 A) { + p = p2; + d = d2; + A = B; + } + d1 = d2; + //find next delay != delay so the same delay isn't calculated twice + //don't look further if we have seen half of the history + while((d2>=d1) && ((n*2)hist_sorted_delay[(max_index-n)] - jb->min; + } + } + //the targeted size of the jitterbuffer + if (jb->settings.min_jb && (jb->settings.min_jb > d) ) { + jb->target = jb->min + jb->settings.min_jb; + } else if (jb->settings.max_jb && (jb->settings.max_jb > d) ){ + jb->target = jb->min + jb->settings.max_jb; + } else { + jb->target = jb->min + d; + } +} + + +/*********** + * if there is a nonvoice frame it will be returned [*data] and the frame + * will be made free + */ +static int get_control(jb_speakup *jb, void **data) +{ + jb_speakup_frame *frame; + int result; + + frame = jb->controlframes; + if (frame) { + jb_dbg("gC"); + *data = frame->data; + frame->data = NULL; + jb->controlframes = frame->next; + frame_free(frame); + result = JB_OK; + } else { + result = JB_NOFRAME; + } + return result; +} + + +/*********** + * returns JB_OK if a frame is available and *data points to the packet + * returns JB_NOFRAME if it's no time to play voice and or no frame available + * returns JB_INTERP if interpolating is required + * returns JB_EMPTY if no voice frame is in the jitterbuffer (only during silence) + * + * if the next frame is a silence frame we will go in silence-mode + * each new instance of the jitterbuffer will start in silence mode + * in silence mode we will set the jitterbuffer to the size we want + * when we are not in silence mode get_voicecase will handle the rest. + */ +static int get_voice(jb_speakup *jb, void **data, long now, long interpl) +{ + jb_speakup_frame *frame; + long diff; + int result; + + diff = jb->target - jb->current; + + //if the next frame is a silence frame, go in silence mode... 
+ if((get_next_frametype(jb, now - jb->current) == JB_TYPE_SILENCE) ) { + jb_dbg("gs"); + frame = get_frame(jb, now - jb->current); + *data = frame->data; + frame->data = NULL; + jb->info.silence =1; + jb->silence_begin_ts = frame->ts; + frame_free(frame); + result = JB_OK; + } else { + if(jb->info.silence) { // we are in silence + /* + * During silence we can set the jitterbuffer size to the size + * we want... + */ + if (diff) { + jb->current = jb->target; + } + frame = get_frame(jb, now - jb->current); + if (frame) { + if (jb->silence_begin_ts && frame->ts < jb->silence_begin_ts) { + jb_dbg("gL"); + /* voice frame is late, next!*/ + jb->info.frames_late++; + frame_free(frame); + result = get_voice(jb, data, now, interpl); + } else { + jb_dbg("gP"); + /* voice frame */ + jb->info.silence = 0; + jb->silence_begin_ts = 0; + jb->next_voice_time = frame->ts + frame->ms; + jb->info.last_voice_ms = frame->ms; + *data = frame->data; + frame->data = NULL; + frame_free(frame); + result = JB_OK; + } + } else { //no frame + jb_dbg("gS"); + result = JB_EMPTY; + } + } else { //voice case + result = get_voicecase(jb,data,now,interpl,diff); + } + } + return result; +} + + +/*********** + * The voicecase has four 'options' + * - difference is way off, reset + * - diff > 0, we may need to grow + * - diff < 0, we may need to shrink + * - everything else + */ +static int get_voicecase(jb_speakup *jb, void **data, long now, long interpl, long diff) +{ + jb_speakup_frame *frame; + int result; + + // * - difference is way off, reset + if (diff > jb->settings.max_diff || -diff > jb->settings.max_diff) { + jb_err("wakko diff in get_voicecase\n"); + reset(jb); //reset hist because the timestamps are wakko. 
+ result = JB_NOFRAME; + //- diff > 0, we may need to grow + } else if ((diff > 0) && + (now > (jb->last_adjustment + jb->settings.wait_grow) + || (now + jb->current + interpl) < get_next_framets(jb) ) ) { //grow + /* first try to grow */ + if (diffcurrent +=diff; + } else { + jb_dbg("aG"); + /* grow by interp frame len */ + jb->current += interpl; + } + jb->last_adjustment = now; + result = get_voice(jb, data, now, interpl); + //- diff < 0, we may need to shrink + } else if ( (diff < 0) + && (now > (jb->last_adjustment + jb->settings.wait_shrink)) + && ((-diff) > jb->settings.extra_delay) ) { + /* now try to shrink + * if there is a frame shrink by frame length + * otherwise shrink by interpl + */ + jb->last_adjustment = now; + + frame = get_frame(jb, now - jb->current); + if(frame) { + jb_dbg("as"); + /* shrink by frame size we're throwing out */ + jb->info.frames_dropped++; + jb->current -= frame->ms; + frame_free(frame); + } else { + jb_dbg("aS"); + /* shrink by interpl */ + jb->current -= interpl; + } + result = get_voice(jb, data, now, interpl); + } else { + /* if it is not the time to play a result = JB_NOFRAME + * else We try to play a frame if a frame is available + * and not late it is played otherwise + * if available it is dropped and the next is tried + * last option is interpolating + */ + if (now - jb->current < jb->next_voice_time) { + jb_dbg("aN"); + result = JB_NOFRAME; + } else { + frame = get_frame(jb, now - jb->current); + if (frame) { //there is a frame + /* voice frame is late */ + if(frame->ts < jb->next_voice_time) { //late + jb_dbg("aL"); + jb->info.frames_late++; + frame_free(frame); + result = get_voice(jb, data, now, interpl); + } else { + jb_dbg("aP"); + /* normal case; return the frame, increment stuff */ + *data = frame->data; + frame->data = NULL; + jb->next_voice_time = frame->ts + frame->ms; + jb->cnt_successive_interp = 0; + frame_free(frame); + result = JB_OK; + } + } else { // no frame, thus interpolate + 
jb->cnt_successive_interp++; + /* assume silence instead of continuing to interpolate */ + if (jb->settings.max_successive_interp && jb->cnt_successive_interp >= jb->settings.max_successive_interp) { + jb->info.silence = 1; + jb->silence_begin_ts = jb->next_voice_time; + } + jb_dbg("aI"); + jb->next_voice_time += interpl; + result = JB_INTERP; + } + } + } + return result; + +} + + +/*********** + * if there are frames and next frame->ts is smaller or equal ts + * return type of next frame. + * else return 0 + */ +static int get_next_frametype(jb_speakup *jb, long ts) +{ + jb_speakup_frame *frame; + int result; + + result = 0; + frame = jb->voiceframes; + if (frame && frame->ts <= ts) { + result = frame->type; + } + return result; +} + + +/*********** + * returns ts from next frame in jb->voiceframes + * or returns LONG_MAX if there is no frame + */ +static long get_next_framets(jb_speakup *jb) +{ + if (jb->voiceframes) { + return jb->voiceframes->ts; + } + return LONG_MAX; +} + + +/*********** + * if there is a frame in jb->voiceframes and + * has a timestamp smaller/equal to ts + * this frame will be returned and + * removed from the queue + */ +static jb_speakup_frame *get_frame(jb_speakup *jb, long ts) +{ + jb_speakup_frame *frame; + + frame = jb->voiceframes; + if (frame && frame->ts <= ts) { + if(frame->next == frame) { + jb->voiceframes = NULL; + } else { + /* remove this frame */ + frame->prev->next = frame->next; + frame->next->prev = frame->prev; + jb->voiceframes = frame->next; + } + return frame; + } + return NULL; +} + +/*********** + * if there is a frame in jb->voiceframes + * this frame will be unconditionally returned and + * removed from the queue + */ +static jb_speakup_frame *get_all_frames(jb_speakup *jb) +{ + jb_speakup_frame *frame; + + frame = jb->voiceframes; + if (frame) { + if(frame->next == frame) { + jb->voiceframes = NULL; + } else { + /* remove this frame */ + frame->prev->next = frame->next; + frame->next->prev = frame->prev; + 
jb->voiceframes = frame->next; + } + return frame; + } + return NULL; +} + + +//EOF diff --git a/libs/jitterbuffer/src/jb_speakup.h b/libs/jitterbuffer/src/jb_speakup.h new file mode 100644 index 0000000000..6ff4853d07 --- /dev/null +++ b/libs/jitterbuffer/src/jb_speakup.h @@ -0,0 +1,322 @@ +/******************************************************* + * jb_speakup: + * an application-independent jitterbuffer, which tries + * to achieve the maximum user perception during a call. + * For more information look at: + * http://www.speakup.nl/opensource/jitterbuffer/ + * + * Copyright on this file is held by: + * - Jesse Kaijen + * - SpeakUp + * + * Contributors: + * Jesse Kaijen + * + * Version: 1.1 + * + * Changelog: + * See jb_speakup.c + * + * This program is free software, distributed under the terms of: + * - the GNU Lesser (Library) General Public License + * - the Mozilla Public License + * + * if you are interested in an different licence type, please contact us. + * + * How to use the jitterbuffer, please look at the comments + * in the headerfile. + * + * Further details on specific implementations, + * please look at the comments in the code file. + */ + +#ifndef _JB_SPEAKUP_H_ +#define _JB_SPEAKUP_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/*********** + * The header file consists of four parts. 
+ * - configuration constants, structs and parameter definitions + * - functions + * - How to use the jitterbuffer and + * which responsibilities do YOU have + * - debug messages explained + */ + + +// configuration constants +/* Number of historical timestamps to use in calculating jitter and jitterbuffer size */ #define JB_HISTORY_SIZE 500 +/* minimum jitterbuffer size, disabled if 0 */ #define JB_MIN_SIZE 0 +/* maximum jitterbuffer size, disabled if 0 */ #define JB_MAX_SIZE 0 + /* maximum successive interpolating frames, disabled if 0 */ #define JB_MAX_SUCCESSIVE_INTERP 0 +/* amount of extra delay allowed before shrinking */ +#define JB_ALLOW_EXTRA_DELAY 30 +/* ms between growing */ +#define JB_WAIT_GROW 60 +/* ms between shrinking */ +#define JB_WAIT_SHRINK 250 +/* ms that the JB max may be off */ +#define JB_MAX_DIFF 6000 //in a RTP stream the max_diff may be 3000 packets (most packets are 20ms) + +//structs +typedef struct jb_speakup_info { + long frames_received; /* Number of frames received by the jitterbuffer */ + long frames_late; /* Number of frames that were late */ + long frames_lost; /* Number of frames that were lost */ + long frames_ooo; /* Number of frames that were Out Of Order */ + long frames_dropped; /* Number of frames that were dropped due shrinkage of the jitterbuffer */ + long frames_dropped_twice; /* Number of frames that were dropped because this timestamp was already in the jitterbuffer */ + + long delay; /* Current delay due the jitterbuffer */ + long jitter; /* jitter measured within current history interval*/ + long losspct; /* recent lost frame percentage (network and jitterbuffer loss) */ + + long delay_target; /* The delay where we want to grow to */ + long losspct_jb; /* recent lost percentage due the jitterbuffer */ + long last_voice_ms; /* the duration of the last voice frame */ + short silence; /* If we are in silence 1-yes 0-no */ + long iqr; /* Inter Quartile Range of current history, if the squareroot is taken it is a good 
estimate of jitter */ +} jb_speakup_info; + +typedef struct jb_speakup_frame { + void *data; /* the frame data */ + long ts; /* the senders timestamp */ + long ms; /* length of this frame in ms */ + int type; /* the type of frame */ + int codec; /* codec of this frame, undefined if nonvoice */ + struct jb_speakup_frame *next, *prev; /* pointers to the next and previous frames in the queue */ +} jb_speakup_frame; + +typedef struct jb_speakup_hist_element{ + long delay; /* difference between time of arrival and senders timestamp */ + long ts; /* senders timestamp */ + long ms; /* length of this frame in ms */ + int codec; /* wich codec this frame has */ +} jb_hist_element; //this is a private element + +typedef struct jb_speakup_settings { + /* settings */ + long min_jb; /* defines a hard clamp to use in setting the jitterbuffer delay */ + long max_jb; /* defines a hard clamp to use in setting the jitterbuffer delay */ + long max_successive_interp; /* the maximum count of successive interpolations before assuming silence */ + long extra_delay; /* amount of extra delay allowed before shrinking */ + long wait_grow; /* ms between growing */ + long wait_shrink; /* ms between shrinking */ + long max_diff; /* maximum number of milliseconds the jitterbuffer may be off */ +} jb_speakup_settings; + +typedef struct jb_speakup { + struct jb_speakup_hist_elementhist[JB_HISTORY_SIZE]; /* the history of the last received frames */ + long hist_sorted_delay[JB_HISTORY_SIZE]; /* a sorted buffer of the delays (lowest first) */ + long hist_sorted_timestamp[JB_HISTORY_SIZE]; /* a sorted buffer of the timestamps (lowest first) */ + + int hist_pointer; /* points to index in history for next entry */ + long last_adjustment; /* the time of the last adjustment (growing or shrinking) */ + long next_voice_time; /* the next ts is to be read from the jb (senders timestamp) */ + long cnt_successive_interp; /* the count of consecutive interpolation frames */ + long silence_begin_ts; /* the time of 
the last CNG frame, when in silence */ + long min; /* the clock difference within current history interval */ + long current; /* the present jitterbuffer adjustment */ + long target; /* the target jitterbuffer adjustment */ + long last_delay; /* the delay of the last packet, used for calc. jitter */ + + jb_speakup_frame *voiceframes; /* queued voiceframes */ + jb_speakup_frame *controlframes; /* queued controlframes */ + jb_speakup_settings settings; /* the settings of the jitterbuffer */ + jb_speakup_info info; /* the statistics of the jitterbuffer */ +} jb_speakup; + +//parameter definitions +/* return codes */ +#define JB_OK 0 +#define JB_EMPTY 1 +#define JB_NOFRAME 2 +#define JB_INTERP 3 +#define JB_NOJB 4 + + +/* frame types */ +#define JB_TYPE_CONTROL 1 +#define JB_TYPE_VOICE 2 +#define JB_TYPE_SILENCE 3 + +/* the jitterbuffer behaives different for each codec. + * The codecs are defined like the iana codes specified in RFC3551 + * The codecs that aren't specified in RFC3551 + * I took numbers in the range 1000 - 1100 + * default is g711x behaiviour */ +#define JB_CODEC_PCMU 0 //use this one if you have no PLC +#define JB_CODEC_PCMA 8 //use this one if you have no PLC +#define JB_CODEC_G723 4 +#define JB_CODEC_G729 18 + +#define JB_CODEC_PCMU_PLC 1000 //use this one if you have PLC enabled +#define JB_CODEC_PCMA_PLC 1008 //use this one if you have PLC enabled +#define JB_CODEC_GSM_EFR 1003 //This is GSM-Enhanced Full Rate and not normal GSM + +#define JB_CODEC_OTHER 1100 //Unknown codec for us, we use the g711 alg. + + +/* + * Creates a new jitterbuffer and sets the default settings. + * Always use this function for creating a new jitterbuffer. + */ +jb_speakup *jb_speakup_new(); + +/* + * The control frames and possible personal settings are kept. + * History and voice/silence frames are destroyed. 
+ */ +void jb_speakup_reset(jb_speakup *jb); + +/* + * Resets the jitterbuffer completely: all the control/voice/silence frames are destroyed + * and the default settings are restored as well. + */ +void jb_speakup_reset_all(jb_speakup *jb); + +/* + * Destroy the jitterbuffer and any frame within. + * Always use this function for destroying a jitterbuffer, + * otherwise memory may be leaked. + */ +void jb_speakup_destroy(jb_speakup *jb); + +/* + * Define your own settings for the jitterbuffer. Only settings that are != 0 + * are applied to the jitterbuffer. + */ +void jb_speakup_set_settings(jb_speakup *jb, jb_speakup_settings *settings); + +/* + * Get the statistics for the jitterbuffer. + * Copying the statistics directly from the jitterbuffer won't work because + * the statistics are only calculated when calling this function. + */ +void jb_speakup_get_info(jb_speakup *jb, jb_speakup_info *stats); + +/* + * Get the current settings of the jitterbuffer. + */ +void jb_speakup_get_settings(jb_speakup *jb, jb_speakup_settings *settings); + +/* + * Gives an estimation of the MOS of a call given the + * packetloss p, delay d, and which codec is used. + * The assumption is made that the echo cancellation is around 37dB. + */ +float jb_speakup_guess_mos(float p, long d, int codec); + +/* + * Returns JB_OK if there are still frames left in the jitterbuffer, + * otherwise JB_EMPTY is returned. + */ +int jb_speakup_has_frames(jb_speakup *jb); + +/* + * Put a packet (frame) into the jitterbuffer.
+ * *data - points to the packet + * type - type of packet, JB_TYPE_CONTROL|JB_TYPE_VOICE|JB_TYPE_SILENCE + * ms - duration of frame (only voice) + * ts - timestamp of the sender + * now - current timestamp (timestamp of arrival) + * codec - which codec the frame holds (only voice), if not defined, g711x will be used + * + * if type==control @REQUIRE: *data, type, ts, now + * if type==voice @REQUIRE: *data, type, ms, ts, now @OPTIONAL: codec + * if type==silence @REQUIRE: *data, type, ts, now + * on return *data is undefined + */ +void jb_speakup_put(jb_speakup *jb, void *data, int type, long ms, long ts, long now, int codec); + +/* + * Get a packet from the jitterbuffer if it's available. + * Control packets have a higher priority than voice and silence packets; + * they are always delivered as fast as possible. The delay of the jitterbuffer + * does not apply to these packets. + * @REQUIRE 1 < interpl <= jb->settings.extra_delay (=default JB_ALLOW_EXTRA_DELAY) + * + * return will be: + * JB_OK, *data points to the packet + * JB_INTERP, please interpolate for interpl milliseconds + * JB_NOFRAME, no frame scheduled + * JB_EMPTY, the jitterbuffer is empty + */ +int jb_speakup_get(jb_speakup *jb, void **data, long now, long interpl); + +/* debug functions */ +typedef void (*jb_output_function_t)(const char *fmt, ...); +void jb_speakup_setoutput(jb_output_function_t warn, jb_output_function_t err, jb_output_function_t dbg); + + +/******************************* + * The use of the jitterbuffer * + ******************************* + * Always create a new jitterbuffer with jb_speakup_new(). + * Always destroy a jitterbuffer with jb_speakup_destroy(). + * + * There is no lock (mutex) mechanism, that is your responsibility. + * The reason for this is that different environments require + * different ways of implementing a lock.
+ * + * The following functions require a lock on the jitterbuffer: + * jb_speakup_reset(), jb_speakup_reset_all(), jb_speakup_destroy(), jb_speakup_set_settings(), + * jb_speakup_get_info(), jb_speakup_get_settings(), jb_speakup_has_frames(), jb_speakup_put(), + * jb_speakup_get() + * + * The following functions do NOT require a lock on the jitterbuffer: + * jb_speakup_new(), jb_speakup_guess_mos() + * + * Since control packets have a higher priority than any other packet, + * a call may already have ended while there is audio left to play. We + * advise that you poll the jitterbuffer to check whether there are frames left. + * + * If the audio path is one-way (e.g. a voicemail box) and the latency doesn't + * matter, we advise setting a minimum jitterbuffer size. Then there is + * less loss and the quality is better. + */ + + +/**************************** + * debug messages explained * + **************************** + * N - jb_speakup_new() + * R - jb_speakup_reset() + * r - jb_speakup_reset_all() + * D - jb_speakup_destroy() + * S - jb_speakup_set_settings() + * H - jb_speakup_has_frames() + * I - jb_speakup_get_info() + * S - jb_speakup_get_settings() + * pC - jb_speakup_put() put Control packet + * pT - jb_speakup_put() Timestamp was already in the queue + * pV - jb_speakup_put() put Voice packet + * pS - jb_speakup_put() put Silence packet + * + * A - jb_speakup_get() + * // below is all the possible debug info when trying to get a packet + * gC - get_control() - there is a control message + * gs - get_voice() - there is a silence frame + * gS - get_voice() - we are in silence + * gL - get_voice() - we are in silence, frame is late + * gP - get_voice() - we are in silence, play frame (end of silence) + * ag - get_voicecase() - grow a little bit (diff < interpl/2) + * aG - get_voicecase() - grow by interpl + * as - get_voicecase() - shrink by the voiceframe we throw out + * aS - get_voicecase() - shrink by interpl + * aN - get_voicecase() - no time yet + * aL - get_voicecase() - frame is late +
* aP - get_voicecase() - play frame + * aI - get_voicecase() - interpolate + */ + +#ifdef __cplusplus +} +#endif + + +#endif +