freeswitch/libs/ilbc/src/enhancer.c

/*
 * iLBC - a library for the iLBC codec
 *
 * enhancer.c - The iLBC low bit rate speech codec.
 *
 * Adapted by Steve Underwood <steveu@coppice.org> from the reference
 * iLBC code supplied in RFC3951.
 *
 * Original code Copyright (C) The Internet Society (2004).
 * All changes to produce this version Copyright (C) 2008 by Steve Underwood
 * All Rights Reserved.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * $Id: enhancer.c,v 1.2 2008/03/06 12:27:38 steveu Exp $
 */

/*! \file */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <inttypes.h>
#include <math.h>
#include <string.h>

#include "ilbc.h"
#include "constants.h"
#include "filter.h"
#include "enhancer.h"

/*----------------------------------------------------------------*
 * Find index in array such that the array element with said
 * index is the element of said array closest to "value"
 * according to the squared-error criterion
 *---------------------------------------------------------------*/

static void NearestNeighbor(int *index,            /* (o) index of array element closest to value */
                            const float *array,    /* (i) data array */
                            float value,           /* (i) value */
                            int arlength)          /* (i) dimension of data array */
{
    int i;
    float bestcrit;
    float crit;

    crit = array[0] - value;
    bestcrit = crit*crit;
    *index = 0;
    for (i = 1;  i < arlength;  i++)
    {
        crit = array[i] - value;
        crit = crit*crit;

        if (crit < bestcrit)
        {
            bestcrit= crit;
            *index = i;
        }
    }
}

/*----------------------------------------------------------------*
 * compute cross correlation between sequences
 *---------------------------------------------------------------*/

static void mycorr1(float *corr,       /* (o) correlation of seq1 and seq2 */
                    float *seq1,       /* (i) first sequence */
                    int dim1,          /* (i) dimension first seq1 */
                    const float *seq2, /* (i) second sequence */
                    int dim2)          /* (i) dimension seq2 */
{
    int i;
    int j;

    for (i = 0;  i <= dim1 - dim2;  i++)
    {
        corr[i] = 0.0f;
        for (j = 0;  j < dim2;  j++)
            corr[i] += seq1[i + j]*seq2[j];
    }
}

/*----------------------------------------------------------------*
 * upsample finite array assuming zeros outside bounds
 *---------------------------------------------------------------*/

static void enh_upsample(float *useq1,     /* (o) upsampled output sequence */
                         float *seq1,      /* (i) unupsampled sequence */
                         int dim1,         /* (i) dimension seq1 */
                         int hfl)          /* (i) polyphase filter length=2*hfl+1 */
{
    float *pu;
    float *ps;
    int i;
    int j;
    int k;
    int q;
    int filterlength;
    int hfl2;
    const float *polyp[ENH_UPS0]; /* pointers to polyphase columns */
    const float *pp;

    /* define pointers for filter */

    filterlength = 2*hfl + 1;

    if (filterlength > dim1)
    {
        hfl2 = (int) (dim1/2);
        for (j = 0;  j < ENH_UPS0;  j++)
            polyp[j] = polyphaserTbl + j*filterlength + hfl - hfl2;
        hfl = hfl2;
        filterlength = 2*hfl + 1;
    }
    else
    {
        for (j = 0;  j < ENH_UPS0;  j++)
            polyp[j] = polyphaserTbl+j*filterlength;
    }

    /* filtering: filter overhangs left side of sequence */
    pu = useq1;
    for (i = hfl;  i < filterlength;  i++)
    {
        for (j = 0;  j < ENH_UPS0;  j++)
        {
            *pu = 0.0f;
            pp = polyp[j];
            ps = seq1 + i;
            for (k = 0;  k <= i;  k++)
                *pu += *ps-- * *pp++;
            pu++;
        }
    }

    /* filtering: simple convolution=inner products */
    for (i = filterlength;  i < dim1;  i++)
    {
        for (j = 0;  j < ENH_UPS0;  j++)
        {
            *pu = 0.0f;
            pp = polyp[j];
            ps = seq1 + i;
            for (k = 0;  k < filterlength;  k++)
                *pu += *ps-- * *pp++;
            pu++;
        }
    }

    /* filtering: filter overhangs right side of sequence */
    for (q = 1;  q <= hfl;  q++)
    {
        for (j = 0;  j < ENH_UPS0;  j++)
        {
            *pu = 0.0f;
            pp = polyp[j] + q;
            ps = seq1 + dim1 - 1;
            for (k = 0;  k < filterlength - q;  k++)
                *pu += *ps-- * *pp++;
            pu++;
        }
    }
}

/*----------------------------------------------------------------*
 * find segment starting near idata+estSegPos that has highest
 * correlation with idata+centerStartPos through
 * idata+centerStartPos+ENH_BLOCKL-1 segment is found at a
 * resolution of ENH_UPSO times the original of the original
 * sampling rate
 *---------------------------------------------------------------*/

static void refiner(float *seg,            /* (o) segment array */
                    float *updStartPos,    /* (o) updated start point */
                    float *idata,          /* (i) original data buffer */
                    int idatal,            /* (i) dimension of idata */
                    int centerStartPos,    /* (i) beginning center segment */
                    float estSegPos,       /* (i) estimated beginning other segment */
                    float period)          /* (i) estimated pitch period */
{
    int estSegPosRounded;
    int searchSegStartPos;
    int searchSegEndPos;
    int corrdim;
    int tloc;
    int tloc2;
    int i;
    int st;
    int en;
    int fraction;
    float vect[ENH_VECTL];
    float corrVec[ENH_CORRDIM];
    float maxv;
    float corrVecUps[ENH_CORRDIM*ENH_UPS0];

    period = period;

    /* defining array bounds */
    estSegPosRounded = (int)(estSegPos - 0.5f);

    searchSegStartPos = estSegPosRounded - ENH_SLOP;

    if (searchSegStartPos < 0)
        searchSegStartPos = 0;
    searchSegEndPos = estSegPosRounded + ENH_SLOP;

    if (searchSegEndPos + ENH_BLOCKL >= idatal)
        searchSegEndPos = idatal - ENH_BLOCKL - 1;
    corrdim = searchSegEndPos - searchSegStartPos + 1;

    /* compute upsampled correlation (corr33) and find location of max */
    mycorr1(corrVec, idata + searchSegStartPos, corrdim + ENH_BLOCKL - 1, idata + centerStartPos, ENH_BLOCKL);
    enh_upsample(corrVecUps, corrVec, corrdim, ENH_FL0);
    tloc = 0;
    maxv = corrVecUps[0];
    for (i = 1;  i < ENH_UPS0*corrdim;  i++)
    {
        if (corrVecUps[i]>maxv)
        {
            tloc = i;
            maxv = corrVecUps[i];
        }
    }

    /* make vector can be upsampled without ever running outside bounds */
    *updStartPos = (float) searchSegStartPos + (float) tloc/(float) ENH_UPS0 + 1.0f;
    tloc2 = (int) (tloc/ENH_UPS0);

    if (tloc > tloc2*ENH_UPS0)
        tloc2++;
    st = searchSegStartPos + tloc2 - ENH_FL0;

    if (st < 0)
    {
        memset(vect, 0, -st*sizeof(float));
        memcpy(&vect[-st], idata, (ENH_VECTL + st)*sizeof(float));
    }
    else
    {
        en = st + ENH_VECTL;

        if (en>idatal)
        {
            memcpy(vect, &idata[st], (ENH_VECTL - (en - idatal))*sizeof(float));
            memset(&vect[ENH_VECTL - (en - idatal)], 0, (en - idatal)*sizeof(float));
        }
        else
        {
            memcpy(vect, &idata[st], ENH_VECTL*sizeof(float));
        }
    }
    fraction = tloc2*ENH_UPS0 - tloc;

    /* compute the segment (this is actually a convolution) */
    mycorr1(seg, vect, ENH_VECTL, polyphaserTbl + (2*ENH_FL0 + 1)*fraction, 2*ENH_FL0 + 1);
}

/*----------------------------------------------------------------*
 * find the smoothed output data
 *---------------------------------------------------------------*/

static void smath(float *odata,    /* (o) smoothed output */
                  float *sseq,     /* (i) said second sequence of waveforms */
                  int hl,          /* (i) 2*hl+1 is sseq dimension */
                  float alpha0)    /* (i) max smoothing energy fraction */
{
    int i;
    int k;
    float w00;
    float w10;
    float w11;
    float A;
    float B;
    float C;
    float *psseq;
    float err,errs;
    float surround[ILBC_BLOCK_LEN_MAX]; /* shape contributed by other than current */
    float wt[2*ENH_HL + 1];     /* waveform weighting to get surround shape */
    float denom;

    /* create shape of contribution from all waveforms except the
       current one */

    for (i = 1;  i <= 2*hl + 1;  i++)
        wt[i - 1] = 0.5f*(1.0f - cosf(2.0f*PI*i/(2.0f*hl + 2.0f)));
    wt[hl] = 0.0f; /* for clarity, not used */
    for (i = 0;  i < ENH_BLOCKL;  i++)
        surround[i] = sseq[i]*wt[0];

    for (k = 1;  k < hl;  k++)
    {
        psseq = sseq + k*ENH_BLOCKL;
        for (i = 0;  i < ENH_BLOCKL;  i++)
            surround[i] += psseq[i]*wt[k];
    }
    for (k = hl + 1;  k <= 2*hl;  k++)
    {
        psseq = sseq + k*ENH_BLOCKL;
        for (i = 0;  i < ENH_BLOCKL;  i++)
            surround[i] += psseq[i]*wt[k];
    }

    /* compute some inner products */
    w00 =
    w10 =
    w11 = 0.0f;
    psseq = sseq + hl*ENH_BLOCKL; /* current block  */
    for (i = 0;  i < ENH_BLOCKL;  i++)
    {
        w00 += psseq[i]*psseq[i];
        w11 += surround[i]*surround[i];
        w10 += surround[i]*psseq[i];
    }

    if (fabsf(w11) < 1.0f)
        w11 = 1.0f;
    C = sqrtf(w00/w11);

    /* first try enhancement without power-constraint */
    errs = 0.0f;
    psseq = sseq + hl*ENH_BLOCKL;
    for (i = 0;  i < ENH_BLOCKL;  i++)
    {
        odata[i] = C*surround[i];
        err = psseq[i] - odata[i];
        errs += err*err;
    }

    /* if constraint violated by first try, add constraint */
    if (errs > alpha0 * w00)
    {
        if (w00 < 1)
            w00 = 1;
        denom = (w11*w00 - w10*w10)/(w00*w00);

        if (denom > 0.0001f)
        {
            /* eliminates numerical problems for if smooth */
            A = sqrtf((alpha0 - alpha0*alpha0/4)/denom);
            B = -alpha0/2.0f - A*w10/w00;
            B = B + 1.0f;
        }
        else
        {
            /* essentially no difference between cycles; smoothing not needed */
            A = 0.0f;
            B = 1.0f;
        }

        /* create smoothed sequence */
        psseq = sseq + hl*ENH_BLOCKL;
        for (i = 0;  i < ENH_BLOCKL;  i++)
            odata[i] = A*surround[i] + B*psseq[i];
    }
}

/*----------------------------------------------------------------*
 * get the pitch-synchronous sample sequence
 *---------------------------------------------------------------*/

static void getsseq(float *sseq,           /* (o) the pitch-synchronous sequence */
                    float *idata,          /* (i) original data */
                    int idatal,            /* (i) dimension of data */
                    int centerStartPos,    /* (i) where current block starts */
                    float *period,         /* (i) rough-pitch-period array */
                    const float *plocs,    /* (i) where periods of period array are taken */
                    int periodl,           /* (i) dimension period array */
                    int hl)                /* (i) 2*hl+1 is the number of sequences */
{
    int i;
    int centerEndPos;
    int q;
    float blockStartPos[2*ENH_HL + 1];
    int lagBlock[2*ENH_HL + 1];
    float plocs2[ENH_PLOCSL];
    float *psseq;

    centerEndPos = centerStartPos + ENH_BLOCKL - 1;

    /* present */
    NearestNeighbor(lagBlock + hl,
                    plocs,
                    0.5f*(centerStartPos + centerEndPos),
                    periodl);

    blockStartPos[hl] = (float) centerStartPos;

    psseq = sseq + ENH_BLOCKL*hl;
    memcpy(psseq, idata + centerStartPos, ENH_BLOCKL*sizeof(float));

    /* past */
    for (q = hl - 1;  q >= 0;  q--)
    {
        blockStartPos[q] = blockStartPos[q + 1] - period[lagBlock[q + 1]];
        NearestNeighbor(lagBlock + q,
                        plocs,
                        blockStartPos[q] + ENH_BLOCKL_HALF - period[lagBlock[q + 1]],
                        periodl);

        if (blockStartPos[q] - ENH_OVERHANG >= 0)
        {
            refiner(sseq + q*ENH_BLOCKL,
                    blockStartPos + q,
                    idata,
                    idatal,
                    centerStartPos,
                    blockStartPos[q],
                    period[lagBlock[q + 1]]);
        }
        else
        {
            psseq = sseq + q*ENH_BLOCKL;
            memset(psseq, 0, ENH_BLOCKL*sizeof(float));
        }
    }

    /* future */
    for (i = 0;  i < periodl;  i++)
        plocs2[i] = plocs[i] - period[i];
    for (q = hl + 1;  q <= 2*hl;  q++)
    {
        NearestNeighbor(lagBlock + q, plocs2, blockStartPos[q - 1] + ENH_BLOCKL_HALF, periodl);

        blockStartPos[q] = blockStartPos[q - 1] + period[lagBlock[q]];
        if (blockStartPos[q] + ENH_BLOCKL + ENH_OVERHANG < idatal)
        {
            refiner(sseq + ENH_BLOCKL*q,
                    blockStartPos + q,
                    idata,
                    idatal,
                    centerStartPos,
                    blockStartPos[q],
                    period[lagBlock[q]]);
        }
        else
        {
            psseq = sseq + q*ENH_BLOCKL;
            memset(psseq, 0, ENH_BLOCKL*sizeof(float));
        }
    }
}

/*----------------------------------------------------------------*
 * perform enhancement on idata+centerStartPos through
 * idata+centerStartPos+ENH_BLOCKL-1
 *---------------------------------------------------------------*/

static void enhancer(float *odata,         /* (o) smoothed block, dimension blockl */
                     float *idata,         /* (i) data buffer used for enhancing */
                     int idatal,           /* (i) dimension idata */
                     int centerStartPos,   /* (i) first sample current block within idata */
                     float alpha0,         /* (i) max correction-energy-fraction (in [0,1]) */
                     float *period,        /* (i) pitch period array */
                     const float *plocs,   /* (i) locations where period array values valid */
                     int periodl)          /* (i) dimension of period and plocs */
{
    float sseq[(2*ENH_HL + 1)*ENH_BLOCKL];

    /* get said second sequence of segments */
    getsseq(sseq, idata, idatal, centerStartPos, period, plocs, periodl, ENH_HL);

    /* compute the smoothed output from said second sequence */
    smath(odata, sseq, ENH_HL, alpha0);
}

/*----------------------------------------------------------------*
 * cross correlation
 *---------------------------------------------------------------*/

float xCorrCoef(float *target,      /* (i) first array */
                float *regressor,   /* (i) second array */
                int subl)           /* (i) dimension arrays */
{
    int i;
    float ftmp1;
    float ftmp2;

    ftmp1 = 0.0f;
    ftmp2 = 0.0f;
    for (i = 0;  i < subl;  i++)
    {
        ftmp1 += target[i]*regressor[i];
        ftmp2 += regressor[i]*regressor[i];
    }

    if (ftmp1 > 0.0f)
        return (float)(ftmp1*ftmp1/ftmp2);
    return 0.0f;
}

/*----------------------------------------------------------------*
 * interface for enhancer
 *---------------------------------------------------------------*/

int enhancerInterface(float *out,                         /* (o) enhanced signal */
                      float *in,                          /* (i) unenhanced signal */
                      ilbc_decode_state_t *iLBCdec_inst)  /* (i) buffers etc */
{
    float *enh_buf;
    float *enh_period;
    int iblock;
    int isample;
    int lag = 0;
    int ilag;
    int i;
    int ioffset;
    float cc;
    float maxcc;
    float ftmp1;
    float ftmp2;
    float *inPtr;
    float *enh_bufPtr1;
    float *enh_bufPtr2;
    float plc_pred[ENH_BLOCKL];

    float lpState[6];
    float downsampled[(ENH_NBLOCKS*ENH_BLOCKL + 120)/2];
    int inLen = ENH_NBLOCKS*ENH_BLOCKL + 120;
    int start;
    int plc_blockl;
    int inlag;

    enh_buf = iLBCdec_inst->enh_buf;
    enh_period = iLBCdec_inst->enh_period;

    memmove(enh_buf,
            &enh_buf[iLBCdec_inst->blockl],
            (ENH_BUFL - iLBCdec_inst->blockl)*sizeof(float));

    memcpy(&enh_buf[ENH_BUFL - iLBCdec_inst->blockl],
           in,
           iLBCdec_inst->blockl*sizeof(float));

    if (iLBCdec_inst->mode == 30)
        plc_blockl = ENH_BLOCKL;
    else
        plc_blockl = 40;

    /* when 20 ms frame, move processing one block */
    ioffset = 0;
    if (iLBCdec_inst->mode == 20)
        ioffset = 1;

    i = 3 - ioffset;
    memmove(enh_period, &enh_period[i], (ENH_NBLOCKS_TOT - i)*sizeof(float));

    /* Set state information to the 6 samples right before
       the samples to be downsampled. */

    memcpy(lpState,
           enh_buf + (ENH_NBLOCKS_EXTRA + ioffset)*ENH_BLOCKL - 126,
           6*sizeof(float));

    /* Down sample a factor 2 to save computations */
    DownSample(enh_buf + (ENH_NBLOCKS_EXTRA + ioffset)*ENH_BLOCKL - 120,
               lpFilt_coefsTbl,
               inLen - ioffset*ENH_BLOCKL,
               lpState,
               downsampled);

    /* Estimate the pitch in the down sampled domain. */
    for (iblock = 0; iblock<ENH_NBLOCKS-ioffset; iblock++)
    {
        lag = 10;
        maxcc = xCorrCoef(downsampled + 60 + iblock*ENH_BLOCKL_HALF,
                          downsampled + 60 + iblock*ENH_BLOCKL_HALF - lag,
                          ENH_BLOCKL_HALF);
        for (ilag = 11;  ilag < 60;  ilag++)
        {
            cc = xCorrCoef(downsampled + 60 + iblock*ENH_BLOCKL_HALF,
                           downsampled + 60 + iblock*ENH_BLOCKL_HALF - ilag,
                           ENH_BLOCKL_HALF);
            if (cc > maxcc)
            {
                maxcc = cc;
                lag = ilag;
            }
        }

        /* Store the estimated lag in the non-downsampled domain */
        enh_period[iblock + ENH_NBLOCKS_EXTRA + ioffset] = (float) lag*2.0f;
    }
    /* PLC was performed on the previous packet */
    if (iLBCdec_inst->prev_enh_pl == 1)
    {
        inlag = (int) enh_period[ENH_NBLOCKS_EXTRA + ioffset];

        lag = inlag - 1;
        maxcc = xCorrCoef(in, in + lag, plc_blockl);
        for (ilag = inlag;  ilag <= inlag + 1;  ilag++)
        {
            cc = xCorrCoef(in, in + ilag, plc_blockl);
            if (cc > maxcc)
            {
                maxcc = cc;
                lag = ilag;
            }
        }

        enh_period[ENH_NBLOCKS_EXTRA + ioffset - 1] = (float) lag;

        /* compute new concealed residual for the old lookahead,
           mix the forward PLC with a backward PLC from
           the new frame */

        inPtr = &in[lag - 1];

        enh_bufPtr1 = &plc_pred[plc_blockl - 1];

        if (lag > plc_blockl)
            start = plc_blockl;
        else
            start = lag;

        for (isample = start;  isample > 0;  isample--)
            *enh_bufPtr1-- = *inPtr--;

        enh_bufPtr2=&enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl];
        for (isample = (plc_blockl - 1 - lag);  isample >= 0;  isample--)
            *enh_bufPtr1-- = *enh_bufPtr2--;

        /* limit energy change */
        ftmp2 = 0.0f;
        ftmp1 = 0.0f;
        for (i = 0;  i < plc_blockl;  i++)
        {
            ftmp2 += enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl - i]*enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl - i];
            ftmp1 += plc_pred[i]*plc_pred[i];
        }
        ftmp1 = sqrtf(ftmp1/(float) plc_blockl);
        ftmp2 = sqrtf(ftmp2/(float) plc_blockl);
        if (ftmp1 > 2.0f*ftmp2  &&  ftmp1 > 0.0f)
        {
            for (i = 0;  i < plc_blockl - 10;  i++)
                plc_pred[i] *= 2.0f*ftmp2/ftmp1;
            for (i = plc_blockl - 10;  i < plc_blockl;  i++)
                plc_pred[i]*=(float)(i - plc_blockl + 10)*(1.0f - 2.0f*ftmp2/ftmp1)/10.0f + 2.0f*ftmp2/ftmp1;
        }

        enh_bufPtr1 = &enh_buf[ENH_BUFL - 1 - iLBCdec_inst->blockl];
        for (i = 0;  i < plc_blockl;  i++)
        {
            ftmp1 = (float) (i + 1)/(float) (plc_blockl + 1);
            *enh_bufPtr1 *= ftmp1;
            *enh_bufPtr1 += (1.0f - ftmp1)*plc_pred[plc_blockl - 1 - i];
            enh_bufPtr1--;
        }
    }

    if (iLBCdec_inst->mode == 20)
    {
        /* Enhancer with 40 samples delay */
        for (iblock = 0;  iblock < 2;  iblock++)
        {
            enhancer(out + iblock*ENH_BLOCKL,
                     enh_buf,
                     ENH_BUFL,
                     (5 + iblock)*ENH_BLOCKL + 40,
                     ENH_ALPHA0,
                     enh_period,
                     enh_plocsTbl,
                     ENH_NBLOCKS_TOT);
        }
    }
    else if (iLBCdec_inst->mode == 30)
    {
        /* Enhancer with 80 samples delay */
        for (iblock = 0;  iblock < 3;  iblock++)
        {
            enhancer(out + iblock*ENH_BLOCKL,
                     enh_buf,
                     ENH_BUFL,
                     (4 + iblock)*ENH_BLOCKL,
                     ENH_ALPHA0,
                     enh_period,
                     enh_plocsTbl,
                     ENH_NBLOCKS_TOT);
        }
    }

    return lag*2;
}