update to snapshot spandsp-20090128

git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@11535 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
Michael Jerris
2009-01-28 04:48:03 +00:00
parent f2055745eb
commit 314ae8b6f3
264 changed files with 11552 additions and 5852 deletions

View File

@@ -22,26 +22,26 @@
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id: vector_float.c,v 1.14 2008/09/18 13:54:32 steveu Exp $
* $Id: vector_float.c,v 1.19 2009/01/28 03:41:27 steveu Exp $
*/
/*! \file */
#if defined(HAVE_CONFIG_H)
#include <config.h>
#include "config.h"
#endif
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "floating_fudge.h"
#if defined(HAVE_TGMATH_H)
#include <tgmath.h>
#endif
#if defined(HAVE_MATH_H)
#include <math.h>
#endif
#include "floating_fudge.h"
#include <assert.h>
#if defined(SPANDSP_USE_MMX)
@@ -55,7 +55,6 @@
#endif
#if defined(SPANDSP_USE_SSE3)
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#if defined(SPANDSP_USE_SSE4_1)
#include <smmintrin.h>
@@ -73,6 +72,32 @@
#include "spandsp/telephony.h"
#include "spandsp/vector_float.h"
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_copyf(): copy n floats from x to z.
   Whole groups of four elements are moved with unaligned 128 bit loads and
   stores, working from the highest group down to index 0. The 1 to 3 elements
   left over at the top of the vector are then copied one at a time.
   A count of zero or less copies nothing, as in the plain C version. */
void vec_copyf(float z[], const float x[], int n)
{
    int i;
    __m128 n1;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1];
    }
}
#else
/* Plain C version of vec_copyf(), on the #else side of the SSE2 build switch:
   copies x[i] to z[i] for i = 0 .. n - 1, in ascending order.
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source. */
void vec_copyf(float z[], const float x[], int n)
{
int i;
@@ -80,6 +105,7 @@ void vec_copyf(float z[], const float x[], int n)
for (i = 0; i < n; i++)
z[i] = x[i];
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_copy(double z[], const double x[], int n)
@@ -102,6 +128,90 @@ void vec_copyl(long double z[], const long double x[], int n)
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_negatef(): write -x[i] to z[i] for n floats.
   Groups of four elements are negated by XORing them with a mask that has
   only the top bit (0x80000000) of each 32 bit lane set, working from the
   highest group down to index 0. The 1 to 3 elements left over at the top of
   the vector are negated one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_negatef(float z[], const float x[], int n)
{
    /* The mask is built through a union. Reading a uint32_t object through a
       float pointer, as a cast would do, breaks the strict aliasing rules. */
    const union
    {
        uint32_t bits;
        float value;
    } sign_mask = {0x80000000u};
    int i;
    __m128 n1;
    __m128 n2;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n2 = _mm_set1_ps(sign_mask.value);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n1 = _mm_xor_ps(n1, n2);
        _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = -x[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = -x[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = -x[n - 1];
    }
}
#else
/* Plain C version of vec_negatef(): store the negation of each of the first
   n elements of x in the matching element of z, lowest index first. */
void vec_negatef(float z[], const float x[], int n)
{
    const float *in = x;
    float *out = z;
    int left;

    for (left = n;  left > 0;  left--)
        *out++ = -*in++;
}
#endif
/*- End of function --------------------------------------------------------*/
/* Store the negation of each of the first n doubles of x in the matching
   element of z, lowest index first. */
void vec_negate(double z[], const double x[], int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = -x[k];
        k++;
    }
}
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
/* Store the negation of each of the first n long doubles of x in the matching
   element of z, lowest index first. */
void vec_negatel(long double z[], const long double x[], int n)
{
    const long double *in = x;
    long double *out = z;
    int left = n;

    while (left > 0)
    {
        *out++ = -*in++;
        left--;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_zerof(): set the first n floats of z to zero.
   Whole groups of four elements are cleared with unaligned 128 bit stores,
   working from the highest group down to index 0. The 1 to 3 elements left
   over at the top of the vector are cleared one at a time.
   A count of zero or less writes nothing, as in the plain C version. */
void vec_zerof(float z[], int n)
{
    int i;
    __m128 n1;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below write in front of the array. */
    if (n <= 0)
        return;
    n1 = _mm_setzero_ps();
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
        _mm_storeu_ps(z + i, n1);
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = 0.0f;
        /* Fall through */
    case 2:
        z[n - 2] = 0.0f;
        /* Fall through */
    case 1:
        z[n - 1] = 0.0f;
    }
}
#else
/* Plain C version of vec_zerof(), on the #else side of the SSE2 build switch:
   stores 0.0f in z[i] for i = 0 .. n - 1.
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source. */
void vec_zerof(float z[], int n)
{
int i;
@@ -109,6 +219,7 @@ void vec_zerof(float z[], int n)
for (i = 0; i < n; i++)
z[i] = 0.0f;
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_zero(double z[], int n)
@@ -131,6 +242,30 @@ void vec_zerol(long double z[], int n)
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_setf(): set the first n floats of z to the value x.
   Whole groups of four elements are filled with unaligned 128 bit stores,
   working from the highest group down to index 0. The 1 to 3 elements left
   over at the top of the vector are filled one at a time.
   A count of zero or less writes nothing, as in the plain C version. */
void vec_setf(float z[], float x, int n)
{
    int i;
    __m128 n1;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below write in front of the array. */
    if (n <= 0)
        return;
    n1 = _mm_set1_ps(x);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
        _mm_storeu_ps(z + i, n1);
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x;
        /* Fall through */
    case 2:
        z[n - 2] = x;
        /* Fall through */
    case 1:
        z[n - 1] = x;
    }
}
#else
/* Plain C version of vec_setf(), on the #else side of the SSE2 build switch:
   stores the value x in z[i] for i = 0 .. n - 1.
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source. */
void vec_setf(float z[], float x, int n)
{
int i;
@@ -138,6 +273,7 @@ void vec_setf(float z[], float x, int n)
for (i = 0; i < n; i++)
z[i] = x;
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_set(double z[], double x, int n)
@@ -160,6 +296,35 @@ void vec_setl(long double z[], long double x, int n)
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_addf(): z[i] = x[i] + y[i] for n floats.
   Whole groups of four elements are added with unaligned 128 bit loads and
   stores, working from the highest group down to index 0. The 1 to 3 elements
   left over at the top of the vectors are added one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_addf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n2 = _mm_loadu_ps(y + i);
        n2 = _mm_add_ps(n1, n2);
        _mm_storeu_ps(z + i, n2);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y[n - 1];
    }
}
#else
/* Plain C version of vec_addf(), on the #else side of the SSE2 build switch:
   stores x[i] + y[i] in z[i] for i = 0 .. n - 1.
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source. */
void vec_addf(float z[], const float x[], const float y[], int n)
{
int i;
@@ -167,6 +332,7 @@ void vec_addf(float z[], const float x[], const float y[], int n)
for (i = 0; i < n; i++)
z[i] = x[i] + y[i];
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_add(double z[], const double x[], const double y[], int n)
@@ -187,16 +353,52 @@ void vec_addl(long double z[], const long double x[], const long double y[], int
}
/*- End of function --------------------------------------------------------*/
void vec_scaled_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_scaledxy_addf(): z[i] = x[i]*x_scale + y[i]*y_scale for
   n floats.
   Whole groups of four elements are processed with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vectors are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;
    __m128 n4;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n3 = _mm_set1_ps(x_scale);
    n4 = _mm_set1_ps(y_scale);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n2 = _mm_loadu_ps(y + i);
        n1 = _mm_mul_ps(n1, n3);
        n2 = _mm_mul_ps(n2, n4);
        n2 = _mm_add_ps(n1, n2);
        _mm_storeu_ps(z + i, n2);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*x_scale + y[n - 3]*y_scale;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*x_scale + y[n - 2]*y_scale;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*x_scale + y[n - 1]*y_scale;
    }
}
#else
/* Plain C version of vec_scaledxy_addf(): for each of the first n elements,
   scale x by x_scale and y by y_scale, and store the sum of the two in z. */
void vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k]*x_scale + y[k]*y_scale;
        k++;
    }
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_scaled_add(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
void vec_scaledxy_add(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
{
int i;
@@ -206,7 +408,7 @@ void vec_scaled_add(double z[], const double x[], double x_scale, const double y
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
void vec_scaled_addl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
void vec_scaledxy_addl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
{
int i;
@@ -216,6 +418,97 @@ void vec_scaled_addl(long double z[], const long double x[], long double x_scale
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_scaledy_addf(): z[i] = x[i] + y[i]*y_scale for n floats.
   Whole groups of four elements are processed with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vectors are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n3 = _mm_set1_ps(y_scale);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n2 = _mm_loadu_ps(y + i);
        n2 = _mm_mul_ps(n2, n3);
        n2 = _mm_add_ps(n1, n2);
        _mm_storeu_ps(z + i, n2);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y[n - 3]*y_scale;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y[n - 2]*y_scale;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y[n - 1]*y_scale;
    }
}
#else
/* Plain C version of vec_scaledy_addf(): for each of the first n elements,
   add y scaled by y_scale to x, and store the result in z. */
void vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n)
{
    int k = 0;

    while (k < n)
    {
        z[k] = x[k] + y[k]*y_scale;
        k++;
    }
}
#endif
/*- End of function --------------------------------------------------------*/
/* For each of the first n doubles, add y scaled by y_scale to x, and store
   the result in z. Elements are processed lowest index first. */
void vec_scaledy_add(double z[], const double x[], const double y[], double y_scale, int n)
{
    int k;

    for (k = 0;  k != n  &&  k < n;  ++k)
        z[k] = x[k] + y[k]*y_scale;
}
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
/* For each of the first n long doubles, add y scaled by y_scale to x, and
   store the result in z. Elements are processed lowest index first. */
void vec_scaledy_addl(long double z[], const long double x[], const long double y[], long double y_scale, int n)
{
    int k = 0;

    while (k < n)
    {
        z[k] = x[k] + y[k]*y_scale;
        ++k;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_subf(): z[i] = x[i] - y[i] for n floats.
   Whole groups of four elements are subtracted with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vectors are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_subf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n2 = _mm_loadu_ps(y + i);
        n2 = _mm_sub_ps(n1, n2);
        _mm_storeu_ps(z + i, n2);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] - y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] - y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] - y[n - 1];
    }
}
#else
/* Plain C version of vec_subf(), on the #else side of the SSE2 build switch:
   stores x[i] - y[i] in z[i] for i = 0 .. n - 1.
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source. */
void vec_subf(float z[], const float x[], const float y[], int n)
{
int i;
@@ -223,6 +516,7 @@ void vec_subf(float z[], const float x[], const float y[], int n)
for (i = 0; i < n; i++)
z[i] = x[i] - y[i];
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_sub(double z[], const double x[], const double y[], int n)
@@ -245,7 +539,7 @@ void vec_subl(long double z[], const long double x[], const long double y[], int
/*- End of function --------------------------------------------------------*/
#endif
void vec_scaled_subf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
void vec_scaledxy_subf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
int i;
@@ -254,7 +548,7 @@ void vec_scaled_subf(float z[], const float x[], float x_scale, const float y[],
}
/*- End of function --------------------------------------------------------*/
void vec_scaled_sub(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
void vec_scaledxy_sub(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
{
int i;
@@ -264,7 +558,7 @@ void vec_scaled_sub(double z[], const double x[], double x_scale, const double y
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
void vec_scaled_subl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
void vec_scaledxy_subl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
{
int i;
@@ -274,6 +568,35 @@ void vec_scaled_subl(long double z[], const long double x[], long double x_scale
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_scalar_mulf(): z[i] = x[i]*y for n floats.
   Whole groups of four elements are multiplied with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vector are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_scalar_mulf(float z[], const float x[], float y, int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n2 = _mm_set1_ps(y);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n1 = _mm_mul_ps(n1, n2);
        _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*y;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*y;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*y;
    }
}
#else
/* Plain C version of vec_scalar_mulf(), on the #else side of the SSE2 build
   switch: stores x[i]*y in z[i] for i = 0 .. n - 1.
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source. */
void vec_scalar_mulf(float z[], const float x[], float y, int n)
{
int i;
@@ -281,6 +604,7 @@ void vec_scalar_mulf(float z[], const float x[], float y, int n)
for (i = 0; i < n; i++)
z[i] = x[i]*y;
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_scalar_mul(double z[], const double x[], double y, int n)
@@ -292,17 +616,154 @@ void vec_scalar_mul(double z[], const double x[], double y, int n)
}
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
void vec_scalar_mull(long double z[], const long double x[], long double y, int n)
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_scalar_addf(): z[i] = x[i] + y for n floats.
   Whole groups of four elements are processed with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vector are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_scalar_addf(float z[], const float x[], float y, int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n2 = _mm_set1_ps(y);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n1 = _mm_add_ps(n1, n2);
        _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y;
    }
}
#else
/* Plain C version of vec_scalar_addf(), on the #else side of the SSE2 build
   switch: stores x[i] + y in z[i] for i = 0 .. n - 1.
   Only the addition belongs in the loop. A left-over "z[i] = x[i]*y;" line
   from the routine this one replaced would become the loop body, and push the
   addition out of the loop, where it would touch z[n] and x[n]. */
void vec_scalar_addf(float z[], const float x[], float y, int n)
{
    int i;

    for (i = 0;  i < n;  i++)
        z[i] = x[i] + y;
}
#endif
/*- End of function --------------------------------------------------------*/
/* Add the scalar y to each of the first n doubles of x, storing the sums in
   the matching elements of z, lowest index first. */
void vec_scalar_add(double z[], const double x[], double y, int n)
{
    const double *in = x;
    double *out = z;
    int left;

    for (left = n;  left > 0;  left--)
        *out++ = *in++ + y;
}
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
/* Add the scalar y to each of the first n long doubles of x, storing the sums
   in the matching elements of z, lowest index first. */
void vec_scalar_addl(long double z[], const long double x[], long double y, int n)
{
    int k = 0;

    while (k < n)
    {
        z[k] = x[k] + y;
        k++;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_scalar_subf(): z[i] = x[i] - y for n floats.
   Whole groups of four elements are processed with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vector are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_scalar_subf(float z[], const float x[], float y, int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n2 = _mm_set1_ps(y);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n1 = _mm_sub_ps(n1, n2);
        _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] - y;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] - y;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] - y;
    }
}
#else
/* Plain C version of vec_scalar_subf(): subtract the scalar y from each of
   the first n elements of x, storing the results in z, lowest index first. */
void vec_scalar_subf(float z[], const float x[], float y, int n)
{
    const float *in = x;
    float *out = z;
    int left = n;

    while (left > 0)
    {
        *out++ = *in++ - y;
        left--;
    }
}
#endif
/*- End of function --------------------------------------------------------*/
/* Subtract the scalar y from each of the first n doubles of x, storing the
   results in the matching elements of z, lowest index first. */
void vec_scalar_sub(double z[], const double x[], double y, int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k] - y;
        k++;
    }
}
/*- End of function --------------------------------------------------------*/
#if defined(HAVE_LONG_DOUBLE)
/* Subtract the scalar y from each of the first n long doubles of x, storing
   the results in the matching elements of z, lowest index first. */
void vec_scalar_subl(long double z[], const long double x[], long double y, int n)
{
    const long double *in = x;
    long double *out = z;
    int left;

    for (left = n;  left > 0;  left--)
        *out++ = *in++ - y;
}
/*- End of function --------------------------------------------------------*/
#endif
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_mulf(): z[i] = x[i]*y[i] for n floats.
   Whole groups of four elements are multiplied with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vectors are handled one at a time.
   A count of zero or less does nothing. */
void vec_mulf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n2 = _mm_loadu_ps(y + i);
        n3 = _mm_mul_ps(n1, n2);
        _mm_storeu_ps(z + i, n3);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*y[n - 1];
    }
}
#else
/* Plain C version of vec_mulf(), on the #else side of the SSE2 build switch.
   The visible statement stores the product x[i]*y[i] in z[i].
   The "@@" line inside the body is a hunk marker from the commit view this
   text was taken from; it is not part of the C source.
   NOTE(review): the loop header falls in the gap between two diff hunks and
   is not shown here - presumably i runs from 0 to n - 1, as in the sibling
   routines; confirm against the full file. */
void vec_mulf(float z[], const float x[], const float y[], int n)
{
int i;
@@ -311,6 +772,7 @@ void vec_mulf(float z[], const float x[], const float y[], int n)
z[i] = x[i]*y[i];
}
/*- End of function --------------------------------------------------------*/
#endif
void vec_mul(double z[], const double x[], const double y[], int n)
{
@@ -344,7 +806,7 @@ float vec_dot_prodf(const float x[], const float y[], int n)
z = 0.0f;
if ((i = n & ~3))
{
{
n4 = _mm_setzero_ps(); //sets sum to zero
for (i -= 4; i >= 0; i -= 4)
{
@@ -357,7 +819,7 @@ float vec_dot_prodf(const float x[], const float y[], int n)
n4 = _mm_add_ss(_mm_shuffle_ps(n4, n4, 1), n4);
_mm_store_ss(&z, n4);
}
/* Now deal with the last 1 to 3 elements, which don't fill in an SSE2 register */
/* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
switch (n & 3)
{
case 3:
@@ -419,17 +881,54 @@ float vec_circular_dot_prodf(const float x[], const float y[], int n, int pos)
}
/*- End of function --------------------------------------------------------*/
/* Factor each coefficient is multiplied by on every LMS update, so that the
   coefficients leak a little instead of wandering without limit. */
#define LMS_LEAK_RATE   0.9999f

#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* SSE variant of vec_lmsf(): update the n coefficients in y in place, as
   y[i] = y[i]*LMS_LEAK_RATE + x[i]*error.
   Whole groups of four elements are processed with unaligned 128 bit loads
   and stores, working from the highest group down to index 0. The 1 to 3
   elements left over at the top of the vectors are handled one at a time.
   A count of zero or less does nothing, as in the plain C version. */
void vec_lmsf(const float x[], float y[], int n, float error)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;
    __m128 n4;

    /* A negative n can leave (n & 3) non-zero, which would make the tail code
       below index in front of the arrays. */
    if (n <= 0)
        return;
    n3 = _mm_set1_ps(error);
    n4 = _mm_set1_ps(LMS_LEAK_RATE);
    for (i = (n & ~3) - 4;  i >= 0;  i -= 4)
    {
        n1 = _mm_loadu_ps(x + i);
        n2 = _mm_loadu_ps(y + i);
        n1 = _mm_mul_ps(n1, n3);
        n2 = _mm_mul_ps(n2, n4);
        n1 = _mm_add_ps(n1, n2);
        _mm_storeu_ps(y + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        y[n - 3] = y[n - 3]*LMS_LEAK_RATE + x[n - 3]*error;
        /* Fall through */
    case 2:
        y[n - 2] = y[n - 2]*LMS_LEAK_RATE + x[n - 2]*error;
        /* Fall through */
    case 1:
        y[n - 1] = y[n - 1]*LMS_LEAK_RATE + x[n - 1]*error;
    }
}
#else
/* Plain C version of vec_lmsf(): update the n coefficients in y in place, as
   y[i] = y[i]*LMS_LEAK_RATE + x[i]*error.
   Each coefficient gets exactly one update per call, with the same formula
   as the SSE variant, so both builds adapt at the same rate. */
void vec_lmsf(const float x[], float y[], int n, float error)
{
    int i;

    for (i = 0;  i < n;  i++)
    {
        /* Leak a little to tame uncontrolled wandering */
        y[i] = y[i]*LMS_LEAK_RATE + x[i]*error;
    }
}
#endif
/*- End of function --------------------------------------------------------*/
void vec_circular_lmsf(const float x[], float y[], int n, int pos, float error)