/* freeswitch/libs/silk/test/Encoder.c */
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, (subject to the limitations in the disclaimer below)
are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Skype Limited, nor the names of specific
contributors, may be used to endorse or promote products derived from
this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
/*****************************/
/* Silk encoder test program */
/*****************************/
#ifdef _WIN32
#define _CRT_SECURE_NO_DEPRECATE 1
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "SKP_Silk_SDK_API.h"
/* Define codec specific settings */
/* Payload bytes budgeted per 20 ms frame: 250 * 8 bits / 20 ms equals a peak bitrate of 100 kbps */
#define MAX_BYTES_PER_FRAME     250
/* Number of frames the payload and input sample buffers in main() are sized for */
#define MAX_INPUT_FRAMES        5
/* NOTE(review): not referenced by the code in this file - confirm before removing */
#define MAX_LBRR_DELAY          2
/* NOTE(review): not referenced by the code in this file - confirm before removing */
#define MAX_FRAME_LENGTH        480
/* Frame duration in ms used when sizing the input sample buffer */
#define FRAME_LENGTH_MS         20
/* Highest accepted API sampling rate, in kHz */
#define MAX_API_FS_KHZ          48
#ifdef _SYSTEM_IS_BIG_ENDIAN
/* Reverses the byte order of every int16 in vec, in place, converting  */
/* little endian values to big endian or vice versa.                    */
void swap_endian(
    SKP_int16       vec[],              /*  I/O array of int16 values, byte-swapped in place    */
    SKP_int         len                 /*  I   number of elements in vec                       */
)
{
    SKP_int   idx;
    SKP_uint8 *bytes;
    SKP_uint8 first;

    for( idx = 0; idx < len; idx++ ) {
        /* View the element as two raw bytes and exchange them */
        bytes = (SKP_uint8 *)&vec[ idx ];
        first = bytes[ 0 ];
        bytes[ 0 ] = bytes[ 1 ];
        bytes[ 1 ] = first;
    }
}
#endif
/* Prints the command line synopsis and the list of supported settings  */
/* to stdout. argv[ 0 ] is printed as the program name.                 */
static void print_usage( char* argv[] ) {
    printf( "\nusage: %s in.pcm out.bit [settings]\n", argv[ 0 ] );
    printf( "\nin.pcm : Speech input to encoder" );
    /* Same name as in the synopsis line above (was "stream.bit") */
    printf( "\nout.bit : Bitstream output from encoder" );
    printf( "\n settings:" );
    printf( "\n-Fs_API <Hz> : API sampling rate in Hz, default: 24000" );
    printf( "\n-Fs_maxInternal <Hz> : Maximum internal sampling rate in Hz, default: 24000" );
    printf( "\n-packetlength <ms> : Packet interval in ms, default: 20" );
    printf( "\n-rate <bps> : Target bitrate; default: 25000" );
    printf( "\n-loss <perc> : Uplink loss estimate, in percent (0-100); default: 0" );
    printf( "\n-inbandFEC <flag> : Enable inband FEC usage (0/1); default: 0" );
    printf( "\n-complexity <comp> : Set complexity, 0: low, 1: medium, 2: high; default: 2" );
    printf( "\n-DTX <flag> : Enable DTX (0/1); default: 0" );
    printf( "\n-quiet : Print only some basic values" );
    printf( "\n");
}
int main( int argc, char* argv[] )
{
size_t counter;
SKP_int32 k, args, totPackets, totActPackets, ret;
SKP_int16 nBytes;
double sumBytes, sumActBytes, avg_rate, act_rate, nrg;
SKP_uint8 payload[ MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES ];
SKP_int16 in[ FRAME_LENGTH_MS * MAX_API_FS_KHZ * MAX_INPUT_FRAMES ];
char speechInFileName[ 150 ], bitOutFileName[ 150 ];
FILE *bitOutFile, *speechInFile;
SKP_int32 encSizeBytes;
void *psEnc;
#ifdef _SYSTEM_IS_BIG_ENDIAN
SKP_int16 nBytes_LE;
#endif
/* default settings */
SKP_int32 API_fs_Hz = 24000;
SKP_int32 max_internal_fs_Hz = 0;
SKP_int32 targetRate_bps = 25000;
SKP_int32 packetSize_ms = 20;
SKP_int32 frameSizeReadFromFile_ms = 20;
SKP_int32 packetLoss_perc = 0, complexity_mode = 2, smplsSinceLastPacket;
SKP_int32 INBandFEC_enabled = 0, DTX_enabled = 0, quiet = 0;
SKP_SILK_SDK_EncControlStruct encControl; // Struct for input to encoder
2014-09-22 20:00:19 +00:00
2014-08-08 15:24:42 +00:00
if( argc < 3 ) {
print_usage( argv );
exit( 0 );
2014-09-22 20:00:19 +00:00
}
2014-08-08 15:24:42 +00:00
/* get arguments */
args = 1;
strcpy( speechInFileName, argv[ args ] );
args++;
strcpy( bitOutFileName, argv[ args ] );
args++;
while( args < argc ) {
if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-Fs_API" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &API_fs_Hz );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-Fs_maxInternal" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &max_internal_fs_Hz );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-packetlength" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &packetSize_ms );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-rate" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &targetRate_bps );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &packetLoss_perc );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-complexity" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &complexity_mode );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-inbandFEC" ) == 0 ) {
sscanf( argv[ args + 1 ], "%d", &INBandFEC_enabled );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-DTX") == 0 ) {
sscanf( argv[ args + 1 ], "%d", &DTX_enabled );
args += 2;
} else if( SKP_STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-quiet" ) == 0 ) {
quiet = 1;
args++;
} else {
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
print_usage( argv );
exit( 0 );
}
}
/* If no max internal is specified, set to minimum of API fs and 24 kHz */
if( max_internal_fs_Hz == 0 ) {
max_internal_fs_Hz = 24000;
if( API_fs_Hz < max_internal_fs_Hz ) {
max_internal_fs_Hz = API_fs_Hz;
}
}
/* Print options */
if( !quiet ) {
printf("******************* Silk Encoder v %s ****************\n", SKP_Silk_SDK_get_version());
printf("******************* Compiled for %d bit cpu ********* \n", (int)sizeof(void*) * 8 );
printf( "Input: %s\n", speechInFileName );
printf( "Output: %s\n", bitOutFileName );
printf( "API sampling rate: %d Hz\n", API_fs_Hz );
printf( "Maximum internal sampling rate: %d Hz\n", max_internal_fs_Hz );
printf( "Packet interval: %d ms\n", packetSize_ms );
printf( "Inband FEC used: %d\n", INBandFEC_enabled );
printf( "DTX used: %d\n", DTX_enabled );
printf( "Complexity: %d\n", complexity_mode );
printf( "Target bitrate: %d bps\n", targetRate_bps );
}
/* Open files */
speechInFile = fopen( speechInFileName, "rb" );
if( speechInFile == NULL ) {
printf( "Error: could not open input file %s\n", speechInFileName );
exit( 0 );
}
bitOutFile = fopen( bitOutFileName, "wb" );
if( bitOutFile == NULL ) {
printf( "Error: could not open output file %s\n", bitOutFileName );
exit( 0 );
}
/* Add Silk header to stream */
{
static const char Silk_header[] = "#!SILK_V3";
fwrite( Silk_header, sizeof( char ), strlen( Silk_header ), bitOutFile );
}
/* Create Encoder */
ret = SKP_Silk_SDK_Get_Encoder_Size( &encSizeBytes );
if( ret ) {
printf( "\nSKP_Silk_create_encoder returned %d", ret );
}
psEnc = malloc( encSizeBytes );
/* Reset Encoder */
ret = SKP_Silk_SDK_InitEncoder( psEnc, &encControl );
if( ret ) {
printf( "\nSKP_Silk_reset_encoder returned %d", ret );
}
2014-09-22 20:00:19 +00:00
2014-08-08 15:24:42 +00:00
/* Set Encoder parameters */
encControl.API_sampleRate = API_fs_Hz;
encControl.maxInternalSampleRate = max_internal_fs_Hz;
encControl.packetSize = ( packetSize_ms * API_fs_Hz ) / 1000;
encControl.packetLossPercentage = packetLoss_perc;
encControl.useInBandFEC = INBandFEC_enabled;
encControl.useDTX = DTX_enabled;
encControl.complexity = complexity_mode;
encControl.bitRate = ( targetRate_bps > 0 ? targetRate_bps : 0 );
if( API_fs_Hz > MAX_API_FS_KHZ * 1000 || API_fs_Hz < 0 ) {
printf( "\nError: API sampling rate = %d out of range, valid range 8000 - 48000 \n \n", API_fs_Hz );
exit( 0 );
}
totPackets = 0;
totActPackets = 0;
smplsSinceLastPacket = 0;
sumBytes = 0.0;
sumActBytes = 0.0;
2014-09-22 20:00:19 +00:00
2014-08-08 15:24:42 +00:00
while( 1 ) {
/* Read input from file */
counter = fread( in, sizeof( SKP_int16 ), ( frameSizeReadFromFile_ms * API_fs_Hz ) / 1000, speechInFile );
#ifdef _SYSTEM_IS_BIG_ENDIAN
swap_endian( in, counter );
#endif
if( (SKP_int)counter < ( ( frameSizeReadFromFile_ms * API_fs_Hz ) / 1000 ) ) {
break;
}
/* max payload size */
nBytes = MAX_BYTES_PER_FRAME * MAX_INPUT_FRAMES;
/* Silk Encoder */
ret = SKP_Silk_SDK_Encode( psEnc, &encControl, in, (SKP_int16)counter, payload, &nBytes );
if( ret ) {
printf( "\nSKP_Silk_Encode returned %d", ret );
break;
}
/* Get packet size */
packetSize_ms = ( SKP_int )( ( 1000 * ( SKP_int32 )encControl.packetSize ) / encControl.API_sampleRate );
smplsSinceLastPacket += ( SKP_int )counter;
2014-09-22 20:00:19 +00:00
2014-08-08 15:24:42 +00:00
if( ( ( 1000 * smplsSinceLastPacket ) / API_fs_Hz ) == packetSize_ms ) {
/* Sends a dummy zero size packet in case of DTX period */
/* to make it work with the decoder test program. */
/* In practice should be handled by RTP sequence numbers */
totPackets++;
sumBytes += nBytes;
nrg = 0.0;
for( k = 0; k < ( SKP_int )counter; k++ ) {
nrg += in[ k ] * (double)in[ k ];
}
if( ( nrg / ( SKP_int )counter ) > 1e3 ) {
sumActBytes += nBytes;
totActPackets++;
}
/* Write payload size */
#ifdef _SYSTEM_IS_BIG_ENDIAN
nBytes_LE = nBytes;
swap_endian( &nBytes_LE, 1 );
fwrite( &nBytes_LE, sizeof( SKP_int16 ), 1, bitOutFile );
#else
fwrite( &nBytes, sizeof( SKP_int16 ), 1, bitOutFile );
#endif
/* Write payload */
fwrite( payload, sizeof( SKP_uint8 ), nBytes, bitOutFile );
2014-09-22 20:00:19 +00:00
2014-08-08 15:24:42 +00:00
if( !quiet ) {
fprintf( stderr, "\rPackets encoded: %d", totPackets );
}
smplsSinceLastPacket = 0;
}
}
/* Write dummy because it can not end with 0 bytes */
nBytes = -1;
/* Write payload size */
fwrite( &nBytes, sizeof( SKP_int16 ), 1, bitOutFile );
/* Free Encoder */
free( psEnc );
fclose( speechInFile );
fclose( bitOutFile );
avg_rate = 8.0 / packetSize_ms * sumBytes / totPackets;
act_rate = 8.0 / packetSize_ms * sumActBytes / totActPackets;
if( !quiet ) {
printf( "\nAverage bitrate: %.3f kbps", avg_rate );
printf( "\nActive bitrate: %.3f kbps", act_rate );
printf( "\n\n" );
} else {
/* print average and active bitrates */
printf( "%.3f %.3f \n", avg_rate, act_rate );
}
return 0;
}