2010-09-21 12:35:41 -05:00

409 lines
10 KiB
C

/*---------------------------------------------------------------------------*\
FILE........: c2sim.c
AUTHOR......: David Rowe
DATE CREATED: 20/8/2010
Codec2 simulation. Combines encoder and decoder and allows switching in
out various algorithms and quantisation steps.
\*---------------------------------------------------------------------------*/
/*
Copyright (C) 2009 David Rowe
All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 2.1, as
published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include "defines.h"
#include "sine.h"
#include "nlp.h"
#include "dump.h"
#include "lpc.h"
#include "lsp.h"
#include "quantise.h"
#include "phase.h"
#include "postfilter.h"
#include "interp.h"
/*---------------------------------------------------------------------------*\
switch_present()
Searches the command line arguments for a "switch". If the switch is
found, returns the command line argument where it ws found, else returns
NULL.
\*---------------------------------------------------------------------------*/
int switch_present(sw,argc,argv)
register char sw[]; /* switch in string form */
register int argc; /* number of command line arguments */
register char *argv[]; /* array of command line arguments in string form */
{
register int i; /* loop variable */
for(i=1; i<argc; i++)
if (!strcmp(sw,argv[i]))
return(i);
return 0;
}
void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[]);
/*---------------------------------------------------------------------------*\
MAIN
\*---------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
FILE *fout; /* output speech file */
FILE *fin; /* input speech file */
short buf[N]; /* input/output buffer */
float Sn[M]; /* float input speech samples */
COMP Sw[FFT_ENC]; /* DFT of Sn[] */
float w[M]; /* time domain hamming window */
COMP W[FFT_ENC]; /* DFT of w[] */
MODEL model;
float Pn[2*N]; /* trapezoidal synthesis window */
float Sn_[2*N]; /* synthesised speech */
int i; /* loop variable */
int frames;
float prev_Wo;
float pitch;
int voiced1;
char out_file[MAX_STR];
int arg;
float snr;
float sum_snr;
int lpc_model, order;
int lsp, lsp_quantiser;
float ak[LPC_MAX];
COMP Sw_[FFT_ENC];
int dump;
int phase0;
float ex_phase[MAX_AMP+1];
int postfilt;
float bg_est;
int hand_voicing;
FILE *fvoicing;
MODEL prev_model, interp_model;
int decimate;
void *nlp_states;
for(i=0; i<M; i++)
Sn[i] = 1.0;
for(i=0; i<2*N; i++)
Sn_[i] = 0;
prev_Wo = TWO_PI/P_MAX;
prev_model.Wo = TWO_PI/P_MIN;
prev_model.L = floor(PI/prev_model.Wo);
for(i=1; i<=prev_model.L; i++) {
prev_model.A[i] = 0.0;
prev_model.phi[i] = 0.0;
}
for(i=1; i<=MAX_AMP; i++) {
ex_phase[i] = 0.0;
}
nlp_states = nlp_create();
if (argc < 2) {
fprintf(stderr, "\nCodec2 - 2400 bit/s speech codec - Simulation Program\n"
"\thttp://rowetel.com/codec2.html\n\n"
"usage: %s InputFile [-o OutputFile]\n"
"\t[-o lpc Order]\n"
"\t[--lsp]\n"
"\t[--phase0]\n"
"\t[--postfilter]\n"
"\t[--hand_voicing]\n"
"\t[--dec]\n"
"\t[--dump DumpFilePrefix]\n", argv[0]);
exit(1);
}
/* Interpret command line arguments -------------------------------------*/
/* Input file */
if ((fin = fopen(argv[1],"rb")) == NULL) {
fprintf(stderr, "Error opening input bit file: %s: %s.\n",
argv[1], strerror(errno));
exit(1);
}
/* Output file */
if ((arg = switch_present("-o",argc,argv))) {
if ((fout = fopen(argv[arg+1],"wb")) == NULL) {
fprintf(stderr, "Error opening output speech file: %s: %s.\n",
argv[arg+1], strerror(errno));
exit(1);
}
strcpy(out_file,argv[arg+1]);
}
else
fout = NULL;
lpc_model = 0;
if ((arg = switch_present("--lpc",argc,argv))) {
lpc_model = 1;
order = atoi(argv[arg+1]);
if ((order < 4) || (order > 20)) {
fprintf(stderr, "Error in lpc order: %d\n", order);
exit(1);
}
}
dump = switch_present("--dump",argc,argv);
if (dump)
dump_on(argv[dump+1]);
lsp = switch_present("--lsp",argc,argv);
lsp_quantiser = 0;
phase0 = switch_present("--phase0",argc,argv);
if (phase0) {
ex_phase[0] = 0;
}
hand_voicing = switch_present("--hand_voicing",argc,argv);
if (hand_voicing) {
fvoicing = fopen(argv[hand_voicing+1],"rt");
assert(fvoicing != NULL);
}
bg_est = 0.0;
postfilt = switch_present("--postfilter",argc,argv);
decimate = switch_present("--dec",argc,argv);
/* Initialise ------------------------------------------------------------*/
make_analysis_window(w,W);
make_synthesis_window(Pn);
quantise_init();
/* Main loop ------------------------------------------------------------*/
frames = 0;
sum_snr = 0;
while(fread(buf,sizeof(short),N,fin)) {
frames++;
/* Read input speech */
for(i=0; i<M-N; i++)
Sn[i] = Sn[i+N];
for(i=0; i<N; i++)
Sn[i+M-N] = buf[i];
/* Estimate pitch */
nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo);
prev_Wo = TWO_PI/pitch;
model.Wo = TWO_PI/pitch;
/* estimate model parameters */
dft_speech(Sw, Sn, w);
two_stage_pitch_refinement(&model, Sw);
estimate_amplitudes(&model, Sw, W);
dump_Sn(Sn); dump_Sw(Sw); dump_model(&model);
/* optional zero-phase modelling */
if (phase0) {
float Wn[M]; /* windowed speech samples */
float Rk[LPC_ORD+1]; /* autocorrelation coeffs */
dump_phase(&model.phi[0], model.L);
/* find aks here, these are overwritten if LPC modelling is enabled */
for(i=0; i<M; i++)
Wn[i] = Sn[i]*w[i];
autocorrelate(Wn,Rk,M,LPC_ORD);
levinson_durbin(Rk,ak,LPC_ORD);
if (lpc_model)
assert(order == LPC_ORD);
dump_ak(ak, LPC_ORD);
/* determine voicing */
snr = est_voicing_mbe(&model, Sw, W, (FS/TWO_PI)*model.Wo, Sw_);
dump_Sw_(Sw_);
dump_snr(snr);
/* just to make sure we are not cheating - kill all phases */
for(i=0; i<MAX_AMP; i++)
model.phi[i] = 0;
if (hand_voicing) {
fscanf(fvoicing,"%d\n",&model.voiced);
}
}
/* optional LPC model amplitudes */
if (lpc_model) {
int lpc_correction;
float e;
float lsps[LPC_ORD];
int lsp_indexes[LPC_ORD];
e = speech_to_uq_lsps(lsps, ak, Sn, w, order);
lpc_correction = need_lpc_correction(&model, ak, e);
if (lsp) {
encode_lsps(lsp_indexes, lsps, LPC_ORD);
/*
for(i=0; i<LPC_ORD; i++)
printf("lsps[%d] = %f lsp_indexes[%d] = %d\n",
i, lsps[i], i, lsp_indexes[i]);
printf("\n");
*/
decode_lsps(lsps, lsp_indexes, LPC_ORD);
bw_expand_lsps(lsps, LPC_ORD);
lsp_to_lpc(lsps, ak, LPC_ORD);
}
e = decode_energy(encode_energy(e));
model.Wo = decode_Wo(encode_Wo(model.Wo));
aks_to_M2(ak, order, &model, e, &snr, 1);
apply_lpc_correction(&model, lpc_correction);
sum_snr += snr;
dump_quantised_model(&model);
}
/* option decimation to 20ms rate, which enables interpolation
routine to synthesise in between frame */
if (decimate) {
if (!phase0) {
printf("needs --phase0 to resample phase for interpolated Wo\n");
exit(0);
}
/* odd frame - interpolate */
if (frames%2) {
#ifdef TEST
model.voiced = 1;
prev_model.voiced = 1;
if (fabs(prev_model.Wo - model.Wo) < 0.1*model.Wo) {
interp_model.voiced = 1;
interpolate(&interp_model, &prev_model, &model);
for(i=0; i<=interp_model.L; i++) {
interp_model.phi[i] = phi1[i];
}
printf("interp\n");
}
else
interp_model = tmp_model;
#endif
interp_model.voiced = voiced1;
interpolate(&interp_model, &prev_model, &model);
if (phase0)
phase_synth_zero_order(&interp_model, ak, ex_phase);
if (postfilt)
postfilter(&interp_model, &bg_est);
synth_one_frame(buf, &interp_model, Sn_, Pn);
if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
if (phase0)
phase_synth_zero_order(&model, ak, ex_phase);
if (postfilt)
postfilter(&model, &bg_est);
synth_one_frame(buf, &model, Sn_, Pn);
if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
prev_model = model;
}
else {
voiced1 = model.voiced;
}
}
else {
if (phase0)
phase_synth_zero_order(&model, ak, ex_phase);
if (postfilt)
postfilter(&model, &bg_est);
synth_one_frame(buf, &model, Sn_, Pn);
if (fout != NULL) fwrite(buf,sizeof(short),N,fout);
}
}
if (fout != NULL)
fclose(fout);
if (lpc_model)
printf("SNR av = %5.2f dB\n", sum_snr/frames);
if (dump)
dump_off();
if (hand_voicing)
fclose(fvoicing);
nlp_destroy(nlp_states);
return 0;
}
void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[])
{
int i;
synthesise(Sn_, model, Pn, 1);
for(i=0; i<N; i++) {
if (Sn_[i] > 32767.0)
buf[i] = 32767;
else if (Sn_[i] < -32767.0)
buf[i] = -32767;
else
buf[i] = Sn_[i];
}
}