492 lines
11 KiB
C
492 lines
11 KiB
C
|
/*--------------------------------------------------------------------------*\
|
||
|
|
||
|
FILE........: vqtrainsp.c
|
||
|
AUTHOR......: David Rowe
|
||
|
DATE CREATED: 7 August 2012
|
||
|
|
||
|
This program trains sparse amplitude vector quantisers.
|
||
|
Modified from vqtrainph.c
|
||
|
|
||
|
\*--------------------------------------------------------------------------*/
|
||
|
|
||
|
/*
|
||
|
Copyright (C) 2012 David Rowe
|
||
|
|
||
|
All rights reserved.
|
||
|
|
||
|
This program is free software; you can redistribute it and/or modify
|
||
|
it under the terms of the GNU Lesser General Public License version 2, as
|
||
|
published by the Free Software Foundation. This program is
|
||
|
distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||
|
License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public License
|
||
|
along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
/*-----------------------------------------------------------------------*\
|
||
|
|
||
|
INCLUDES
|
||
|
|
||
|
\*-----------------------------------------------------------------------*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <math.h>
|
||
|
#include <ctype.h>
|
||
|
#include <assert.h>
|
||
|
|
||
|
typedef struct {
|
||
|
float real;
|
||
|
float imag;
|
||
|
} COMP;
|
||
|
|
||
|
/*-----------------------------------------------------------------------* \
|
||
|
|
||
|
DEFINES
|
||
|
|
||
|
\*-----------------------------------------------------------------------*/
|
||
|
|
||
|
#define DELTAQ 0.01 /* quiting distortion */
|
||
|
#define MAX_STR 80 /* maximum string length */
|
||
|
|
||
|
/*-----------------------------------------------------------------------*\
|
||
|
|
||
|
FUNCTION PROTOTYPES
|
||
|
|
||
|
\*-----------------------------------------------------------------------*/
|
||
|
|
||
|
void zero(float v[], int d);
|
||
|
void acc(float v1[], float v2[], int d);
|
||
|
void norm(float v[], int k, int n[]);
|
||
|
int quantise(float cb[], float vec[], int d, int e, float *se);
|
||
|
void print_vec(float cb[], int d, int e);
|
||
|
void split(float cb[], int d, int b);
|
||
|
int gain_shape_quantise(float cb[], float vec[], int d, int e, float *se, float *best_gain);
|
||
|
|
||
|
/*-----------------------------------------------------------------------* \
|
||
|
|
||
|
MAIN
|
||
|
|
||
|
\*-----------------------------------------------------------------------*/
|
||
|
|
||
|
int main(int argc, char *argv[]) {
|
||
|
int d,e; /* dimension and codebook size */
|
||
|
float *vec; /* current vector */
|
||
|
float *cb; /* vector codebook */
|
||
|
float *cent; /* centroids for each codebook entry */
|
||
|
int *n; /* number of vectors in this interval */
|
||
|
int J; /* number of vectors in training set */
|
||
|
int ind; /* index of current vector */
|
||
|
float se; /* total squared error for this iteration */
|
||
|
float var; /* variance */
|
||
|
float var_1; /* previous variance */
|
||
|
float delta; /* improvement in distortion */
|
||
|
FILE *ftrain; /* file containing training set */
|
||
|
FILE *fvq; /* file containing vector quantiser */
|
||
|
int ret;
|
||
|
int i,j, finished, iterations;
|
||
|
float sd;
|
||
|
int var_n, bits, b, levels;
|
||
|
|
||
|
/* Interpret command line arguments */
|
||
|
|
||
|
if (argc < 5) {
|
||
|
printf("usage: %s TrainFile D(dimension) B(number of bits) VQFile [error.txt file]\n", argv[0]);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
/* Open training file */
|
||
|
|
||
|
ftrain = fopen(argv[1],"rb");
|
||
|
if (ftrain == NULL) {
|
||
|
printf("Error opening training database file: %s\n",argv[1]);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
/* determine k and m, and allocate arrays */
|
||
|
|
||
|
d = atoi(argv[2]);
|
||
|
bits = atoi(argv[3]);
|
||
|
e = 1<<bits;
|
||
|
printf("\n");
|
||
|
printf("dimension D=%d number of bits B=%d entries E=%d\n", d, bits, e);
|
||
|
vec = (float*)malloc(sizeof(float)*d);
|
||
|
cb = (float*)malloc(sizeof(float)*d*e);
|
||
|
cent = (float*)malloc(sizeof(float)*d*e);
|
||
|
n = (int*)malloc(sizeof(int)*d*e);
|
||
|
if (cb == NULL || cb == NULL || cent == NULL || vec == NULL) {
|
||
|
printf("Error in malloc.\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
/* determine size of training set */
|
||
|
|
||
|
J = 0;
|
||
|
var_n = 0;
|
||
|
while(fread(vec, sizeof(float), d, ftrain) == (size_t)d) {
|
||
|
for(j=0; j<d; j++)
|
||
|
if (vec[j] != 0.0)
|
||
|
var_n++;
|
||
|
J++;
|
||
|
}
|
||
|
printf("J=%d sparse vectors in training set, %d non-zero values\n", J, var_n);
|
||
|
|
||
|
/* set up initial codebook from centroid of training set */
|
||
|
|
||
|
//#define DBG
|
||
|
|
||
|
zero(cent, d);
|
||
|
for(j=0; j<d; j++)
|
||
|
n[j] = 0;
|
||
|
rewind(ftrain);
|
||
|
#ifdef DBG
|
||
|
printf("initial codebook...\n");
|
||
|
#endif
|
||
|
for(i=0; i<J; i++) {
|
||
|
ret = fread(vec, sizeof(float), d, ftrain);
|
||
|
#ifdef DBG
|
||
|
print_vec(vec, d, 1);
|
||
|
#endif
|
||
|
acc(cent, vec, d);
|
||
|
for(j=0; j<d; j++)
|
||
|
if (vec[j] != 0.0)
|
||
|
n[j]++;
|
||
|
}
|
||
|
norm(cent, d, n);
|
||
|
memcpy(cb, cent, d*sizeof(float));
|
||
|
#ifdef DBG
|
||
|
printf("\n");
|
||
|
print_vec(cb, d, 1);
|
||
|
#endif
|
||
|
|
||
|
/* main loop */
|
||
|
|
||
|
printf("\n");
|
||
|
printf("bits Iteration delta var std dev\n");
|
||
|
printf("---------------------------------------\n");
|
||
|
|
||
|
for(b=1; b<=bits; b++) {
|
||
|
levels = 1<<b;
|
||
|
iterations = 0;
|
||
|
finished = 0;
|
||
|
delta = 0;
|
||
|
var_1 = 0.0;
|
||
|
|
||
|
split(cb, d, levels/2);
|
||
|
//print_vec(cb, d, levels);
|
||
|
|
||
|
do {
|
||
|
/* zero centroids */
|
||
|
|
||
|
for(i=0; i<levels; i++) {
|
||
|
zero(¢[i*d], d);
|
||
|
for(j=0; j<d; j++)
|
||
|
n[i*d+j] = 0;
|
||
|
}
|
||
|
|
||
|
//#define DBG
|
||
|
#ifdef DBG
|
||
|
printf("cb...\n");
|
||
|
print_vec(cb, d, levels);
|
||
|
printf("\n\nquantise...\n");
|
||
|
#endif
|
||
|
|
||
|
/* quantise training set */
|
||
|
|
||
|
se = 0.0;
|
||
|
rewind(ftrain);
|
||
|
for(i=0; i<J; i++) {
|
||
|
ret = fread(vec, sizeof(float), d, ftrain);
|
||
|
ind = quantise(cb, vec, d, levels, &se);
|
||
|
//ind = gain_shape_quantise(cb, vec, d, levels, &se, &best_gain);
|
||
|
//for(j=0; j<d; j++)
|
||
|
// if (vec[j] != 0.0)
|
||
|
// vec[j] += best_gain;
|
||
|
#ifdef DBG
|
||
|
print_vec(vec, d, 1);
|
||
|
printf(" ind %d se: %f\n", ind, se);
|
||
|
#endif
|
||
|
acc(¢[ind*d], vec, d);
|
||
|
for(j=0; j<d; j++)
|
||
|
if (vec[j] != 0.0)
|
||
|
n[ind*d+j]++;
|
||
|
}
|
||
|
|
||
|
#ifdef DBG
|
||
|
printf("cent...\n");
|
||
|
print_vec(cent, d, e);
|
||
|
printf("\n");
|
||
|
#endif
|
||
|
|
||
|
/* work out stats */
|
||
|
|
||
|
var = se/var_n;
|
||
|
sd = sqrt(var);
|
||
|
|
||
|
iterations++;
|
||
|
if (iterations > 1) {
|
||
|
if (var > 0.0) {
|
||
|
delta = (var_1 - var)/var;
|
||
|
}
|
||
|
else
|
||
|
delta = 0;
|
||
|
if (delta < DELTAQ)
|
||
|
finished = 1;
|
||
|
}
|
||
|
|
||
|
if (!finished) {
|
||
|
/* determine new codebook from centroids */
|
||
|
|
||
|
for(i=0; i<levels; i++) {
|
||
|
norm(¢[i*d], d, &n[i*d]);
|
||
|
memcpy(&cb[i*d], ¢[i*d], d*sizeof(float));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifdef DBG
|
||
|
printf("new cb ...\n");
|
||
|
print_vec(cent, d, e);
|
||
|
printf("\n");
|
||
|
#endif
|
||
|
|
||
|
printf("%2d %2d %4.3f %6.3f %4.3f\r",b,iterations, delta, var, sd);
|
||
|
fflush(stdout);
|
||
|
|
||
|
var_1 = var;
|
||
|
} while (!finished);
|
||
|
printf("\n");
|
||
|
}
|
||
|
|
||
|
|
||
|
//print_vec(cb, d, 1);
|
||
|
|
||
|
/* save codebook to disk */
|
||
|
|
||
|
fvq = fopen(argv[4],"wt");
|
||
|
if (fvq == NULL) {
|
||
|
printf("Error opening VQ file: %s\n",argv[4]);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
fprintf(fvq,"%d %d\n",d,e);
|
||
|
for(j=0; j<e; j++) {
|
||
|
for(i=0; i<d; i++)
|
||
|
fprintf(fvq,"% 7.3f ", cb[j*d+i]);
|
||
|
fprintf(fvq,"\n");
|
||
|
}
|
||
|
fclose(fvq);
|
||
|
|
||
|
/* optionally dump error file for multi-stage work */
|
||
|
|
||
|
if (argc == 6) {
|
||
|
FILE *ferr = fopen(argv[5],"wt");
|
||
|
assert(ferr != NULL);
|
||
|
rewind(ftrain);
|
||
|
for(i=0; i<J; i++) {
|
||
|
ret = fread(vec, sizeof(float), d, ftrain);
|
||
|
ind = quantise(cb, vec, d, levels, &se);
|
||
|
for(j=0; j<d; j++) {
|
||
|
if (vec[j] != 0.0)
|
||
|
vec[j] -= cb[ind*d+j];
|
||
|
fprintf(ferr, "%f ", vec[j]);
|
||
|
}
|
||
|
fprintf(ferr, "\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*-----------------------------------------------------------------------*\
|
||
|
|
||
|
FUNCTIONS
|
||
|
|
||
|
\*-----------------------------------------------------------------------*/
|
||
|
|
||
|
void print_vec(float cb[], int d, int e)
|
||
|
{
|
||
|
int i,j;
|
||
|
|
||
|
for(j=0; j<e; j++) {
|
||
|
printf(" ");
|
||
|
for(i=0; i<d; i++)
|
||
|
printf("% 7.3f ", cb[j*d+i]);
|
||
|
printf("\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/*---------------------------------------------------------------------------*\
|
||
|
|
||
|
FUNCTION....: zero()
|
||
|
|
||
|
AUTHOR......: David Rowe
|
||
|
DATE CREATED: 23/2/95
|
||
|
|
||
|
Zeros a vector of length d.
|
||
|
|
||
|
\*---------------------------------------------------------------------------*/
|
||
|
|
||
|
void zero(float v[], int d)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
for(i=0; i<d; i++) {
|
||
|
v[i] = 0.0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*---------------------------------------------------------------------------*\
|
||
|
|
||
|
FUNCTION....: acc()
|
||
|
|
||
|
AUTHOR......: David Rowe
|
||
|
DATE CREATED: 23/2/95
|
||
|
|
||
|
Adds d dimensional vectors v1 to v2 and stores the result back
|
||
|
in v1.
|
||
|
|
||
|
An unused entry in a sparse vector is set to zero so won't
|
||
|
affect the accumulation process.
|
||
|
|
||
|
\*---------------------------------------------------------------------------*/
|
||
|
|
||
|
void acc(float v1[], float v2[], int d)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
for(i=0; i<d; i++)
|
||
|
v1[i] += v2[i];
|
||
|
}
|
||
|
|
||
|
/*---------------------------------------------------------------------------*\
|
||
|
|
||
|
FUNCTION....: norm()
|
||
|
|
||
|
AUTHOR......: David Rowe
|
||
|
DATE CREATED: 23/2/95
|
||
|
|
||
|
Normalises each element in d dimensional vector.
|
||
|
|
||
|
\*---------------------------------------------------------------------------*/
|
||
|
|
||
|
void norm(float v[], int d, int n[])
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
for(i=0; i<d; i++) {
|
||
|
if (n[i] != 0)
|
||
|
v[i] /= n[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*---------------------------------------------------------------------------*\
|
||
|
|
||
|
FUNCTION....: quantise()
|
||
|
|
||
|
AUTHOR......: David Rowe
|
||
|
DATE CREATED: 23/2/95
|
||
|
|
||
|
Quantises vec by choosing the nearest vector in codebook cb, and
|
||
|
returns the vector index. The squared error of the quantised vector
|
||
|
is added to se.
|
||
|
|
||
|
Unused entries in sparse vectors are ignored.
|
||
|
|
||
|
\*---------------------------------------------------------------------------*/
|
||
|
|
||
|
int quantise(float cb[], float vec[], int d, int e, float *se)
|
||
|
{
|
||
|
float error; /* current error */
|
||
|
int besti; /* best index so far */
|
||
|
float best_error; /* best error so far */
|
||
|
int i,j;
|
||
|
float diff;
|
||
|
|
||
|
besti = 0;
|
||
|
best_error = 1E32;
|
||
|
for(j=0; j<e; j++) {
|
||
|
error = 0.0;
|
||
|
for(i=0; i<d; i++) {
|
||
|
if (vec[i] != 0.0) {
|
||
|
diff = cb[j*d+i] - vec[i];
|
||
|
error += diff*diff;
|
||
|
}
|
||
|
}
|
||
|
if (error < best_error) {
|
||
|
best_error = error;
|
||
|
besti = j;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*se += best_error;
|
||
|
|
||
|
return(besti);
|
||
|
}
|
||
|
|
||
|
int gain_shape_quantise(float cb[], float vec[], int d, int e, float *se, float *best_gain)
|
||
|
{
|
||
|
float error; /* current error */
|
||
|
int besti; /* best index so far */
|
||
|
float best_error; /* best error so far */
|
||
|
int i,j,m;
|
||
|
float diff, metric, best_metric, gain, sumAm, sumCb;
|
||
|
|
||
|
besti = 0;
|
||
|
best_metric = best_error = 1E32;
|
||
|
for(j=0; j<e; j++) {
|
||
|
|
||
|
/* compute optimum gain */
|
||
|
|
||
|
sumAm = sumCb = 0.0;
|
||
|
m = 0;
|
||
|
for(i=0; i<d; i++) {
|
||
|
if (vec[i] != 0.0) {
|
||
|
m++;
|
||
|
sumAm += vec[i];
|
||
|
sumCb += cb[j*d+i];
|
||
|
}
|
||
|
}
|
||
|
gain = (sumAm - sumCb)/m;
|
||
|
|
||
|
/* compute error */
|
||
|
|
||
|
metric = error = 0.0;
|
||
|
for(i=0; i<d; i++) {
|
||
|
if (vec[i] != 0.0) {
|
||
|
diff = vec[i] - cb[j*d+i] - gain;
|
||
|
error += diff*diff;
|
||
|
metric += diff*diff;
|
||
|
}
|
||
|
}
|
||
|
if (metric < best_metric) {
|
||
|
best_error = error;
|
||
|
best_metric = metric;
|
||
|
*best_gain = gain;
|
||
|
besti = j;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*se += best_error;
|
||
|
|
||
|
return(besti);
|
||
|
}
|
||
|
|
||
|
void split(float cb[], int d, int levels)
|
||
|
{
|
||
|
int i,j;
|
||
|
|
||
|
for (i=0;i<levels;i++) {
|
||
|
for (j=0;j<d;j++) {
|
||
|
float delta = .01*(rand()/(float)RAND_MAX-.5);
|
||
|
cb[i*d+j] += delta;
|
||
|
cb[(i+levels)*d+j] = cb[i*d+j] - delta;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|