/*
    mksam.c is copyright (c) 2006-07 Volker Schatz (noise at volkerschatz 
    dot com).

    mksam.c is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    mksam.c is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    A copy of the GNU General Public License can be obtained from the 
    website of the Free Software Foundation, http://www.gnu.org/licenses/.
*/

// compile with:  gcc -O3 -o mksam mksam.c -lfftw3 -lm
// or: icc -O3 -o mksam mksam.c -lfftw3 -lm
// or for Core2Duo: icc -O3 -xT -o mksam mksam.c -lfftw3 -lm

// If you want to set the number of threads mksam uses explicitly, uncomment
// and set the following macro:
//#define MKSAM_THREADS		1
// Otherwise, the number of threads will be set to the number of processors
// determined with sysconf(_SC_NPROCESSORS_ONLN) plus 1 (on multi-processor
// systems).  In my experience, this yielded slightly but consistently better
// performance than a number of threads equal to the number of processors (at
// least on my dual-core box under Linux).

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <complex.h>
#include <fftw3.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <getopt.h>
#include <pthread.h>


typedef struct {
  unsigned int      RIFF;
  unsigned int      totalsizem8;
  unsigned int      WAVE;
  unsigned int      fmt;
  unsigned int      fmtsize;
  unsigned short    audiofmt;
  unsigned short    nchannels;
  unsigned int      samplerate;
  unsigned int      byterate;
  unsigned short    blockalign;
  unsigned short    bitspersample;
  unsigned int      data;
  unsigned int      restsize;
}
wavheader;

typedef struct {
  double	desttime;
  char		*phasefile;
  int		phasefileex;
  int		samprate;
  int		firstsam, lastp1sam;
  int		lininter, lorspread;
  int		phlininter;
  double	lorwidth, lorwexp;
}
mksamopts;

struct convl_common {
  double *dsource, *ddest, *dsrctop;
  double fstep, coeffexp, coeff1Hz, sqrt1_eps;
};
struct convl {
  pthread_t thread;
  const struct convl_common *common;
  int minind, maxind, stride;
};


void usageexit();
void opterrexit();
void generatephases(double *dest, int n);
double *readsourcewav( const char *name, int firstsam, int lastp1sam, int *length, unsigned *samprate );
void interpolphases(fftw_complex *data, int srclen, double *dest, int destlen);
double cphase( fftw_complex *number );
double linintphase(double phase0, double phase1, double frac);
void absspectrum(fftw_complex *data, int len);
void spreadspectrum(fftw_complex *data, int srclen, int destlen);
void interpolspectrum(fftw_complex *data, int srclen, int destlen);
void convlorentzian(fftw_complex *source, fftw_complex *dest, int length, double fstep, double width1Hz, double widthexp);
void *convl_part(void *arg);
void applyphases(double *phases, fftw_complex *data, int len);
int writeresult(double *pcmdata, int len, const char *srcname, int srate);


int main(int argc, char **argv)
{
  static struct option longopts[]= {
    {"help", 0, NULL, 'h'}, { NULL, 0, NULL, 0} };

  mksamopts options;
  fftw_plan planr2c, planc2r;
  fftw_complex *fourdomain, *convbuf;
  struct stat statstr;
  FILE *phasefp;
  double *timedomain, *phases, *sourcedata;
  char *endoptarg;
  double interind;
  unsigned wavsamprate;
  int argind, nsamples, sourcelen, optc;

  options.desttime= -1.;
  options.phasefile= NULL;
  options.samprate= 0;
  options.firstsam= 0;
  options.lastp1sam= 0;
  options.phlininter= 0;
  options.lininter= 0;
  options.lorspread= 0;
  options.lorwidth= 0.001;
  options.lorwexp= 1.;
  if( argc==1 )
    usageexit();
  while( (optc= getopt_long(argc, argv, "ht:p:r:s:e:ijlw:x:", longopts, NULL))>=0 )
  {
    endoptarg= NULL;
    switch( optc )
    {
    case 'h':usageexit();
	     break;
    case 't':options.desttime= strtod(optarg, &endoptarg);
	     break;
    case 'p':options.phasefile= optarg;
	     break;
    case 'r':options.samprate= strtol(optarg, &endoptarg, 0);
	     break;
    case 's':options.firstsam= strtol(optarg, &endoptarg, 0);
	     break;
    case 'e':options.lastp1sam= strtol(optarg, &endoptarg, 0);
	     break;
    case 'j':options.phlininter= 1;
	     break;
    case 'i':options.lininter= 1;
	     break;
    case 'l':options.lorspread= 1;
	     break;
    case 'w':options.lorwidth= strtod(optarg, &endoptarg);
	     break;
    case 'x':options.lorwexp= strtod(optarg, &endoptarg);
	     break;
    default:
    case '?':opterrexit();
	     break;
    }
    if( endoptarg && *endoptarg ) {
      fprintf(stderr, "Could not parse all of `%s'.\n", optarg);
      opterrexit();
    }
  }
  argind= optind;
  if( argind >= argc ) {
    fprintf(stderr, "No input files given.\n");
    opterrexit();
  }

  if( options.phasefile ) {
    options.phasefileex= !stat(options.phasefile, &statstr);
    if( !options.phasefileex )
      if( errno != ENOENT ) {
	perror("Error trying to \"stat\" phase file");
	return -1;
      }
      else if( options.desttime < 0 )
	options.desttime= 10.;
      else;
    else {
      if( options.phlininter ) {
	fprintf(stderr, "The option -j precludes the use of an existing phase file (`%s').  Aborting.\n", options.phasefile);
	return -1;
      }
      if( options.desttime > 0 && 
	(int)trunc(options.desttime*options.samprate+0.5)/2+1 != statstr.st_size/sizeof(double)/2 ) {
	fprintf(stderr, "Length of phase file does not match requested output file length and sampling rate!\n");
	return -1;
      }
      options.desttime= (statstr.st_size/sizeof(double)/2 - 1)*2 / (double)options.samprate;
    }
  }
  else if( options.desttime < 0 )
    options.desttime= 10.;
  sourcedata= readsourcewav(argv[argind], options.firstsam, options.lastp1sam, &sourcelen, &wavsamprate);
  if( !sourcedata ) {
    fprintf(stderr, "mksam: error reading input file `%s'.\n", argv[argind]);
    return -1;
  }
  options.samprate= wavsamprate;
  nsamples= (int)trunc(options.desttime*options.samprate+0.5);
  if( options.lastp1sam > nsamples )
    options.lastp1sam= nsamples;
  fourdomain= fftw_malloc((nsamples/2+1)*sizeof(fftw_complex));
  timedomain= fftw_malloc(nsamples*sizeof(double));
  phases= malloc(2*(nsamples/2+1)*sizeof(double));
  if( !fourdomain || !timedomain || !phases ) {
    fprintf(stderr, "mksam: Could not allocate space for Fourier transform.\n");
    return -1;
  }
  if( !options.phlininter )
    if( options.phasefile )
      if( !options.phasefileex ) {
	generatephases(phases, nsamples/2+1);
	phasefp= fopen(options.phasefile, "w");
	if( phasefp )
	  fwrite(phases, 2*sizeof(double), nsamples/2+1, phasefp);
	if( !phasefp || ferror(phasefp) ) {
	  perror("Could not write phase file");
	  return -1;
	}
	fclose(phasefp);
      }
      else {
	phasefp= fopen(options.phasefile, "r");
	if( phasefp )
	  fread(phases, 2*sizeof(double), nsamples/2+1, phasefp);
	if( !phasefp || ferror(phasefp) ) {
	  perror("Could not read phase file");
	  return -1;
	}
	fclose(phasefp);
      }
    else
      generatephases(phases, nsamples/2+1);
  if( options.lorspread && !(convbuf= malloc(2*(nsamples/2+1)*sizeof(double))) ) {
    fprintf(stderr, "mksam: Could not allocate frequency spread convolution buffer.\n");
    return -1;
  }
  planc2r= fftw_plan_dft_c2r_1d(nsamples, fourdomain, timedomain, FFTW_ESTIMATE);

  while( argind < argc )
  {
    if( !sourcedata ) {
      sourcedata= readsourcewav(argv[argind], options.firstsam, options.lastp1sam, &sourcelen, &wavsamprate);
      if( !sourcedata ) {
	fprintf(stderr, "mksam: error reading input file `%s'.\n", argv[argind]);
	return -1;
      }
      if( sourcelen > nsamples ) {
	fprintf(stderr, "mksam: input file `%s' is longer than output interval.\n", argv[argind]);
	return -1;
      }
      if( wavsamprate != options.samprate )
	fprintf(stderr, "mksam: warning: sampling rate of input file `%s' (%d) deviates from expected (%d).  Ignored.\n", argv[argind], wavsamprate, options.samprate );
    }
    memset(fourdomain, 0, sizeof(fftw_complex)*(nsamples/2+1));
    planr2c= fftw_plan_dft_r2c_1d(sourcelen, sourcedata, fourdomain, FFTW_ESTIMATE);
    fftw_execute(planr2c);
    fftw_destroy_plan(planr2c);
    free(sourcedata);
    if( options.phlininter ) {
      interpolphases(fourdomain, sourcelen/2+1, phases, nsamples/2+1);
      if( options.phasefile ) {
	phasefp= fopen(options.phasefile, "w");
	if( phasefp )
	  fwrite(phases, 2*sizeof(double), nsamples/2+1, phasefp);
	if( !phasefp || ferror(phasefp) ) {
	  perror("Could not write phase file");
	  return -1;
	}
	fclose(phasefp);
	options.phasefile= NULL;
      }
    }
    absspectrum(fourdomain, sourcelen/2+1);
    if( options.lininter )
      interpolspectrum(fourdomain, sourcelen/2+1, nsamples/2+1);
    else
      spreadspectrum(fourdomain, sourcelen/2+1, nsamples/2+1);
    if( options.lorspread ) {
      convlorentzian(fourdomain, convbuf, nsamples/2+1, 1.0/options.desttime, options.lorwidth, options.lorwexp);
      memcpy(fourdomain, convbuf, (nsamples/2+1)*2*sizeof(double));
    }
    applyphases(phases, fourdomain, nsamples/2+1);
    fftw_execute(planc2r);
    if( writeresult(timedomain, nsamples, argv[argind], options.samprate) ) {
      fprintf(stderr, "mksam: error writing output file for `%s'.\n", argv[argind]);
      return -1;
    }
    ++argind;
    sourcedata= NULL;
  }
  fftw_destroy_plan(planc2r);
  if( options.lorspread )
    free(convbuf);
  return 0;
}


// Print usage message and exit.

void usageexit()
{
  printf("Usage: mksam [-t <output file duration>] [-r <sampling rate>] [-s <first sample>] [-e <one-past-last sample>] [-i] [-j] [-l] [-w <width>] [-x <exponent>] [-p <phase file>] <input WAV file> ...\n");
  printf("Creates long loopable samples from input WAV files.  This is done by "
      "Fourier-transforming, randomising the coefficients' phases and reverse "
      "transforming the data.  The phases remain the same for all output files "
      "to allow smoothly fading between them.  The source files may be 8 or 16 "
      "bit WAV but must have the same sampling rate; the output files are 16 "
      "bit, and no resampling is done.  The range of values is scaled to the "
      "whole 16 bits in the output files.  The output file names are the input"
      " file names appended by \"_l\" before the \".wav\" extension.  The "
      "input files have to be shorter or of equal length compared to the "
      "output file duration time.  If input files have multiple channels, only"
      " the first will be processed.\n");
  printf("Options:\n"
      "-t\tLength in seconds of the output samples.  A few seconds at least is "
      "recommended to avoid the result sounding artificial. (Default: 10 s)\n"
      "-p\tName of a file to store the phases of the output files.  If it "
      "exists, it is read; if it doesn't, the phases are generated and written "
      "to that file.  If the phase file exists and the -t option is also given,"
      "the lengths have to correspond.\n"
      "-r\tSampling rate in Hz.  All input files are expected to have been "
      "recorded/generated with this sampling rate.  The default is the rate "
      "in the WAV header of the first input file.  If the rates of subsequent "
      "files differ, a warning is output, but no resampling is performed.\n"
      "-s\tFirst sample of input files to use.  All input files have to contain"
      " the range between the first and last sample.  If this option and the -e"
      " option are not given, the default is always to use the whole input "
      "files, which may be of different lengths.  Negative values given with -s"
      " are interpreted with respect to the end of the files.\n"
      "-e\tFirst sample after the range to use.  Zero or negative values are "
      "interpreted with respect to the end of the files.\n"
      "-i\tInterpolate the Fourier coefficients linearly to obtain intermediate"
      " frequencies contained in the longer output file but not in the source "
      "files.  Otherwise these frequencies' coefficients are zeroed.\n"
      "-j\tInterpolate the phases of the Fourier modes linearly rather than "
      "randomising them.  The interpolation is performed along the shortest "
      "path between two angles on the unit circle.  If this option is "
      "activated, -p cannot be given with an existing phase file.  If -p is"
      " given with a new file name, the phases of the first file are written to"
      " it.\n"
      "-l\tSpread the peaks in the frequency spectrum by convoluting with a "
      "Lorentzian in frequency space.  This creates a polyphonous, choir-like "
      "sound and is an idea of Nasca Octavian Paul (see "
      "http://en.wikibooks.org/wiki/PADsynth_synthesis_algorithm and "
      "http://zynaddsubfx.sourceforge.net/).\n"
      "-w\tWidth of the Lorentzian in Hz at a frequency of 1Hz, defined as w in"
      " 1/(1 + (f-f0)^2 / w^2).  Default: 0.001.\n"
      "-x\tExponent with which the width changes with frequency.  According to "
      "Nasca Paul, a linear dependence is good for acoustical instruments, so "
      "the default is 1.\n"
      "Recommended accessories: Use sox or snd to resample or cut input files "
      "if necessary.\n"
      "Legal note: Extracting samples from copyrighted work and using them to "
      "produce your own music for sale is illegal.\n");
  exit(0);
}


// Print brief note before exiting with error (-1).

void opterrexit()
{
  fprintf(stderr, "Call mksam with option \"-h\" or \"--help\" or without arguments for usage instructions.\n");
  exit(-1);
}


// Write sine and cosine values of random phases to array dest which provides
// space for 2*n (!) double values.  The first and last pair of values are left
// out.

void generatephases(double *dest, int n)
{
  char randstate[256];
  char *oldstate;
  double *write;
  double phase;

  initstate(time(NULL), randstate, 256);
  oldstate= setstate(randstate);
  for( write= dest+2*n-4; write> dest; write -= 2 ) {
    phase= ((double)random()/(double)RAND_MAX + 
	(double)random()/(double)RAND_MAX/(double)RAND_MAX)*2.0*M_PI;
    sincos(phase, write+1, write);
  }
  setstate(oldstate);
}


// Read a WAV file and convert it to double values.  firstsam and lastp1sam
// give the range of samples in the file to read (with #lastp1sam not included).
// Return a pointer to the allocated buffer and its length in length.

double *readsourcewav( const char *name, int firstsam, int lastp1sam, int *length, unsigned *samprate )
{
  FILE *fp;
  wavheader head;
  double *data, *write;
  char *fdata, *read, *fend;
  int len;

  fp= fopen(name, "r");
  if( !fp )
    return NULL;
  fread(&head, sizeof(wavheader), 1L, fp);
  if( ferror(fp) ) {
    fclose(fp);
    return NULL;
  }
  if( head.RIFF!='FFIR' || head.WAVE!='EVAW' || head.fmt!=' tmf' ) {
    fprintf(stderr, "`%s' does not seem to be a WAV file.\n", name);
    fclose(fp);
    return NULL;
  }
  if( head.audiofmt != 1 || 
      (head.bitspersample != 8 && head.bitspersample != 16) ) {
    fprintf(stderr, "`%s' does not contain 8- or 16-bit PCM data.\n", name);
    fclose(fp);
    return NULL;
  }
  while( head.data!='atad' && !feof(fp) && !ferror(fp) ) {
    fseek( fp, head.restsize, SEEK_CUR );
    fread( &head.data, sizeof(unsigned long), 2L, fp );
  }
  if( feof(fp) || ferror(fp) ) {
    fprintf(stderr, "No data chunk found in `%s'.\n", name);
    fclose(fp);
    return NULL;
  }
  if( firstsam < 0 )
    firstsam += head.restsize/head.blockalign;
  if( lastp1sam <= 0 )
    lastp1sam += head.restsize/head.blockalign;
  if( firstsam >= head.restsize/head.blockalign || firstsam < 0 ||
	lastp1sam > head.restsize/head.blockalign || lastp1sam <= 0 ) {
    fprintf(stderr, "`%s' does not contain requested range of samples.\n", name);
    fclose(fp);
    return NULL;
  }
  if( lastp1sam <= firstsam ) {
    fprintf(stderr, "The last requested sample in `%s' lies before the first.\n", name);
    fclose(fp);
    return NULL;
  }
  if( firstsam>0 )
    fseek(fp, firstsam*head.blockalign, SEEK_CUR);
  len= lastp1sam - firstsam;
  data= fftw_malloc(sizeof(double) * len);
  fdata= malloc(head.blockalign * len);
  if( !data || !fdata ) {
    fftw_free(data);
    free(fdata);
    fclose(fp);
    return NULL;
  }
  fread(fdata, head.blockalign * len, 1L, fp);
  if( ferror(fp) ) {
    fftw_free(data);
    free(fdata);
    fclose(fp);
    return NULL;
  }
  fclose(fp);
  fend= fdata + len*head.blockalign;
  write= data;
  if( head.bitspersample==16 )
    for( read= fdata; read< fend; read += head.blockalign )
      *write++ = (double)*(signed short*)read;
  else
    for( read= fdata; read< fend; read += head.blockalign )
      *write++ = (double)*(unsigned char*)read;
  free(fdata);
  *samprate= head.samplerate;
  *length= len;
  return data;
}


// Create a phase array (containing cosine and sine values of each phase) by
// interpolating the phases of a Fourier-transformed complex array.  The
// complex numbers in the source array need not be normalised.

void interpolphases(fftw_complex *data, int srclen, double *dest, int destlen)
{
  double *write;
  double indscale, fltind, fracind, phase, phase1;
  int ind, maxind, oldind, ind1;

  write= dest;
  indscale= (double)(srclen-1)/(double)(destlen-1);
  oldind= ind1= -1;
  while( write < dest+destlen-1 )
  {
    fltind= indscale*(write-dest);
    ind= (int)floor(fltind);
    fracind= fltind-(double)ind;
    if( !fracind || ind>=destlen-1 ) {
      if( ind==ind1 )
	phase= phase1;
      else
	phase= cphase(data+ind);
      sincos(phase, write+1, write);
    }
    else {
      if( ind==oldind )
	if( ind1!=ind+1 ) {
	  phase1= cphase(data+ind+1);
	  ind1= ind+1;
	}
	else;
      else
	if( ind==ind1 ) {
	  phase= phase1;
	  phase1= cphase(data+ind+1);
	  ind1= ind+1;
	}
	else {
	  phase= cphase(data+ind);
	  phase1= cphase(data+ind+1);
	  ind1= ind+1;
	} 
      sincos(linintphase(phase, phase1, fracind), write+1, write);
    }
    oldind= ind;
    write += 2;
  }
}

// Return the phase (argument) of an fftw_complex number. 

double cphase( fftw_complex *number )
{
  return carg(*(double complex *)number);
}

// Interpolate between two phases along the shortest way on the unit circle.
// Both the given phases have to be in [-pi,pi] (so is the result).

double linintphase(double phase0, double phase1, double frac)
{
  double diff;

  diff= phase1-phase0;
  if( diff > M_PI ) {
    diff -= 2*M_PI;
    phase0 += diff*frac;
    if( phase0 < -M_PI )
      phase0 += 2*M_PI;
  }
  else if( diff < -M_PI ) {
    diff += 2*M_PI;
    phase0 += diff*frac;
    if( phase0 > M_PI )
      phase0 -= 2*M_PI;
  }
  else
    phase0 += diff*frac;
  return phase0;
}


// Replace real part of Fourier coefficients by their modulus and zero their
// imaginary part.
// The first and last values are left unchanged, as they are (and should
// remain) real in a transform of real data (assuming an even number of values).

void absspectrum(fftw_complex *data, int len)
{
  double *ddata;

  for( ddata= (double*)(data+len-2); ddata > (double*)data; ddata-=2 ) {
    ddata[0]= sqrt(ddata[0]*ddata[0] + ddata[1]*ddata[1]);
    ddata[1]= 0.0;
  }
}


// Spread the first srclen complex values over the whole array of length
// destlen.  All values except the srclen first are expected to be zero and
// are left unchanged.

void spreadspectrum(fftw_complex *data, int srclen, int destlen)
{
  double *dread;
  double index, step;
  int nextind, blankind;

  dread= (double*)(data+srclen-1);
  step= (double)(destlen-1)/(double)(srclen-1);
  for( index= destlen-0.5; index >= srclen; index -= step, dread -= 2 ) {
    nextind= (int)trunc(index);
    ((double*)(data+nextind))[0]= dread[0];
  }
  blankind= (int)trunc(index+step) - 1;
  for( ; index >= 1.0; index -= step, dread -= 2 ) {
    nextind= (int)trunc(index);
    for( ; blankind> nextind; --blankind ) {
      ((double*)(data+blankind))[0] = 0.0;
    }
    ((double*)(data+nextind))[0]= dread[0];
    blankind= nextind-1;
  }
}


// Spread the first srclen complex values over the whole array of length
// destlen by interpolating linearly.

void interpolspectrum(fftw_complex *data, int srclen, int destlen)
{
  fftw_complex *write;
  double indscale, fltind, fracind;
  int ind;

  write= data+destlen-1;
  indscale= (double)(srclen-1)/(double)(destlen-1);
  while( write > data )
  {
    fltind= indscale * (write-data);
    ind= (int)trunc(fltind);
    fracind= fltind - ind;
    if( fracind == 0.0 ) {
      ((double*)write)[0] = ((double*)(data+ind))[0];
    }
    else {
      double *base= (double*)(data+ind);
      ((double*)write)[0]= base[0] + fracind*(base[2]-base[0]);
    }
    --write;
  }
}


//  Convolute complex frequency spectrum with a lorentzian.  length is the
//  number of complex numbers stored in source; dest has to provide space for
//  as many.  fstep is the frequency increment between successive values.
//  The remaining parameters are the width of the lorentzian at 1Hz and the
//  exponent with which it changes with frequency.

void convlorentzian(fftw_complex *source, fftw_complex *dest, int length, double fstep, double width1Hz, double widthexp)
{
  struct convl_common commondata;
  struct convl *threads;
  int nthreads, thrd, result;

  commondata.dsource= (double*)source;
  commondata.dsrctop= commondata.dsource+2*length;
  commondata.ddest= (double*)dest;
  commondata.fstep= fstep;
  commondata.coeffexp= -widthexp;
  commondata.coeff1Hz= fstep/width1Hz;
  commondata.sqrt1_eps= 1.0/sqrt(1e-7);
  commondata.ddest[0]= commondata.dsource[0];
  commondata.ddest[1]= commondata.dsource[1];

#ifdef MKSAM_THREADS
  nthreads= MKSAM_THREADS;
#else
  nthreads= sysconf(_SC_NPROCESSORS_ONLN);
  if( nthreads> 1 )
    ++nthreads;
#endif
  threads= malloc(nthreads*sizeof(struct convl));
  if( !threads ) {
    fprintf(stderr, "convlorentzian: Could not allocate memory for thread work-sharing.\n");
    exit(-1);
  }
  for( thrd= 0; thrd< nthreads; ++thrd ) {
    threads[thrd].common= &commondata;
    threads[thrd].minind= thrd+1;
    threads[thrd].maxind= length-1;
    threads[thrd].stride= nthreads;
  }

  for( thrd= 1; thrd< nthreads; ++thrd )
    if( pthread_create(&threads[thrd].thread, NULL, convl_part, threads+thrd) )
    {
      fprintf(stderr, "convlorentzian: Could not create all other threads.  Aborting.\n");
      for( --thrd, result= 0; thrd> 0; --thrd )
	result |= pthread_cancel(threads[thrd].thread);
      if( result )
	fprintf(stderr, "Error cancelling threads.\n");
      exit(-1);
    }

  convl_part(threads);

  for( thrd= 1, result= 0; thrd< nthreads; ++thrd )
    result |= pthread_join(threads[thrd].thread, NULL);
  if( result )
    fprintf(stderr, "convlorentzian: Error joining other threads.  Continuing.\n");
}

void *convl_part(void *arg)
{
  struct convl *set;
  const struct convl_common *common;
  double *write, *read;
  double coeff, csum;
  int freq, convind, convmax;

  set= (struct convl *)arg;
  common= set->common;
  read= common->dsource+2*set->minind;
  write= common->ddest+2*set->minind;
  for( freq= set->minind; freq<=set->maxind; freq += set->stride )
  {
    pthread_testcancel();
    coeff= common->coeff1Hz * pow(freq*common->fstep, common->coeffexp); // -> -df/w(f)
    if( common->sqrt1_eps/coeff > (double)(INT_MAX-1) )
      convmax= INT_MAX;
    else
      convmax= (int)ceil(common->sqrt1_eps/coeff);
    convind= -convmax;
    if( convind < -(read-common->dsource)/2 )
      convind= -(read-common->dsource)/2;
    if( convmax >= (common->dsrctop-read)/2-1 )
      convmax= (common->dsrctop-read)/2 - 1;
    csum= 0.0;
    write[0]= 0.0;
    while( convind <= convmax )
    {
      double fact= 1.0/(1.0 + (coeff*convind)*(coeff*convind));
      csum += fact;
      write[0] += fact * read[2*convind];
      ++convind;
    }
    write[0] /= csum;
    read += 2*set->stride;
    write += 2*set->stride;
  }
  return NULL;
}


// Set the phases of the complex data to that of the unit modulus numbers
// stored in phases.  The first and last values are left out.  len is the
// number of complex values stored in data and half the number of real values
// in phases.

void applyphases(double *phases, fftw_complex *data, int len)
{
  double *ddata, *write, *read;

  ddata= (double *)data;
  for( read= phases+2*len-4, write= ddata+2*len-4; write> ddata;
      read -= 2, write -= 2 )
  {
    write[1]= write[0]*read[1];
    write[0]= write[0]*read[0];
  }
}


// Write result data to a 16-bit WAV file after converting them from double.
// The output file name is the source file name with "_l" appended before the
// .wav extension.

int writeresult(double *pcmdata, int len, const char *srcname, int srate)
{
  wavheader head;
  FILE *out;
  double *read;
  signed short *fbuf, *write;
  char *fnamebuf;
  double min, max, scale;
  int fnlen;

  min= max= *pcmdata;
  for( read= pcmdata+len-1; read> pcmdata; --read )
    if( *read > max )
      max= *read;
    else if( *read < min )
      min= *read;
//  printf("min %g max %g\n", min, max );
  scale= (double)0xFFFF / (max - min);
  fnlen= strlen(srcname);
  if( !strcmp(srcname+fnlen-4, ".wav") ) {
    fnamebuf= malloc(fnlen+3);
    if( fnamebuf ) {
      strncpy(fnamebuf, srcname, fnlen-4);
      strncpy(fnamebuf+fnlen-4, "_l.wav", 7);
    }
  }
  else {
    fnamebuf= malloc(fnlen+7);
    if( fnamebuf )
      snprintf(fnamebuf, fnlen+7, "%s_l.wav", srcname);
  }
  fbuf= malloc(len*sizeof(signed short));
  if( !fbuf || !fnamebuf ) {
    fprintf(stderr, "Could not allocate buffers for writing data to disk.\n");
    free(fbuf);
    free(fnamebuf);
    return -1;
  }
  out= fopen(fnamebuf, "w");
  if( !out ) {
    fprintf(stderr, "Could not create output file `%s'.\n", fnamebuf);
    free(fnamebuf);
    free(fbuf);
    return -1;
  }
  free(fnamebuf);
  head.RIFF= 'FFIR';
  head.totalsizem8= 2*len + sizeof(wavheader) - 8;
  head.WAVE= 'EVAW';
  head.fmt= ' tmf';
  head.fmtsize= 16;
  head.audiofmt= 1;
  head.nchannels= 1;
  head.samplerate= srate;
  head.byterate= 2*srate;
  head.blockalign= 2;
  head.bitspersample= 16;
  head.data= 'atad';
  head.restsize= 2*len;
  fwrite(&head, sizeof(wavheader), 1L, out);
  if( ferror(out) ) {
    fprintf(stderr, "Error writing WAV header.\n");
    free(fbuf);
    return -1;
  }
  for( read= pcmdata+len-1, write= fbuf+len-1; write>= fbuf; --read, --write )
    *write= (signed short)(scale*(*read-min) + SHRT_MIN);
  fwrite(fbuf, 2, len, out);
  free(fbuf);
  if( ferror(out) ) {
    fprintf(stderr, "Error writing data to disk.\n");
    return -1;
  }
  fclose(out);
  return 0;
}





