fir.h

00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * fir.h - General telephony FIR routines
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2002 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  *
00025  * $Id: fir.h,v 1.13 2008/04/17 14:27:00 steveu Exp $
00026  */
00027 
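/*! \page fir_page FIR filtering
\section fir_page_sec_1 What does it do?
These routines implement finite impulse response (FIR) filters which process
one 16 bit sample per call. Variants are provided which use 16 bit integer,
32 bit integer, and floating point coefficients.

\section fir_page_sec_2 How does it work?
Each filter keeps a circular history of the most recent input samples, one
per tap. Each call stores the new sample in the history, sums the products of
the coefficients and the stored samples, and steps the history position ready
for the next call. The 16 bit integer filter can use MMX or SSE2 instructions
when the code is built with USE_MMX or USE_SSE2 defined.
*/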
00035 
00036 #if !defined(_SPANDSP_FIR_H_)
00037 #define _SPANDSP_FIR_H_
00038 
00039 #if defined(USE_MMX)  ||  defined(USE_SSE2)
00040 #include "mmx.h"
00041 #endif
00042 
/*!
    16 bit integer FIR descriptor. This holds the working state for a single
    instance of an FIR filter which uses 16 bit integer coefficients and
    filters 16 bit integer data.
*/
typedef struct
{
    /*! \brief The number of taps in the filter. */
    int taps;
    /*! \brief The index in the history buffer at which the next sample will
               be stored. */
    int curr_pos;
    /*! \brief The filter coefficients. These belong to the caller; only the
               pointer is kept here. */
    const int16_t *coeffs;
    /*! \brief The buffer of recent samples. It holds one entry per tap, or
               two entries per tap when USE_MMX or USE_SSE2 is defined. */
    int16_t *history;
} fir16_state_t;
00054 
/*!
    32 bit integer FIR descriptor. This holds the working state for a single
    instance of an FIR filter which uses 32 bit integer coefficients and
    filters 16 bit integer data.
*/
typedef struct
{
    /*! \brief The number of taps in the filter. */
    int taps;
    /*! \brief The index in the history buffer at which the next sample will
               be stored. */
    int curr_pos;
    /*! \brief The filter coefficients. These belong to the caller; only the
               pointer is kept here. */
    const int32_t *coeffs;
    /*! \brief The buffer of recent samples, one entry per tap. */
    int16_t *history;
} fir32_state_t;
00067 
/*!
    Floating point FIR descriptor. This holds the working state for a single
    instance of an FIR filter which uses floating point coefficients and a
    floating point sample history.
*/
typedef struct
{
    /*! \brief The number of taps in the filter. */
    int taps;
    /*! \brief The index in the history buffer at which the next sample will
               be stored. */
    int curr_pos;
    /*! \brief The filter coefficients. These belong to the caller; only the
               pointer is kept here. */
    const float *coeffs;
    /*! \brief The buffer of recent samples, one entry per tap. */
    float *history;
} fir_float_state_t;
00079 
00080 #if defined(__cplusplus)
00081 extern "C"
00082 {
00083 #endif
00084 
00085 static __inline__ const int16_t *fir16_create(fir16_state_t *fir,
00086                                               const int16_t *coeffs,
00087                                               int taps)
00088 {
00089     fir->taps = taps;
00090     fir->curr_pos = taps - 1;
00091     fir->coeffs = coeffs;
00092 #if defined(USE_MMX)  ||  defined(USE_SSE2)
00093     if ((fir->history = malloc(2*taps*sizeof(int16_t))))
00094         memset(fir->history, 0, 2*taps*sizeof(int16_t));
00095 #else
00096     if ((fir->history = (int16_t *) malloc(taps*sizeof(int16_t))))
00097         memset(fir->history, 0, taps*sizeof(int16_t));
00098 #endif
00099     return fir->history;
00100 }
00101 /*- End of function --------------------------------------------------------*/
00102 
00103 static __inline__ void fir16_flush(fir16_state_t *fir)
00104 {
00105 #if defined(USE_MMX)  ||  defined(USE_SSE2)
00106     memset(fir->history, 0, 2*fir->taps*sizeof(int16_t));
00107 #else
00108     memset(fir->history, 0, fir->taps*sizeof(int16_t));
00109 #endif
00110 }
00111 /*- End of function --------------------------------------------------------*/
00112 
00113 static __inline__ void fir16_free(fir16_state_t *fir)
00114 {
00115     free(fir->history);
00116 }
00117 /*- End of function --------------------------------------------------------*/
00118 
00119 static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
00120 {
00121     int i;
00122     int32_t y;
00123 #if defined(USE_MMX)
00124     mmx_t *mmx_coeffs;
00125     mmx_t *mmx_hist;
00126 
00127     fir->history[fir->curr_pos] = sample;
00128     fir->history[fir->curr_pos + fir->taps] = sample;
00129 
00130     mmx_coeffs = (mmx_t *) fir->coeffs;
00131     mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
00132     i = fir->taps;
00133     pxor_r2r(mm4, mm4);
00134     /* 8 samples per iteration, so the filter must be a multiple of 8 long. */
00135     while (i > 0)
00136     {
00137         movq_m2r(mmx_coeffs[0], mm0);
00138         movq_m2r(mmx_coeffs[1], mm2);
00139         movq_m2r(mmx_hist[0], mm1);
00140         movq_m2r(mmx_hist[1], mm3);
00141         mmx_coeffs += 2;
00142         mmx_hist += 2;
00143         pmaddwd_r2r(mm1, mm0);
00144         pmaddwd_r2r(mm3, mm2);
00145         paddd_r2r(mm0, mm4);
00146         paddd_r2r(mm2, mm4);
00147         i -= 8;
00148     }
00149     movq_r2r(mm4, mm0);
00150     psrlq_i2r(32, mm0);
00151     paddd_r2r(mm0, mm4);
00152     movd_r2m(mm4, y);
00153     emms();
00154 #elif defined(USE_SSE2)
00155     xmm_t *xmm_coeffs;
00156     xmm_t *xmm_hist;
00157 
00158     fir->history[fir->curr_pos] = sample;
00159     fir->history[fir->curr_pos + fir->taps] = sample;
00160 
00161     xmm_coeffs = (xmm_t *) fir->coeffs;
00162     xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
00163     i = fir->taps;
00164     pxor_r2r(xmm4, xmm4);
00165     /* 16 samples per iteration, so the filter must be a multiple of 16 long. */
00166     while (i > 0)
00167     {
00168         movdqu_m2r(xmm_coeffs[0], xmm0);
00169         movdqu_m2r(xmm_coeffs[1], xmm2);
00170         movdqu_m2r(xmm_hist[0], xmm1);
00171         movdqu_m2r(xmm_hist[1], xmm3);
00172         xmm_coeffs += 2;
00173         xmm_hist += 2;
00174         pmaddwd_r2r(xmm1, xmm0);
00175         pmaddwd_r2r(xmm3, xmm2);
00176         paddd_r2r(xmm0, xmm4);
00177         paddd_r2r(xmm2, xmm4);
00178         i -= 16;
00179     }
00180     movdqa_r2r(xmm4, xmm0);
00181     psrldq_i2r(8, xmm0);
00182     paddd_r2r(xmm0, xmm4);
00183     movdqa_r2r(xmm4, xmm0);
00184     psrldq_i2r(4, xmm0);
00185     paddd_r2r(xmm0, xmm4);
00186     movd_r2m(xmm4, y);
00187 #else
00188     int offset1;
00189     int offset2;
00190 
00191     fir->history[fir->curr_pos] = sample;
00192 
00193     offset2 = fir->curr_pos;
00194     offset1 = fir->taps - offset2;
00195     y = 0;
00196     for (i = fir->taps - 1;  i >= offset1;  i--)
00197         y += fir->coeffs[i]*fir->history[i - offset1];
00198     for (  ;  i >= 0;  i--)
00199         y += fir->coeffs[i]*fir->history[i + offset2];
00200 #endif
00201     if (fir->curr_pos <= 0)
00202         fir->curr_pos = fir->taps;
00203     fir->curr_pos--;
00204     return (int16_t) (y >> 15);
00205 }
00206 /*- End of function --------------------------------------------------------*/
00207 
00208 static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
00209                                               const int32_t *coeffs,
00210                                               int taps)
00211 {
00212     fir->taps = taps;
00213     fir->curr_pos = taps - 1;
00214     fir->coeffs = coeffs;
00215     fir->history = (int16_t *) malloc(taps*sizeof(int16_t));
00216     if (fir->history)
00217         memset(fir->history, '\0', taps*sizeof(int16_t));
00218     return fir->history;
00219 }
00220 /*- End of function --------------------------------------------------------*/
00221 
00222 static __inline__ void fir32_flush(fir32_state_t *fir)
00223 {
00224     memset(fir->history, 0, fir->taps*sizeof(int16_t));
00225 }
00226 /*- End of function --------------------------------------------------------*/
00227 
00228 static __inline__ void fir32_free(fir32_state_t *fir)
00229 {
00230     free(fir->history);
00231 }
00232 /*- End of function --------------------------------------------------------*/
00233 
00234 static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
00235 {
00236     int i;
00237     int32_t y;
00238     int offset1;
00239     int offset2;
00240 
00241     fir->history[fir->curr_pos] = sample;
00242     offset2 = fir->curr_pos;
00243     offset1 = fir->taps - offset2;
00244     y = 0;
00245     for (i = fir->taps - 1;  i >= offset1;  i--)
00246         y += fir->coeffs[i]*fir->history[i - offset1];
00247     for (  ;  i >= 0;  i--)
00248         y += fir->coeffs[i]*fir->history[i + offset2];
00249     if (fir->curr_pos <= 0)
00250         fir->curr_pos = fir->taps;
00251     fir->curr_pos--;
00252     return (int16_t) (y >> 15);
00253 }
00254 /*- End of function --------------------------------------------------------*/
00255 
00256 static __inline__ const float *fir_float_create(fir_float_state_t *fir,
00257                                                 const float *coeffs,
00258                                                 int taps)
00259 {
00260     fir->taps = taps;
00261     fir->curr_pos = taps - 1;
00262     fir->coeffs = coeffs;
00263     fir->history = (float *) malloc(taps*sizeof(float));
00264     if (fir->history)
00265         memset(fir->history, '\0', taps*sizeof(float));
00266     return fir->history;
00267 }
00268 /*- End of function --------------------------------------------------------*/
00269     
00270 static __inline__ void fir_float_free(fir_float_state_t *fir)
00271 {
00272     free(fir->history);
00273 }
00274 /*- End of function --------------------------------------------------------*/
00275 
00276 static __inline__ int16_t fir_float(fir_float_state_t *fir, int16_t sample)
00277 {
00278     int i;
00279     float y;
00280     int offset1;
00281     int offset2;
00282 
00283     fir->history[fir->curr_pos] = sample;
00284 
00285     offset2 = fir->curr_pos;
00286     offset1 = fir->taps - offset2;
00287     y = 0;
00288     for (i = fir->taps - 1;  i >= offset1;  i--)
00289         y += fir->coeffs[i]*fir->history[i - offset1];
00290     for (  ;  i >= 0;  i--)
00291         y += fir->coeffs[i]*fir->history[i + offset2];
00292     if (fir->curr_pos <= 0)
00293         fir->curr_pos = fir->taps;
00294     fir->curr_pos--;
00295     return  (int16_t) y;
00296 }
00297 /*- End of function --------------------------------------------------------*/
00298 
00299 #if defined(__cplusplus)
00300 }
00301 #endif
00302 
00303 #endif
00304 /*- End of file ------------------------------------------------------------*/