time_scale.h

00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * time_scale.h - Time scaling for linear speech data
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2004 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  *
00025  * $Id: time_scale.h,v 1.16 2008/07/28 15:14:30 steveu Exp $
00026  */
00027 
00028 #if !defined(_SPANDSP_TIME_SCALE_H_)
00029 #define _SPANDSP_TIME_SCALE_H_
00030 
00031 /*! \page time_scale_page Time scaling speech
00032 \section time_scale_page_sec_1 What does it do?
00033 The time scaling module allows speech files to be played back at a
00034 different speed from the speed at which they were recorded. If this
00035 were done by simply speeding up or slowing down replay, the pitch of
00036 the voice would change, and sound very odd. This module keeps the pitch
00037 of the voice at its original level.
00038 
00039 The speed of the voice may be altered over a wide range. However, the practically
00040 useful rates are between about half normal speed and twice normal speed.
00041 
00042 \section time_scale_page_sec_2 How does it work?
00043 The time scaling module is based on the Pointer Interval Controlled
00044 OverLap and Add (PICOLA) method, developed by Morita Naotaka.
00045 Mikio Ikeda has an excellent web page on this subject at
00046 http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html
00047 There is also working code there. This implementation uses
00048 exactly the same algorithms, but the code is a complete rewrite.
00049 Mikio's code batch processes files. This version works incrementally
00050 on streams, and allows multiple streams to be processed concurrently.
00051 
00052 \section time_scale_page_sec_3 How do I use it?
00053 The output buffer must be big enough to hold the maximum number of samples which
00054 could result from the data in the input buffer, which is:
00055 
00056     input_len*playout_rate + sample_rate/TIME_SCALE_MIN_PITCH + 1
00057 */
00058 
00059 #define TIME_SCALE_MAX_SAMPLE_RATE  48000 /*!< Highest sample rate allowed for when sizing the sample buffer (see TIME_SCALE_BUF_LEN). */
00060 #define TIME_SCALE_MIN_PITCH        60 /*!< Lower pitch limit. NOTE(review): presumably in Hz, as sample_rate/TIME_SCALE_MIN_PITCH is used as a sample count - confirm. */
00061 #define TIME_SCALE_MAX_PITCH        250 /*!< Upper pitch limit. NOTE(review): presumably in Hz - confirm. */
00062 #define TIME_SCALE_BUF_LEN          (2*TIME_SCALE_MAX_SAMPLE_RATE/TIME_SCALE_MIN_PITCH) /*!< Number of int16_t entries in the buf[] member of time_scale_state_t. */
00063 
00064 /*! Audio time scaling descriptor. */
00065 typedef struct
00066 {
00067     int sample_rate; /*!< Sample rate of the signal. NOTE(review): presumably the value given to time_scale_init() - confirm. */
00068     int min_pitch; /*!< NOTE(review): lower pitch limit for this context; units not visible in this header - confirm. */
00069     int max_pitch; /*!< NOTE(review): upper pitch limit for this context; units not visible in this header - confirm. */
00070     int buf_len; /*!< NOTE(review): presumably the portion of buf[] in use, at most TIME_SCALE_BUF_LEN - confirm. */
00071     float playout_rate; /*!< Ratio between the output speed and the input speed. NOTE(review): presumably as set by time_scale_init()/time_scale_rate() - confirm. */
00072     double rcomp; /*!< NOTE(review): internal working state; meaning not visible in this header. */
00073     double rate_nudge; /*!< NOTE(review): internal working state; meaning not visible in this header. */
00074     int fill; /*!< NOTE(review): presumably the number of samples currently held in buf[] - confirm. */
00075     int lcp; /*!< NOTE(review): internal working state; meaning not visible in this header. */
00076     int16_t buf[TIME_SCALE_BUF_LEN]; /*!< Buffer of TIME_SCALE_BUF_LEN 16 bit samples. */
00077 } time_scale_state_t;
00078 
00079 #if defined(__cplusplus)
00080 extern "C"
00081 {
00082 #endif
00083 
00084 /*! Initialise a time scale context. This must be called before the first
00085     use of the context, to initialise its contents.
00086     \brief Initialise a time scale context.
00087     \param s The time scale context. NOTE(review): behaviour when s is NULL is not visible in this header - confirm.
00088     \param sample_rate The sample rate of the signal.
00089     \param playout_rate The ratio between the output speed and the input speed.
00090     \return A pointer to the context, or NULL if there was a problem. */
00091 time_scale_state_t *time_scale_init(time_scale_state_t *s, int sample_rate, float playout_rate);
00092 
00093 /*! \brief Free a time scale context.
00094     \param s The time scale context. NOTE(review): whether the memory of a caller supplied context is released is not visible in this header - confirm.
00095     \return 0 for OK, else -1. */
00096 int time_scale_free(time_scale_state_t *s);
00097 
00098 /*! Change the time scale rate of an existing time scale context.
00099     \brief Change the time scale rate.
00100     \param s The time scale context.
00101     \param playout_rate The ratio between the output speed and the input speed.
00102     \return 0 if changed OK, else -1. */
00103 int time_scale_rate(time_scale_state_t *s, float playout_rate);
00104 
00105 /*! Find the maximum possible number of samples which could result from scaling the
00106     specified number of input samples, at the current playback rate.
00107     \brief Find the maximum possible number of output samples.
00108     \param s The time scale context.
00109     \param input_len The number of input samples.
00110     \return The maximum possible number of output samples. */
00111 int time_scale_max_output_len(time_scale_state_t *s, int input_len);
00112 
00113 /*! Time scale a chunk of audio samples.
00114     \brief Time scale a chunk of audio samples.
00115     \param s The time scale context.
00116     \param out The output audio sample buffer. This must be large enough to accept
00117            the longest possible result from processing the input data. See the
00118            algorithm documentation for how the longest possible result may be calculated, or use time_scale_max_output_len().
00119     \param in The input audio sample buffer.
00120     \param len The number of input samples.
00121     \return The number of output samples.
00122 */
00123 int time_scale(time_scale_state_t *s, int16_t out[], int16_t in[], int len);
00124 
00125 #if defined(__cplusplus)
00126 }
00127 #endif
00128 
00129 #endif
00130 /*- End of file ------------------------------------------------------------*/