time_scale.h

00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * time_scale.h - Time scaling for linear speech data
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2004 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  *
00025  * $Id: time_scale.h,v 1.15 2008/04/17 14:27:01 steveu Exp $
00026  */
00027 
00028 #if !defined(_SPANDSP_TIME_SCALE_H_)
00029 #define _SPANDSP_TIME_SCALE_H_
00030 
00031 /*! \page time_scale_page Time scaling speech
00032 \section time_scale_page_sec_1 What does it do?
00033 The time scaling module allows speech files to be played back at a
00034 different speed from the speed at which they were recorded. If this
00035 were done by simply speeding up or slowing down replay, the pitch of
00036 the voice would change, and sound very odd. This module keeps the pitch
00037 of the voice normal.
00038 
00039 \section time_scale_page_sec_2 How does it work?
00040 The time scaling module is based on the Pointer Interval Controlled
00041 OverLap and Add (PICOLA) method, developed by Morita Naotaka.
00042 Mikio Ikeda has an excellent web page on this subject at
00043 http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html
00044 There is also working code there. This implementation uses
00045 exactly the same algorithms, but the code is a complete rewrite.
00046 Mikio's code batch processes files. This version works incrementally
00047 on streams, and allows multiple streams to be processed concurrently.
00048 */
00049 
00050 #define TIME_SCALE_MIN_PITCH    60      /*!< Lower pitch bound used by the time scaler; presumably in Hz - TODO confirm against time_scale.c. */
00051 #define TIME_SCALE_MAX_PITCH    250     /*!< Upper pitch bound used by the time scaler; presumably in Hz - TODO confirm against time_scale.c. */
00052 #define TIME_SCALE_BUF_LEN      (2*SAMPLE_RATE/TIME_SCALE_MIN_PITCH) /*!< Number of int16_t elements in time_scale_state_t::buf. SAMPLE_RATE is not defined in this header and must be in scope before it is used. */
00053 
00054 /*! Audio time scaling descriptor. */
00055 typedef struct
00056 {
00057     double rate;                        /*!< Presumably the rate set through time_scale_init()/time_scale_rate() - TODO confirm against time_scale.c. */
00058     double rcomp;                       /*!< NOTE(review): meaning is not visible in this header; see time_scale.c. */
00059     double rate_nudge;                  /*!< NOTE(review): meaning is not visible in this header; see time_scale.c. */
00060     int fill;                           /*!< Presumably the number of samples currently held in buf - TODO confirm. */
00061     int lcp;                            /*!< NOTE(review): meaning is not visible in this header; see time_scale.c. */
00062     int16_t buf[TIME_SCALE_BUF_LEN];    /*!< Storage for TIME_SCALE_BUF_LEN 16 bit samples. */
00063 } time_scale_state_t;
00064 
00065 #if defined(__cplusplus)
00066 extern "C"
00067 {
00068 #endif
00069 
00070 /*! Initialise a time scale context. This must be called before the first
00071     use of the context, to initialise its contents.
00072     \brief Initialise a time scale context.
00073     \param s The time scale context.
00074     \param rate The ratio between the output speed and the input speed.
00075     \return A pointer to the context, or NULL if there was a problem. */
00076 time_scale_state_t *time_scale_init(time_scale_state_t *s, float rate);
00077 
00078 /*! \brief Free a time scale context.
00079     \param s The time scale context.
00080     \return 0 for OK, else -1. */
00081 int time_scale_free(time_scale_state_t *s);
00082 
00083 /*! Change the time scale rate.
00084     \brief Change the time scale rate.
00085     \param s The time scale context.
00086     \param rate The ratio between the output speed and the input speed.
00087     \return 0 if changed OK, else -1. */
00088 int time_scale_rate(time_scale_state_t *s, float rate);
00089 
00090 /*! Time scale a chunk of audio samples.
00091     \brief Time scale a chunk of audio samples.
00092     \param s The time scale context.
00093     \param out The output audio sample buffer.
00094     \param in The input audio sample buffer.
00095     \param len The number of input samples.
00096     \return The number of output samples.
00097 */
00098 int time_scale(time_scale_state_t *s, int16_t out[], int16_t in[], int len);
00099 
00100 #if defined(__cplusplus)
00101 }
00102 #endif
00103 
00104 #endif
00105 /*- End of file ------------------------------------------------------------*/

Generated on Mon Jul 28 18:29:34 2008 for libspandsp by  doxygen 1.5.1