/*
 * SpanDSP - a series of DSP components for telephony
 *
 * plc.h
 *
 * Written by Steve Underwood <steveu@coppice.org>
 *
 * Copyright (C) 2004 Steve Underwood
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 2.1,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: plc.h,v 1.18 2008/04/17 14:27:00 steveu Exp $
 */

/*! \file */

#if !defined(_SPANDSP_PLC_H_)
#define _SPANDSP_PLC_H_

/* int16_t is used by the context structure and the prototypes below, so
   pull it in here rather than relying on the including file to do so. */
#include <stdint.h>

/*! \page plc_page Packet loss concealment
\section plc_page_sec_1 What does it do?
The packet loss concealment module provides a synthetic fill-in signal, to minimise
the audible effect of lost packets in VoIP applications. It is not tied to any
particular codec, and could be used with almost any codec which does not
specify its own procedure for packet loss concealment.

Where a codec specific concealment procedure exists, that algorithm is usually built
around knowledge of the characteristics of the particular codec. It will, therefore,
generally give better results for that particular codec than this generic concealer will.

The PLC code implements an algorithm similar to the one described in Appendix 1 of G.711.
However, the G.711 algorithm is optimised for 10ms packets. Few people use such small
packets. 20ms is a much more common value, and longer packets are also quite common. The
algorithm has been adjusted with this in mind. Also, the G.711 approach causes an
algorithmic delay, and requires significant buffer manipulation when there is no packet
loss. The algorithm used here avoids this. It causes no delay, and achieves comparable
quality with normal speech.

Note that both this algorithm, and the one in G.711 are optimised for speech. For most kinds
of music a much slower decay on bursts of lost packets gives better results.

\section plc_page_sec_2 How does it work?
While good packets are being received, the plc_rx() routine keeps a record of the trailing
section of the known speech signal. If a packet is missed, plc_fillin() is called to produce
a synthetic replacement for the real speech signal. The average magnitude difference function
(AMDF) is applied to the last known good signal, to determine its effective pitch.
Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech
will be repeated over and over until the real speech resumes. However, several refinements
are needed to obtain smooth pleasant sounding results.

- The two ends of the stored cycle of speech will not always fit together smoothly. This can
  cause roughness, or even clicks, at the joins between cycles. To soften this, the
  1/4 pitch period of real speech preceding the cycle to be repeated is blended with the last
  1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e.
  in total, the last 5/4 pitch periods of real speech are used).

- The start of the synthetic speech will not always fit together smoothly with the tail of
  real speech passed on before the erasure was identified. Ideally, we would like to modify
  the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However,
  it is too late for that. We could have delayed the real speech a little, but that would
  require more buffer manipulation, and hurt the efficiency of the no-lost-packets case
  (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify
  the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed,
  and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result
  seems quite acceptable.

- As we progress into the erasure, the chances of the synthetic signal being anything like
  correct steadily fall. Therefore, the volume of the synthesized signal is made to decay
  linearly, such that after 50ms of missing audio it is reduced to silence.

- When real speech resumes, an extra 1/4 pitch period of synthetic speech is blended with the
  start of the real speech. If the erasure is small, this smooths the transition. If the erasure
  is long, and the synthetic signal has faded to zero, the blending softens the start up of the
  real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset.

\section plc_page_sec_3 How do I use it?
Before audio is processed, call plc_init() to create an instance of the packet loss
concealer. For each received audio packet that is acceptable (i.e. not including those being
dropped for being too late) call plc_rx() to record the content of the packet. Note this may
modify the packet a little after a period of packet loss, to blend real and synthetic data smoothly.
When a real packet is not available in time, call plc_fillin() to create a synthetic substitute.
That's it!
*/

/*! Minimum allowed pitch (66 Hz), expressed as a pitch period in samples.
    The lowest pitch has the longest period, which is why this value is
    larger than PLC_PITCH_MAX. The Hz figures correspond to 8000 samples/second. */
#define PLC_PITCH_MIN           120
/*! Maximum allowed pitch (200 Hz), expressed as a pitch period in samples. */
#define PLC_PITCH_MAX           40
/*! Maximum pitch OLA window (one quarter of the longest pitch period) */
#define PLC_PITCH_OVERLAP_MAX   (PLC_PITCH_MIN >> 2)
/*! The length over which the AMDF function looks for similarity (20 ms) */
#define CORRELATION_SPAN        160
/*! History buffer length. The buffer must also be at least 1.25 times
    PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for
    the pitch assessment. */
#define PLC_HISTORY_LEN         (CORRELATION_SPAN + PLC_PITCH_MIN)

/*!
    The generic packet loss concealer context.
*/
typedef struct
{
    /*! Consecutive erased samples */
    int missing_samples;
    /*! Current offset into pitch period */
    int pitch_offset;
    /*! Pitch estimate */
    int pitch;
    /*! Buffer for a cycle of speech (sized for the longest allowed pitch period) */
    float pitchbuf[PLC_PITCH_MIN];
    /*! History buffer */
    int16_t history[PLC_HISTORY_LEN];
    /*! Current pointer into the history buffer */
    int buf_ptr;
} plc_state_t;


#if defined(__cplusplus)
extern "C"
{
#endif

/*! Process a block of received audio samples for PLC.
    \brief Process a block of received audio samples for PLC.
    \param s The packet loss concealer context.
    \param amp The audio sample buffer.
    \param len The number of samples in the buffer.
    \return The number of samples in the buffer. */
int plc_rx(plc_state_t *s, int16_t amp[], int len);

/*! Fill-in a block of missing audio samples.
    \brief Fill-in a block of missing audio samples.
    \param s The packet loss concealer context.
    \param amp The audio sample buffer.
    \param len The number of samples to be synthesised.
    \return The number of samples synthesized. */
int plc_fillin(plc_state_t *s, int16_t amp[], int len);

/*! Initialise a packet loss concealer context.
    \brief Initialise a PLC context.
    \param s The packet loss concealer context.
    \return A pointer to the packet loss concealer context. */
plc_state_t *plc_init(plc_state_t *s);

/*! Free a packet loss concealer context.
    \brief Free a PLC context.
    \param s The packet loss concealer context.
    \return 0 for OK. */
int plc_free(plc_state_t *s);

#if defined(__cplusplus)
}
#endif

#endif
/*- End of file ------------------------------------------------------------*/