00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_biquad_cascade_df1_32x64_q31.c 00009 * 00010 * Description: High precision Q31 Biquad cascade filter processing function 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Version 1.0.10 2011/7/15 00015 * Big Endian support added and Merged M0 and M3/M4 Source code. 00016 * 00017 * Version 1.0.3 2010/11/29 00018 * Re-organized the CMSIS folders and updated documentation. 00019 * 00020 * Version 1.0.2 2010/11/11 00021 * Documentation updated. 00022 * 00023 * Version 1.0.1 2010/10/05 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 1.0.0 2010/09/20 00027 * Production release and review comments incorporated. 00028 * 00029 * Version 0.0.7 2010/06/10 00030 * Misra-C changes done 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00176 void arm_biquad_cas_df1_32x64_q31( 00177 const arm_biquad_cas_df1_32x64_ins_q31 * S, 00178 q31_t * pSrc, 00179 q31_t * pDst, 00180 uint32_t blockSize) 00181 { 00182 q31_t *pIn = pSrc; /* input pointer initialization */ 00183 q31_t *pOut = pDst; /* output pointer initialization */ 00184 q63_t *pState = S->pState; /* state pointer initialization */ 00185 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00186 q63_t acc; /* accumulator */ 00187 q63_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00188 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00189 q63_t Xn; /* temporary input */ 00190 int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */ 00191 uint32_t sample, stage = S->numStages; /* loop counters */ 00192 00193 00194 #ifndef ARM_MATH_CM0 00195 00196 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00197 00198 do 00199 { 00200 /* Reading the coefficients */ 00201 b0 = *pCoeffs++; 00202 b1 = *pCoeffs++; 00203 b2 = *pCoeffs++; 00204 a1 = *pCoeffs++; 00205 a2 = *pCoeffs++; 00206 00207 /* Reading the state values */ 00208 Xn1 = pState[0]; 00209 Xn2 = pState[1]; 00210 Yn1 = pState[2]; 00211 Yn2 = pState[3]; 00212 00213 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00214 /* The variable acc hold output value that is being computed and 00215 * stored in the destination buffer 00216 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00217 */ 00218 00219 sample = blockSize >> 2u; 00220 00221 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00222 ** a second loop below computes the remaining 1 to 3 samples. */ 00223 while(sample > 0u) 00224 { 00225 /* Read the input */ 00226 Xn = *pIn++; 00227 00228 /* The value is shifted to the MSB to perform 32x64 multiplication */ 00229 Xn = Xn << 32; 00230 00231 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00232 00233 /* acc = b0 * x[n] */ 00234 acc = mult32x64(Xn, b0); 00235 /* acc += b1 * x[n-1] */ 00236 acc += mult32x64(Xn1, b1); 00237 /* acc += b[2] * x[n-2] */ 00238 acc += mult32x64(Xn2, b2); 00239 /* acc += a1 * y[n-1] */ 00240 acc += mult32x64(Yn1, a1); 00241 /* acc += a2 * y[n-2] */ 00242 acc += mult32x64(Yn2, a2); 00243 00244 /* The result is converted to 1.63 , Yn2 variable is reused */ 00245 Yn2 = acc << shift; 00246 00247 /* Store the output in the destination buffer in 1.31 format. */ 00248 *pOut++ = (q31_t) (acc >> (32 - shift)); 00249 00250 /* Read the second input into Xn2, to reuse the value */ 00251 Xn2 = *pIn++; 00252 00253 /* The value is shifted to the MSB to perform 32x64 multiplication */ 00254 Xn2 = Xn2 << 32; 00255 00256 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00257 00258 /* acc = b0 * x[n] */ 00259 acc = mult32x64(Xn2, b0); 00260 /* acc += b1 * x[n-1] */ 00261 acc += mult32x64(Xn, b1); 00262 /* acc += b[2] * x[n-2] */ 00263 acc += mult32x64(Xn1, b2); 00264 /* acc += a1 * y[n-1] */ 00265 acc += mult32x64(Yn2, a1); 00266 /* acc += a2 * y[n-2] */ 00267 acc += mult32x64(Yn1, a2); 00268 00269 /* The result is converted to 1.63, Yn1 variable is reused */ 00270 Yn1 = acc << shift; 00271 00272 /* The result is converted to 1.31 */ 00273 /* Store the output in the destination buffer. */ 00274 *pOut++ = (q31_t) (acc >> (32 - shift)); 00275 00276 /* Read the third input into Xn1, to reuse the value */ 00277 Xn1 = *pIn++; 00278 00279 /* The value is shifted to the MSB to perform 32x64 multiplication */ 00280 Xn1 = Xn1 << 32; 00281 00282 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00283 /* acc = b0 * x[n] */ 00284 acc = mult32x64(Xn1, b0); 00285 /* acc += b1 * x[n-1] */ 00286 acc += mult32x64(Xn2, b1); 00287 /* acc += b[2] * x[n-2] */ 00288 acc += mult32x64(Xn, b2); 00289 /* acc += a1 * y[n-1] */ 00290 acc += mult32x64(Yn1, a1); 00291 /* acc += a2 * y[n-2] */ 00292 acc += mult32x64(Yn2, a2); 00293 00294 /* The result is converted to 1.63, Yn2 variable is reused */ 00295 Yn2 = acc << shift; 00296 00297 /* Store the output in the destination buffer in 1.31 format. */ 00298 *pOut++ = (q31_t) (acc >> (32 - shift)); 00299 00300 /* Read the fourth input into Xn, to reuse the value */ 00301 Xn = *pIn++; 00302 00303 /* The value is shifted to the MSB to perform 32x64 multiplication */ 00304 Xn = Xn << 32; 00305 00306 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00307 /* acc = b0 * x[n] */ 00308 acc = mult32x64(Xn, b0); 00309 /* acc += b1 * x[n-1] */ 00310 acc += mult32x64(Xn1, b1); 00311 /* acc += b[2] * x[n-2] */ 00312 acc += mult32x64(Xn2, b2); 00313 /* acc += a1 * y[n-1] */ 00314 acc += mult32x64(Yn2, a1); 00315 /* acc += a2 * y[n-2] */ 00316 acc += mult32x64(Yn1, a2); 00317 00318 /* The result is converted to 1.63, Yn1 variable is reused */ 00319 Yn1 = acc << shift; 00320 00321 /* Every time after the output is computed state should be updated. */ 00322 /* The states should be updated as: */ 00323 /* Xn2 = Xn1 */ 00324 /* Xn1 = Xn */ 00325 /* Yn2 = Yn1 */ 00326 /* Yn1 = acc */ 00327 Xn2 = Xn1; 00328 Xn1 = Xn; 00329 00330 /* Store the output in the destination buffer in 1.31 format. */ 00331 *pOut++ = (q31_t) (acc >> (32 - shift)); 00332 00333 /* decrement the loop counter */ 00334 sample--; 00335 } 00336 00337 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00338 ** No loop unrolling is used. */ 00339 sample = (blockSize & 0x3u); 00340 00341 while(sample > 0u) 00342 { 00343 /* Read the input */ 00344 Xn = *pIn++; 00345 00346 /* The value is shifted to the MSB to perform 32x64 multiplication */ 00347 Xn = Xn << 32; 00348 00349 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00350 /* acc = b0 * x[n] */ 00351 acc = mult32x64(Xn, b0); 00352 /* acc += b1 * x[n-1] */ 00353 acc += mult32x64(Xn1, b1); 00354 /* acc += b[2] * x[n-2] */ 00355 acc += mult32x64(Xn2, b2); 00356 /* acc += a1 * y[n-1] */ 00357 acc += mult32x64(Yn1, a1); 00358 /* acc += a2 * y[n-2] */ 00359 acc += mult32x64(Yn2, a2); 00360 00361 /* Every time after the output is computed state should be updated. */ 00362 /* The states should be updated as: */ 00363 /* Xn2 = Xn1 */ 00364 /* Xn1 = Xn */ 00365 /* Yn2 = Yn1 */ 00366 /* Yn1 = acc */ 00367 Xn2 = Xn1; 00368 Xn1 = Xn; 00369 Yn2 = Yn1; 00370 Yn1 = acc << shift; 00371 00372 /* Store the output in the destination buffer in 1.31 format. */ 00373 *pOut++ = (q31_t) (acc >> (32 - shift)); 00374 00375 /* decrement the loop counter */ 00376 sample--; 00377 } 00378 00379 /* The first stage output is given as input to the second stage. */ 00380 pIn = pDst; 00381 00382 /* Reset to destination buffer working pointer */ 00383 pOut = pDst; 00384 00385 /* Store the updated state variables back into the pState array */ 00386 *pState++ = Xn1; 00387 *pState++ = Xn2; 00388 *pState++ = Yn1; 00389 *pState++ = Yn2; 00390 00391 } while(--stage); 00392 00393 #else 00394 00395 /* Run the below code for Cortex-M0 */ 00396 00397 do 00398 { 00399 /* Reading the coefficients */ 00400 b0 = *pCoeffs++; 00401 b1 = *pCoeffs++; 00402 b2 = *pCoeffs++; 00403 a1 = *pCoeffs++; 00404 a2 = *pCoeffs++; 00405 00406 /* Reading the state values */ 00407 Xn1 = pState[0]; 00408 Xn2 = pState[1]; 00409 Yn1 = pState[2]; 00410 Yn2 = pState[3]; 00411 00412 /* The variable acc hold output value that is being computed and 00413 * stored in the destination buffer 00414 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00415 */ 00416 00417 sample = blockSize; 00418 00419 while(sample > 0u) 00420 { 00421 /* Read the input */ 00422 Xn = *pIn++; 00423 00424 /* The value is shifted to the MSB to perform 32x64 multiplication */ 00425 Xn = Xn << 32; 00426 00427 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00428 /* acc = b0 * x[n] */ 00429 acc = mult32x64(Xn, b0); 00430 /* acc += b1 * x[n-1] */ 00431 acc += mult32x64(Xn1, b1); 00432 /* acc += b[2] * x[n-2] */ 00433 acc += mult32x64(Xn2, b2); 00434 /* acc += a1 * y[n-1] */ 00435 acc += mult32x64(Yn1, a1); 00436 /* acc += a2 * y[n-2] */ 00437 acc += mult32x64(Yn2, a2); 00438 00439 /* Every time after the output is computed state should be updated. */ 00440 /* The states should be updated as: */ 00441 /* Xn2 = Xn1 */ 00442 /* Xn1 = Xn */ 00443 /* Yn2 = Yn1 */ 00444 /* Yn1 = acc */ 00445 Xn2 = Xn1; 00446 Xn1 = Xn; 00447 Yn2 = Yn1; 00448 Yn1 = acc << shift; 00449 00450 /* Store the output in the destination buffer in 1.31 format. */ 00451 *pOut++ = (q31_t) (acc >> (32 - shift)); 00452 00453 /* decrement the loop counter */ 00454 sample--; 00455 } 00456 00457 /* The first stage output is given as input to the second stage. */ 00458 pIn = pDst; 00459 00460 /* Reset to destination buffer working pointer */ 00461 pOut = pDst; 00462 00463 /* Store the updated state variables back into the pState array */ 00464 *pState++ = Xn1; 00465 *pState++ = Xn2; 00466 *pState++ = Yn1; 00467 *pState++ = Yn2; 00468 00469 } while(--stage); 00470 00471 #endif /* #ifndef ARM_MATH_CM0 */ 00472 } 00473