Camera_driver: initialize project and libraries

This commit is contained in:
Petr Malanik
2022-08-15 18:21:50 +02:00
parent 4da5aefb5a
commit 34b9eaafc2
1280 changed files with 1099270 additions and 0 deletions

View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: BasicMathFunctions.c
* Description: Combination of all basic math function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_abs_f32.c"
#include "arm_abs_q15.c"
#include "arm_abs_q31.c"
#include "arm_abs_q7.c"
#include "arm_add_f32.c"
#include "arm_add_q15.c"
#include "arm_add_q31.c"
#include "arm_add_q7.c"
#include "arm_dot_prod_f32.c"
#include "arm_dot_prod_q15.c"
#include "arm_dot_prod_q31.c"
#include "arm_dot_prod_q7.c"
#include "arm_mult_f32.c"
#include "arm_mult_q15.c"
#include "arm_mult_q31.c"
#include "arm_mult_q7.c"
#include "arm_negate_f32.c"
#include "arm_negate_q15.c"
#include "arm_negate_q31.c"
#include "arm_negate_q7.c"
#include "arm_offset_f32.c"
#include "arm_offset_q15.c"
#include "arm_offset_q31.c"
#include "arm_offset_q7.c"
#include "arm_scale_f32.c"
#include "arm_scale_q15.c"
#include "arm_scale_q31.c"
#include "arm_scale_q7.c"
#include "arm_shift_q15.c"
#include "arm_shift_q31.c"
#include "arm_shift_q7.c"
#include "arm_sub_f32.c"
#include "arm_sub_q15.c"
#include "arm_sub_q31.c"
#include "arm_sub_q7.c"

View File

@ -0,0 +1,16 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPBasicMath)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPBasicMath STATIC ${SRC})
configdsp(CMSISDSPBasicMath ..)
### Includes
target_include_directories(CMSISDSPBasicMath PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,146 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_f32.c
* Description: Floating-point vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include <math.h>
/**
@ingroup groupMath
*/
/**
@defgroup BasicAbs Vector Absolute Value
Computes the absolute value of a vector on an element-by-element basis.
<pre>
pDst[n] = abs(pSrc[n]), 0 <= n < blockSize.
</pre>
The functions support in-place computation allowing the source and
destination pointers to reference the same memory buffer.
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Floating-point vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_abs_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute values and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vabsq_f32(vec1);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute and store result in destination buffer. */
*pDst++ = fabsf(*pSrc++);
*pDst++ = fabsf(*pSrc++);
*pDst++ = fabsf(*pSrc++);
*pDst++ = fabsf(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute and store result in destination buffer. */
*pDst++ = fabsf(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_q15.c
* Description: Q15 vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Q15 vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_abs_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q15_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_q31.c
* Description: Q31 vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Q31 vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_abs_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t in; /* Temporary variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,134 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_q7.c
* Description: Q7 vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Q7 vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Conditions for optimum performance
Input and output buffers should be aligned by 32-bit
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
*/
void arm_abs_q7(
const q7_t * pSrc,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q7_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t) __QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_f32.c
* Description: Floating-point vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicAdd Vector Addition
Element-by-element addition of two vectors.
<pre>
pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Floating-point vector addition.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_add_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
res = vaddq_f32(vec1, vec2);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = (*pSrcA++) + (*pSrcB++);
*pDst++ = (*pSrcA++) + (*pSrcB++);
*pDst++ = (*pSrcA++) + (*pSrcB++);
*pDst++ = (*pSrcA++) + (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = (*pSrcA++) + (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_q15.c
* Description: Q15 vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Q15 vector addition.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_add_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2;
q31_t inB1, inB2;
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 times 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
/* Add and store 2 times 2 samples at a time */
write_q15x2_ia (&pDst, __QADD16(inA1, inB1));
write_q15x2_ia (&pDst, __QADD16(inA2, inB2));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,108 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_q31.c
* Description: Q31 vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Q31 vector addition.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_add_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,109 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_q7.c
* Description: Q7 vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Q7 vector addition.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_add_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
#if defined (ARM_MATH_DSP)
/* Add and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
#else
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,163 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_f32.c
* Description: Floating-point dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicDotProd Vector Dot Product
Computes the dot product of two vectors.
The vectors are multiplied element-by-element and then summed.
<pre>
sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of floating-point vectors.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[in] blockSize number of samples in each vector.
@param[out] result output result returned here.
@return none
*/
void arm_dot_prod_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary return variable */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
float32x4_t accum = vdupq_n_f32(0);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
while (blkCnt > 0U)
{
/* C = A[0]*B[0] + A[1]*B[1] + A[2]*B[2] + ... + A[blockSize-1]*B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
accum = vmlaq_f32(accum, vec1, vec2);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
/* Decrement the loop counter */
blkCnt--;
}
#if __aarch64__
sum = vpadds_f32(vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)));
#else
sum = (vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)))[0] + (vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)))[1];
#endif
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,120 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q15.c
* Description: Q15 dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in each vector
@param[out] result output result returned here
@return none
@par Scaling and Overflow Behavior
The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
results are added to a 64-bit accumulator in 34.30 format.
Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
there is no risk of overflow.
The return result is in 34.30 format.
*/
void arm_dot_prod_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q63_t * result)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
#if defined (ARM_MATH_DSP)
/* Calculate dot product and store result in a temporary buffer. */
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
#else
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
//#if defined (ARM_MATH_DSP)
// sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
//#else
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
//#endif
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer in 34.30 format */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,115 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q31.c
* Description: Q31 dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of Q31 vectors.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[in] blockSize number of samples in each vector.
@param[out] result output result returned here.
@return none
@par Scaling and Overflow Behavior
The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
are truncated to 2.48 format by discarding the lower 14 bits.
The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
There are 15 guard bits in the accumulator and there is no risk of overflow as long as
the length of the vectors is less than 2^16 elements.
The return result is in 16.48 format.
*/
void arm_dot_prod_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q63_t * result)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer in 16.48 format */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,139 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q7.c
* Description: Q7 dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of Q7 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in each vector
@param[out] result output result returned here
@return none
@par Scaling and Overflow Behavior
The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
results are added to an accumulator in 18.14 format.
Nonsaturating additions are used and there is no danger of wrap around as long as
the vectors are less than 2^18 elements long.
The return result is in 18.14 format.
*/
void arm_dot_prod_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
uint32_t blockSize,
q31_t * result)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t input1, input2; /* Temporary variables */
q31_t inA1, inA2, inB1, inB2; /* Temporary variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
#if defined (ARM_MATH_DSP)
/* read 4 samples at a time from sourceA */
input1 = read_q7x4_ia ((q7_t **) &pSrcA);
/* read 4 samples at a time from sourceB */
input2 = read_q7x4_ia ((q7_t **) &pSrcB);
/* extract two q7_t samples to q15_t samples */
inA1 = __SXTB16(__ROR(input1, 8));
/* extract reminaing two samples */
inA2 = __SXTB16(input1);
/* extract two q7_t samples to q15_t samples */
inB1 = __SXTB16(__ROR(input2, 8));
/* extract reminaing two samples */
inB2 = __SXTB16(input2);
/* multiply and accumulate two samples at a time */
sum = __SMLAD(inA1, inB1, sum);
sum = __SMLAD(inA2, inB2, sum);
#else
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
//#if defined (ARM_MATH_DSP)
// sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
//#else
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
//#endif
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer in 18.14 format */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_f32.c
* Description: Floating-point vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicMult Vector Multiplication
Element-by-element multiplication of two vectors.
<pre>
pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Floating-point vector multiplication.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
*/
void arm_mult_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
res = vmulq_f32(vec1, vec2);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
*pDst++ = (*pSrcA++) * (*pSrcB++);
*pDst++ = (*pSrcA++) * (*pSrcB++);
*pDst++ = (*pSrcA++) * (*pSrcB++);
*pDst++ = (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply input and store result in destination buffer. */
*pDst++ = (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_q15.c
* Description: Q15 vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Q15 vector multiplication
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_mult_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2, inB1, inB2; /* Temporary input variables */
q15_t out1, out2, out3, out4; /* Temporary output variables */
q31_t mul1, mul2, mul3, mul4; /* Temporary variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
#if defined (ARM_MATH_DSP)
/* read 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
/* read 2 samples at a time from sourceA */
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 samples at a time from sourceB */
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
/* multiply mul = sourceA * sourceB */
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
mul2 = (q31_t) ((q15_t) (inA1 ) * (q15_t) (inB1 ));
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
mul4 = (q31_t) ((q15_t) (inA2 ) * (q15_t) (inB2 ));
/* saturate result to 16 bit */
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
/* store result to destination */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT(out2, out1, 16));
write_q15x2_ia (&pDst, __PKHBT(out4, out3, 16));
#else
write_q15x2_ia (&pDst, __PKHBT(out1, out2, 16));
write_q15x2_ia (&pDst, __PKHBT(out3, out4, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_q31.c
* Description: Q31 vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Q31 vector multiplication.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_mult_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t out; /* Temporary output variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_q7.c
* Description: Q7 vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Q7 vector multiplication
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_mult_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q7_t out1, out2, out3, out4; /* Temporary output variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
#if defined (ARM_MATH_DSP)
/* Multiply inputs and store results in temporary variables */
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
#else
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply input and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_f32.c
* Description: Negates floating-point vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicNegate Vector Negate
Negates the elements of a vector.
<pre>
pDst[n] = -pSrc[n], 0 <= n < blockSize.
</pre>
The functions support in-place computation allowing the source and
destination pointers to reference the same memory buffer.
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a floating-point vector.
@param[in] pSrc points to input vector.
@param[out] pDst points to output vector.
@param[in] blockSize number of samples in each vector.
@return none
*/
void arm_negate_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vnegq_f32(vec1);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
*pDst++ = -*pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = -*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
*pDst++ = -*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_q15.c
* Description: Negates Q15 vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a Q15 vector.
@param[in] pSrc points to the input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Conditions for optimum performance
Input and output buffers should be aligned by 32-bit
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_negate_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q15_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t in1; /* Temporary input variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
#if defined (ARM_MATH_DSP)
/* Negate and store result in destination buffer (2 samples at a time). */
in1 = read_q15x2_ia ((q15_t **) &pSrc);
write_q15x2_ia (&pDst, __QSUB16(0, in1));
in1 = read_q15x2_ia ((q15_t **) &pSrc);
write_q15x2_ia (&pDst, __QSUB16(0, in1));
#else
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_q31.c
* Description: Negates Q31 vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a Q31 vector.
@param[in] pSrc points to the input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_negate_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_q7.c
* Description: Negates Q7 vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a Q7 vector.
@param[in] pSrc points to the input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
*/
void arm_negate_q7(
const q7_t * pSrc,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q7_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t in1; /* Temporary input variable */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
#if defined (ARM_MATH_DSP)
/* Negate and store result in destination buffer (4 samples at a time). */
in1 = read_q7x4_ia ((q7_t **) &pSrc);
write_q7x4_ia (&pDst, __QSUB8(0, in1));
#else
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (q7_t) __QSUB(0, in);
#else
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,147 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_f32.c
* Description: Floating-point vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicOffset Vector Offset
Adds a constant offset to each element of a vector.
<pre>
pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
</pre>
The functions support in-place computation allowing the source and
destination pointers to reference the same memory buffer.
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_offset_f32(
const float32_t * pSrc,
float32_t offset,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vaddq_f32(vec1,vdupq_n_f32(offset));
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
*pDst++ = (*pSrc++) + offset;
*pDst++ = (*pSrc++) + offset;
*pDst++ = (*pSrc++) + offset;
*pDst++ = (*pSrc++) + offset;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
*pDst++ = (*pSrc++) + offset;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,121 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_q15.c
* Description: Q15 vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_offset_q15(
const q15_t * pSrc,
q15_t offset,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t offset_packed; /* Offset packed to 32 bit */
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PKHBT(offset, offset, 16);
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
#if defined (ARM_MATH_DSP)
/* Add offset and store result in destination buffer (2 samples at a time). */
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,128 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_q31.c
* Description: Q31 vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_offset_q31(
const q31_t * pSrc,
q31_t offset,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,116 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_q7.c
* Description: Q7 vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_offset_q7(
const q7_t * pSrc,
q7_t offset,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t offset_packed; /* Offset packed to 32 bit */
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PACKq7(offset, offset, offset, offset);
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
#if defined (ARM_MATH_DSP)
/* Add offset and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia ((q7_t **) &pSrc), offset_packed));
#else
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,159 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_f32.c
* Description: Multiplies a floating-point vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicScale Vector Scale
Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
<pre>
pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
</pre>
In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
The shift allows the gain of the scaling operation to exceed 1.0.
The algorithm used with fixed-point data is:
<pre>
pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
</pre>
The overall scale factor applied to the fixed-point data is
<pre>
scale = scaleFract * 2^shift.
</pre>
The functions support in-place computation allowing the source and destination
pointers to reference the same memory buffer.
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a floating-point vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scale scale factor to be applied
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_scale_f32(
const float32_t *pSrc,
float32_t scale,
float32_t *pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vmulq_f32(vec1, vdupq_n_f32(scale));
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (*pSrc++) * scale;
*pDst++ = (*pSrc++) * scale;
*pDst++ = (*pSrc++) * scale;
*pDst++ = (*pSrc++) * scale;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (*pSrc++) * scale;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_q15.c
* Description: Multiplies a Q15 vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a Q15 vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scaleFract fractional portion of the scale value
@param[in] shift number of bits to shift the result by
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
*/
void arm_scale_q15(
const q15_t *pSrc,
q15_t scaleFract,
int8_t shift,
q15_t *pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
int8_t kShift = 15 - shift; /* Shift to apply after scaling */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2;
q31_t out1, out2, out3, out4; /* Temporary output variables */
q15_t in1, in2, in3, in4; /* Temporary input variables */
#endif
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from source */
inA1 = read_q15x2_ia ((q15_t **) &pSrc);
inA2 = read_q15x2_ia ((q15_t **) &pSrc);
/* Scale inputs and store result in temporary variables
* in single cycle by packing the outputs */
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
out2 = (q31_t) ((q15_t) (inA1 ) * scaleFract);
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
out4 = (q31_t) ((q15_t) (inA2 ) * scaleFract);
/* apply shifting */
out1 = out1 >> kShift;
out2 = out2 >> kShift;
out3 = out3 >> kShift;
out4 = out4 >> kShift;
/* saturate the output */
in1 = (q15_t) (__SSAT(out1, 16));
in2 = (q15_t) (__SSAT(out2, 16));
in3 = (q15_t) (__SSAT(out3, 16));
in4 = (q15_t) (__SSAT(out4, 16));
/* store result to destination */
write_q15x2_ia (&pDst, __PKHBT(in2, in1, 16));
write_q15x2_ia (&pDst, __PKHBT(in4, in3, 16));
#else
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,191 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_q31.c
* Description: Multiplies a Q31 vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a Q31 vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scaleFract fractional portion of the scale value
@param[in] shift number of bits to shift the result by
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
*/
void arm_scale_q31(
const q31_t *pSrc,
q31_t scaleFract,
int8_t shift,
q31_t *pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t in, out; /* Temporary variables */
int8_t kShift = shift + 1; /* Shift to apply after scaling */
int8_t sign = (kShift & 0x80);
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++; /* read input from source */
in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
out = in << kShift; /* apply shifting */
if (in != (out >> kShift)) /* saturate the result */
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out; /* Store result destination */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++; /* read four inputs from source */
in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
out = in >> -kShift; /* apply shifting */
*pDst++ = out; /* Store result destination */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_q7.c
* Description: Multiplies a Q7 vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a Q7 vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scaleFract fractional portion of the scale value
@param[in] shift number of bits to shift the result by
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
*/
void arm_scale_q7(
const q7_t * pSrc,
q7_t scaleFract,
int8_t shift,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
int8_t kShift = 7 - shift; /* Shift to apply after scaling */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q7_t in1, in2, in3, in4; /* Temporary input variables */
q7_t out1, out2, out3, out4; /* Temporary output variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
#if defined (ARM_MATH_DSP)
/* Reading 4 inputs from memory */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* Scale inputs and store result in the temporary variable. */
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
#else
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_shift_q15.c
* Description: Shifts the elements of a Q15 vector by a specified number of bits
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicShift
@{
*/
/**
@brief Shifts the elements of a Q15 vector a specified number of bits
@param[in] pSrc points to the input vector
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_shift_q15(
const q15_t * pSrc,
int8_t shiftBits,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q15_t in1, in2; /* Temporary input variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
#if defined (ARM_MATH_DSP)
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
/* Shift the inputs and then store the results in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in1 << shiftBits), 16),
__SSAT((in2 << shiftBits), 16), 16));
#else
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in2 << shiftBits), 16),
__SSAT((in1 << shiftBits), 16), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in1 << shiftBits), 16),
__SSAT((in2 << shiftBits), 16), 16));
#else
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in2 << shiftBits), 16),
__SSAT((in1 << shiftBits), 16), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
#if defined (ARM_MATH_DSP)
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
/* Shift the inputs and then store the results in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT((in1 >> -shiftBits),
(in2 >> -shiftBits), 16));
#else
write_q15x2_ia (&pDst, __PKHBT((in2 >> -shiftBits),
(in1 >> -shiftBits), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT((in1 >> -shiftBits),
(in2 >> -shiftBits), 16));
#else
write_q15x2_ia (&pDst, __PKHBT((in2 >> -shiftBits),
(in1 >> -shiftBits), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicShift group
*/

View File

@ -0,0 +1,181 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_shift_q31.c
* Description: Shifts the elements of a Q31 vector by a specified number of bits
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicShift Vector Shift
Shifts the elements of a fixed-point vector by a specified number of bits.
There are separate functions for Q7, Q15, and Q31 data types.
The underlying algorithm used is:
<pre>
pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
</pre>
If <code>shift</code> is positive then the elements of the vector are shifted to the left.
If <code>shift</code> is negative then the elements of the vector are shifted to the right.
The functions support in-place computation allowing the source and destination
pointers to reference the same memory buffer.
*/
/**
@addtogroup BasicShift
@{
*/
/**
@brief Shifts the elements of a Q31 vector a specified number of bits.
@param[in] pSrc points to the input vector
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in the vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_shift_q31(
const q31_t * pSrc,
int8_t shiftBits,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#if defined (ARM_MATH_LOOPUNROLL)
q31_t in, out; /* Temporary variables */
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store results in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = clip_q63_to_q31((q63_t) *pSrc++ << shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicShift group
*/

View File

@ -0,0 +1,175 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_shift_q7.c
* Description: Processing function for the Q7 Shifting
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicShift
@{
*/
/**
@brief Shifts the elements of a Q7 vector a specified number of bits
@param[in] pSrc points to the input vector
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par onditions for optimum performance
Input and output buffers should be aligned by 32-bit
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_shift_q7(
const q7_t * pSrc,
int8_t shiftBits,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q7_t in1, in2, in3, in4; /* Temporary input variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
#if defined (ARM_MATH_DSP)
/* Read 4 inputs */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7(__SSAT((in1 << shiftBits), 8),
__SSAT((in2 << shiftBits), 8),
__SSAT((in3 << shiftBits), 8),
__SSAT((in4 << shiftBits), 8) ));
#else
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
#if defined (ARM_MATH_DSP)
/* Read 4 inputs */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7((in1 >> -shiftBits),
(in2 >> -shiftBits),
(in3 >> -shiftBits),
(in4 >> -shiftBits) ));
#else
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicShift group
*/

View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_f32.c
* Description: Floating-point vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicSub Vector Subtraction
Element-by-element subtraction of two vectors.
<pre>
pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Floating-point vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_sub_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
res = vsubq_f32(vec1, vec2);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = (*pSrcA++) - (*pSrcB++);
*pDst++ = (*pSrcA++) - (*pSrcB++);
*pDst++ = (*pSrcA++) - (*pSrcB++);
*pDst++ = (*pSrcA++) - (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = (*pSrcA++) - (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_q15.c
* Description: Q15 vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Q15 vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_sub_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2;
q31_t inB1, inB2;
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 times 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
/* Subtract and store 2 times 2 samples at a time */
write_q15x2_ia (&pDst, __QSUB16(inA1, inB1));
write_q15x2_ia (&pDst, __QSUB16(inA2, inB2));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,108 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_q31.c
* Description: Q31 vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Q31 vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_sub_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,109 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_q7.c
* Description: Q7 vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Q7 vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
void arm_sub_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
#if defined (ARM_MATH_DSP)
/* Subtract and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
#else
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,223 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSP)
# Needed to find the config modules
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/..)
# Select which parts of the CMSIS-DSP must be compiled.
# There are some dependencies between the parts but they are not tracked
# by this cmake. So, enabling some functions may require to enable some
# other ones.
option(BASICMATH "Basic Math Functions" ON)
option(COMPLEXMATH "Complex Math Functions" ON)
option(CONTROLLER "Controller Functions" ON)
option(FASTMATH "Fast Math Functions" ON)
option(FILTERING "Filtering Functions" ON)
option(MATRIX "Matrix Functions" ON)
option(STATISTICS "Statistics Functions" ON)
option(SUPPORT "Support Functions" ON)
option(TRANSFORM "Transform Functions" ON)
# When OFF it is the default behavior : all tables are included.
option(CONFIGTABLE "Configuration of table allowed" OFF)
# When CONFIGTABLE is ON, select if all interpolation tables must be included
option(ALLFAST "All interpolation tables included" OFF)
# When CONFIGTABLE is ON, select if all FFT tables must be included
option(ALLFFT "All fft tables included" OFF)
# Features which require inclusion of a data table.
# Since some tables may be big, the corresponding feature can be
# disabled.
# Those options are taken into account only when CONFIGTABLE is ON
option(ARM_COS_F32 "cos f32" OFF)
option(ARM_COS_Q31 "cos q31" OFF)
option(ARM_COS_Q15 "cos q15" OFF)
option(ARM_SIN_F32 "sin f32" OFF)
option(ARM_SIN_Q31 "sin q31" OFF)
option(ARM_SIN_Q15 "sin q15" OFF)
option(ARM_SIN_COS_F32 "sin cos f32" OFF)
option(ARM_SIN_COS_Q31 "sin cos q31" OFF)
option(ARM_LMS_NORM_Q31 "lms norm q31" OFF)
option(ARM_LMS_NORM_Q15 "lms norm q15" OFF)
option(CFFT_F32_16 "cfft f32 16" OFF)
option(CFFT_F32_32 "cfft f32 32" OFF)
option(CFFT_F32_64 "cfft f32 64" OFF)
option(CFFT_F32_128 "cfft f32 128" OFF)
option(CFFT_F32_256 "cfft f32 256" OFF)
option(CFFT_F32_512 "cfft f32 512" OFF)
option(CFFT_F32_1024 "cfft f32 1024" OFF)
option(CFFT_F32_2048 "cfft f32 2048" OFF)
option(CFFT_F32_4096 "cfft f32 4096" OFF)
option(CFFT_Q31_16 "cfft q31 16" OFF)
option(CFFT_Q31_32 "cfft q31 32" OFF)
option(CFFT_Q31_64 "cfft q31 64" OFF)
option(CFFT_Q31_128 "cfft q31 128" OFF)
option(CFFT_Q31_256 "cfft q31 256" OFF)
option(CFFT_Q31_512 "cfft q31 512" OFF)
option(CFFT_Q31_1024 "cfft q31 1024" OFF)
option(CFFT_Q31_2048 "cfft q31 2048" OFF)
option(CFFT_Q31_4096 "cfft q31 4096" OFF)
option(CFFT_Q15_16 "cfft q15 16" OFF)
option(CFFT_Q15_32 "cfft q15 32" OFF)
option(CFFT_Q15_64 "cfft q15 64" OFF)
option(CFFT_Q15_128 "cfft q15 128" OFF)
option(CFFT_Q15_256 "cfft q15 256" OFF)
option(CFFT_Q15_512 "cfft q15 512" OFF)
option(CFFT_Q15_1024 "cfft q15 1024" OFF)
option(CFFT_Q15_2048 "cfft q15 2048" OFF)
option(CFFT_Q15_4096 "cfft q15 4096" OFF)
option(RFFT_FAST_F32_32 "rfft fast f32 32" OFF)
option(RFFT_FAST_F32_64 "rfft fast f32 64" OFF)
option(RFFT_FAST_F32_128 "rfft fast f32 128" OFF)
option(RFFT_FAST_F32_256 "rfft fast f32 256" OFF)
option(RFFT_FAST_F32_512 "rfft fast f32 512" OFF)
option(RFFT_FAST_F32_1024 "rfft fast f32 1024" OFF)
option(RFFT_FAST_F32_2048 "rfft fast f32 2048" OFF)
option(RFFT_FAST_F32_4096 "rfft fast f32 4096" OFF)
option(RFFT_F32_128 "rfft f32 128" OFF)
option(RFFT_F32_512 "rfft f32 512" OFF)
option(RFFT_F32_2048 "rfft f32 2048" OFF)
option(RFFT_F32_8192 "rfft f32 8192" OFF)
option(RFFT_Q31_32 "rfft q31 32" OFF)
option(RFFT_Q31_64 "rfft q31 64" OFF)
option(RFFT_Q31_128 "rfft q31 128" OFF)
option(RFFT_Q31_256 "rfft q31 256" OFF)
option(RFFT_Q31_512 "rfft q31 512" OFF)
option(RFFT_Q31_1024 "rfft q31 1024" OFF)
option(RFFT_Q31_2048 "rfft q31 2048" OFF)
option(RFFT_Q31_4096 "rfft q31 4096" OFF)
option(RFFT_Q31_8192 "rfft q31 8192" OFF)
option(RFFT_Q15_32 "rfft q15 32" OFF)
option(RFFT_Q15_64 "rfft q15 64" OFF)
option(RFFT_Q15_128 "rfft q15 128" OFF)
option(RFFT_Q15_256 "rfft q15 256" OFF)
option(RFFT_Q15_512 "rfft q15 512" OFF)
option(RFFT_Q15_1024 "rfft q15 1024" OFF)
option(RFFT_Q15_2048 "rfft q15 2048" OFF)
option(RFFT_Q15_4096 "rfft q15 4096" OFF)
option(RFFT_Q15_8192 "rfft q15 8192" OFF)
option(DCT4_F32_128 "dct4 f32 128" OFF)
option(DCT4_F32_512 "dct4 f32 512" OFF)
option(DCT4_F32_2048 "dct4 f32 2048" OFF)
option(DCT4_F32_8192 "dct4 f32 8192" OFF)
option(DCT4_Q31_128 "dct4 q31 128" OFF)
option(DCT4_Q31_512 "dct4 q31 512" OFF)
option(DCT4_Q31_2048 "dct4 q31 2048" OFF)
option(DCT4_Q31_8192 "dct4 q31 8192" OFF)
option(DCT4_Q15_128 "dct4 q15 128" OFF)
option(DCT4_Q15_512 "dct4 q15 512" OFF)
option(DCT4_Q15_2048 "dct4 q15 2048" OFF)
option(DCT4_Q15_8192 "dct4 q15 8192" OFF)
###########################
#
# CMSIS DSP
#
###########################
# DSP Sources
SET(DSP ".")
add_library(CMSISDSP INTERFACE)
include(config)
if (BASICMATH)
add_subdirectory(BasicMathFunctions)
target_link_libraries(CMSISDSP INTERFACE CMSISDSPBasicMath)
endif()
if (COMPLEXMATH)
add_subdirectory(ComplexMathFunctions)
target_link_libraries(CMSISDSP INTERFACE CMSISDSPComplexMath)
endif()
if (CONTROLLER)
add_subdirectory(ControllerFunctions)
# Fast tables inclusion is allowed
if (CONFIGTABLE)
target_compile_definitions(CMSISDSPController PUBLIC ARM_FAST_ALLOW_TABLES)
endif()
target_link_libraries(CMSISDSP INTERFACE CMSISDSPController)
endif()
if (FASTMATH)
add_subdirectory(FastMathFunctions)
# Fast tables inclusion is allowed
if (CONFIGTABLE)
target_compile_definitions(CMSISDSPFastMath PUBLIC ARM_FAST_ALLOW_TABLES)
endif()
target_link_libraries(CMSISDSP INTERFACE CMSISDSPFastMath)
endif()
if (FILTERING)
add_subdirectory(FilteringFunctions)
# Fast tables inclusion is allowed
if (CONFIGTABLE)
target_compile_definitions(CMSISDSPFiltering PUBLIC ARM_FAST_ALLOW_TABLES)
endif()
target_link_libraries(CMSISDSP INTERFACE CMSISDSPFiltering)
endif()
if (MATRIX)
add_subdirectory(MatrixFunctions)
target_link_libraries(CMSISDSP INTERFACE CMSISDSPMatrix)
endif()
if (STATISTICS)
add_subdirectory(StatisticsFunctions)
target_link_libraries(CMSISDSP INTERFACE CMSISDSPStatistics)
endif()
if (SUPPORT)
add_subdirectory(SupportFunctions)
target_link_libraries(CMSISDSP INTERFACE CMSISDSPSupport)
endif()
if (TRANSFORM)
add_subdirectory(TransformFunctions)
# FFT tables inclusion is allowed
if (CONFIGTABLE)
target_compile_definitions(CMSISDSPTransform PUBLIC ARM_FFT_ALLOW_TABLES)
endif()
target_link_libraries(CMSISDSP INTERFACE CMSISDSPTransform)
endif()
if (FILTERING OR CONTROLLER OR FASTMATH OR TRANSFORM)
add_subdirectory(CommonTables)
if (TRANSFORM)
# FFT tables inclusion is allowed
if (CONFIGTABLE)
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_FFT_ALLOW_TABLES)
endif()
endif()
if (FILTERING OR CONTROLLER OR FASTMATH)
# Select which tables to include
if (CONFIGTABLE)
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_FAST_ALLOW_TABLES)
endif()
endif()
target_link_libraries(CMSISDSP INTERFACE CMSISDSPCommon)
endif()
### Includes
target_include_directories(CMSISDSP INTERFACE "${DSP}/../Include")

View File

@ -0,0 +1,31 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPCommon)
add_library(CMSISDSPCommon STATIC arm_common_tables.c)
if (CONFIGTABLE AND ALLFFT)
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_ALL_FFT_TABLES)
endif()
if (CONFIGTABLE AND ALLFAST)
target_compile_definitions(CMSISDSPCommon PUBLIC ARM_ALL_FAST_TABLES)
endif()
include(fft)
fft(CMSISDSPCommon)
include(interpol)
interpol(CMSISDSPCommon)
target_sources(CMSISDSPCommon PRIVATE arm_const_structs.c)
configdsp(CMSISDSPCommon ..)
### Includes
target_include_directories(CMSISDSPCommon PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,31 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: CommonTables.c
* Description: Combination of all common table source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_common_tables.c"
#include "arm_const_structs.c"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,486 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_const_structs.c
* Description: Constant structs that are initialized for user convenience.
* For example, some can be given as arguments to the arm_cfft_f32() or arm_rfft_f32() functions.
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_const_structs.h"
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
/* Floating-point structs */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len16 = {
16, twiddleCoef_16, armBitRevIndexTable16, ARMBITREVINDEXTABLE_16_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len32 = {
32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len64 = {
64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len128 = {
128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len256 = {
256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len512 = {
512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024 = {
1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048 = {
2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096 = {
4096, twiddleCoef_4096, armBitRevIndexTable4096, ARMBITREVINDEXTABLE_4096_TABLE_LENGTH
};
#endif
/* Fixed-point structs */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len16 = {
16, twiddleCoef_16_q31, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len32 = {
32, twiddleCoef_32_q31, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len64 = {
64, twiddleCoef_64_q31, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len128 = {
128, twiddleCoef_128_q31, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len256 = {
256, twiddleCoef_256_q31, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len512 = {
512, twiddleCoef_512_q31, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024 = {
1024, twiddleCoef_1024_q31, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048 = {
2048, twiddleCoef_2048_q31, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096 = {
4096, twiddleCoef_4096_q31, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len16 = {
16, twiddleCoef_16_q15, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len32 = {
32, twiddleCoef_32_q15, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len64 = {
64, twiddleCoef_64_q15, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len128 = {
128, twiddleCoef_128_q15, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len256 = {
256, twiddleCoef_256_q15, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len512 = {
512, twiddleCoef_512_q15, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024 = {
1024, twiddleCoef_1024_q15, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048 = {
2048, twiddleCoef_2048_q15, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
4096, twiddleCoef_4096_q15, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
};
#endif
/* Structure for real-value inputs */
/* Floating-point structs */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len32 = {
{ 16, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_16_TABLE_LENGTH },
32U,
(float32_t *)twiddleCoef_rfft_32
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len64 = {
{ 32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH },
64U,
(float32_t *)twiddleCoef_rfft_64
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len128 = {
{ 64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH },
128U,
(float32_t *)twiddleCoef_rfft_128
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len256 = {
{ 128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH },
256U,
(float32_t *)twiddleCoef_rfft_256
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len512 = {
{ 256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH },
512U,
(float32_t *)twiddleCoef_rfft_512
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len1024 = {
{ 512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH },
1024U,
(float32_t *)twiddleCoef_rfft_1024
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len2048 = {
{ 1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH },
2048U,
(float32_t *)twiddleCoef_rfft_2048
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len4096 = {
{ 2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH },
4096U,
(float32_t *)twiddleCoef_rfft_4096
};
#endif
/* Fixed-point structs */
/* q31_t */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len32 = {
32U,
0,
1,
256U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len16
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len64 = {
64U,
0,
1,
128U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len32
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len128 = {
128U,
0,
1,
64U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len64
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len256 = {
256U,
0,
1,
32U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len128
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len512 = {
512U,
0,
1,
16U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len256
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len1024 = {
1024U,
0,
1,
8U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len512
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len2048 = {
2048U,
0,
1,
4U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len1024
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len4096 = {
4096U,
0,
1,
2U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len2048
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q31) && defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = {
8192U,
0,
1,
1U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len4096
};
#endif
/* q15_t */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len32 = {
32U,
0,
1,
256U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len16
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len64 = {
64U,
0,
1,
128U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len32
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len128 = {
128U,
0,
1,
64U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len64
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len256 = {
256U,
0,
1,
32U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len128
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len512 = {
512U,
0,
1,
16U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len256
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len1024 = {
1024U,
0,
1,
8U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len512
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len2048 = {
2048U,
0,
1,
4U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len1024
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len4096 = {
4096U,
0,
1,
2U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len2048
};
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_REALCOEF_Q15) && defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = {
8192U,
0,
1,
1U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len4096
};
#endif
#endif

View File

@ -0,0 +1,16 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPComplexMath)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPComplexMath STATIC ${SRC})
configdsp(CMSISDSPComplexMath ..)
### Includes
target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,46 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: CompexMathFunctions.c
* Description: Combination of all comlex math function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_cmplx_conj_f32.c"
#include "arm_cmplx_conj_q15.c"
#include "arm_cmplx_conj_q31.c"
#include "arm_cmplx_dot_prod_f32.c"
#include "arm_cmplx_dot_prod_q15.c"
#include "arm_cmplx_dot_prod_q31.c"
#include "arm_cmplx_mag_f32.c"
#include "arm_cmplx_mag_q15.c"
#include "arm_cmplx_mag_q31.c"
#include "arm_cmplx_mag_squared_f32.c"
#include "arm_cmplx_mag_squared_q15.c"
#include "arm_cmplx_mag_squared_q31.c"
#include "arm_cmplx_mult_cmplx_f32.c"
#include "arm_cmplx_mult_cmplx_q15.c"
#include "arm_cmplx_mult_cmplx_q31.c"
#include "arm_cmplx_mult_real_f32.c"
#include "arm_cmplx_mult_real_q15.c"
#include "arm_cmplx_mult_real_q31.c"

View File

@ -0,0 +1,161 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_f32.c
* Description: Floating-point complex conjugate
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_conj Complex Conjugate
Conjugates the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the destination data where the result should be written.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n) ] = pSrc[(2*n) ]; // real part
pDst[(2*n)+1] = -pSrc[(2*n)+1]; // imag part
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
@brief Floating-point complex conjugate.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] numSamples number of samples in each vector
@return none
*/
void arm_cmplx_conj_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t zero;
float32x4x2_t vec;
zero = vdupq_n_f32(0.0);
/* Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0]+jC[1] = A[0]+(-1)*jA[1] */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
vec = vld2q_f32(pSrc);
vec.val[1] = vsubq_f32(zero,vec.val[1]);
vst2q_f32(pDst,vec);
/* Increment pointers */
pSrc += 8;
pDst += 8;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = numSamples & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
/* Calculate Complex Conjugate and store result in destination buffer. */
*pDst++ = *pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = *pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = *pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = *pSrc++;
*pDst++ = -*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined (ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
/* Calculate Complex Conjugate and store result in destination buffer. */
*pDst++ = *pSrc++;
*pDst++ = -*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_conj group
*/

View File

@ -0,0 +1,157 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_q15.c
* Description: Q15 complex conjugate
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
@brief Q15 complex conjugate.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_cmplx_conj_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q31_t in1; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t in2, in3, in4; /* Temporary input variables */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
/* Calculate Complex Conjugate and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
in1 = read_q15x2_ia ((q15_t **) &pSrc);
in2 = read_q15x2_ia ((q15_t **) &pSrc);
in3 = read_q15x2_ia ((q15_t **) &pSrc);
in4 = read_q15x2_ia ((q15_t **) &pSrc);
#ifndef ARM_MATH_BIG_ENDIAN
in1 = __QASX(0, in1);
in2 = __QASX(0, in2);
in3 = __QASX(0, in3);
in4 = __QASX(0, in4);
#else
in1 = __QSAX(0, in1);
in2 = __QSAX(0, in2);
in3 = __QSAX(0, in3);
in4 = __QSAX(0, in4);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16);
in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16);
in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16);
in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16);
write_q15x2_ia (&pDst, in1);
write_q15x2_ia (&pDst, in2);
write_q15x2_ia (&pDst, in3);
write_q15x2_ia (&pDst, in4);
#else
*pDst++ = *pSrc++;
in1 = *pSrc++;
*pDst++ = (in1 == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in1;
*pDst++ = *pSrc++;
in1 = *pSrc++;
*pDst++ = (in1 == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in1;
*pDst++ = *pSrc++;
in1 = *pSrc++;
*pDst++ = (in1 == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in1;
*pDst++ = *pSrc++;
in1 = *pSrc++;
*pDst++ = (in1 == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in1;
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
/* Calculate Complex Conjugate and store result in destination buffer. */
*pDst++ = *pSrc++;
in1 = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __SSAT(-in1, 16);
#else
*pDst++ = (in1 == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in1;
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_conj group
*/

View File

@ -0,0 +1,137 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_q31.c
* Description: Q31 complex conjugate
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
@brief Q31 complex conjugate.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_cmplx_conj_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q31_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
/* Calculate Complex Conjugate and store result in destination buffer. */
*pDst++ = *pSrc++;
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
*pDst++ = *pSrc++;
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
*pDst++ = *pSrc++;
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
*pDst++ = *pSrc++;
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[0] + jC[1] = A[0]+ j(-1)A[1] */
/* Calculate Complex Conjugate and store result in destination buffer. */
*pDst++ = *pSrc++;
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_conj group
*/

View File

@ -0,0 +1,233 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_f32.c
* Description: Floating-point complex dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_dot_prod Complex Dot Product
Computes the dot product of two complex vectors.
The vectors are multiplied element-by-element and then summed.
The <code>pSrcA</code> points to the first complex input vector and
<code>pSrcB</code> points to the second complex input vector.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
realResult = 0;
imagResult = 0;
for (n = 0; n < numSamples; n++) {
realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
@brief Floating-point complex dot product.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] numSamples number of samples in each vector
@param[out] realResult real part of the result returned here
@param[out] imagResult imaginary part of the result returned here
@return none
*/
void arm_cmplx_dot_prod_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t numSamples,
float32_t * realResult,
float32_t * imagResult)
{
uint32_t blkCnt; /* Loop counter */
float32_t real_sum = 0.0f, imag_sum = 0.0f; /* Temporary result variables */
float32_t a0,b0,c0,d0;
#if defined(ARM_MATH_NEON)
float32x4x2_t vec1,vec2,vec3,vec4;
float32x4_t accR,accI;
float32x2_t accum = vdup_n_f32(0);
accR = vdupq_n_f32(0.0);
accI = vdupq_n_f32(0.0);
/* Loop unrolling: Compute 8 outputs at a time */
blkCnt = numSamples >> 3U;
while (blkCnt > 0U)
{
/* C = (A[0]+jA[1])*(B[0]+jB[1]) + ... */
/* Calculate dot product and then store the result in a temporary buffer. */
vec1 = vld2q_f32(pSrcA);
vec2 = vld2q_f32(pSrcB);
/* Increment pointers */
pSrcA += 8;
pSrcB += 8;
/* Re{C} = Re{A}*Re{B} - Im{A}*Im{B} */
accR = vmlaq_f32(accR,vec1.val[0],vec2.val[0]);
accR = vmlsq_f32(accR,vec1.val[1],vec2.val[1]);
/* Im{C} = Re{A}*Im{B} + Im{A}*Re{B} */
accI = vmlaq_f32(accI,vec1.val[1],vec2.val[0]);
accI = vmlaq_f32(accI,vec1.val[0],vec2.val[1]);
vec3 = vld2q_f32(pSrcA);
vec4 = vld2q_f32(pSrcB);
/* Increment pointers */
pSrcA += 8;
pSrcB += 8;
/* Re{C} = Re{A}*Re{B} - Im{A}*Im{B} */
accR = vmlaq_f32(accR,vec3.val[0],vec4.val[0]);
accR = vmlsq_f32(accR,vec3.val[1],vec4.val[1]);
/* Im{C} = Re{A}*Im{B} + Im{A}*Re{B} */
accI = vmlaq_f32(accI,vec3.val[1],vec4.val[0]);
accI = vmlaq_f32(accI,vec3.val[0],vec4.val[1]);
/* Decrement the loop counter */
blkCnt--;
}
accum = vpadd_f32(vget_low_f32(accR), vget_high_f32(accR));
real_sum += accum[0] + accum[1];
accum = vpadd_f32(vget_low_f32(accI), vget_high_f32(accI));
imag_sum += accum[0] + accum[1];
/* Tail */
blkCnt = numSamples & 0x7;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += a0 * c0;
imag_sum += a0 * d0;
real_sum -= b0 * d0;
imag_sum += b0 * c0;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += a0 * c0;
imag_sum += a0 * d0;
real_sum -= b0 * d0;
imag_sum += b0 * c0;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += a0 * c0;
imag_sum += a0 * d0;
real_sum -= b0 * d0;
imag_sum += b0 * c0;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += a0 * c0;
imag_sum += a0 * d0;
real_sum -= b0 * d0;
imag_sum += b0 * c0;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += a0 * c0;
imag_sum += a0 * d0;
real_sum -= b0 * d0;
imag_sum += b0 * c0;
/* Decrement loop counter */
blkCnt--;
}
/* Store real and imaginary result in destination buffer. */
*realResult = real_sum;
*imagResult = imag_sum;
}
/**
@} end of cmplx_dot_prod group
*/

View File

@ -0,0 +1,154 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_q15.c
* Description: Processing function for the Q15 Complex Dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
@brief Q15 complex dot product.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] numSamples number of samples in each vector
@param[out] realResult real part of the result returned here
@param[out] imagResult imaginary part of the result returned her
@return none
@par Scaling and Overflow Behavior
The function is implemented using an internal 64-bit accumulator.
The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
These are accumulated in a 64-bit accumulator with 34.30 precision.
As a final step, the accumulators are converted to 8.24 format.
The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
*/
void arm_cmplx_dot_prod_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t numSamples,
q31_t * realResult,
q31_t * imagResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t real_sum = 0, imag_sum = 0; /* Temporary result variables */
q15_t a0,b0,c0,d0;
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += (q31_t)a0 * c0;
imag_sum += (q31_t)a0 * d0;
real_sum -= (q31_t)b0 * d0;
imag_sum += (q31_t)b0 * c0;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += (q31_t)a0 * c0;
imag_sum += (q31_t)a0 * d0;
real_sum -= (q31_t)b0 * d0;
imag_sum += (q31_t)b0 * c0;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += (q31_t)a0 * c0;
imag_sum += (q31_t)a0 * d0;
real_sum -= (q31_t)b0 * d0;
imag_sum += (q31_t)b0 * c0;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += (q31_t)a0 * c0;
imag_sum += (q31_t)a0 * d0;
real_sum -= (q31_t)b0 * d0;
imag_sum += (q31_t)b0 * c0;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += (q31_t)a0 * c0;
imag_sum += (q31_t)a0 * d0;
real_sum -= (q31_t)b0 * d0;
imag_sum += (q31_t)b0 * c0;
/* Decrement loop counter */
blkCnt--;
}
/* Store real and imaginary result in 8.24 format */
/* Convert real data in 34.30 to 8.24 by 6 right shifts */
*realResult = (q31_t) (real_sum >> 6);
/* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
*imagResult = (q31_t) (imag_sum >> 6);
}
/**
@} end of cmplx_dot_prod group
*/

View File

@ -0,0 +1,153 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_q31.c
* Description: Q31 complex dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
@brief Q31 complex dot product.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] numSamples number of samples in each vector
@param[out] realResult real part of the result returned here
@param[out] imagResult imaginary part of the result returned here
@return none
@par Scaling and Overflow Behavior
The function is implemented using an internal 64-bit accumulator.
The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
Input down scaling is not required.
*/
void arm_cmplx_dot_prod_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t numSamples,
q63_t * realResult,
q63_t * imagResult)
{
uint32_t blkCnt; /* Loop counter */
q63_t real_sum = 0, imag_sum = 0; /* Temporary result variables */
q31_t a0,b0,c0,d0;
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += ((q63_t)a0 * c0) >> 14;
imag_sum += ((q63_t)a0 * d0) >> 14;
real_sum -= ((q63_t)b0 * d0) >> 14;
imag_sum += ((q63_t)b0 * c0) >> 14;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += ((q63_t)a0 * c0) >> 14;
imag_sum += ((q63_t)a0 * d0) >> 14;
real_sum -= ((q63_t)b0 * d0) >> 14;
imag_sum += ((q63_t)b0 * c0) >> 14;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += ((q63_t)a0 * c0) >> 14;
imag_sum += ((q63_t)a0 * d0) >> 14;
real_sum -= ((q63_t)b0 * d0) >> 14;
imag_sum += ((q63_t)b0 * c0) >> 14;
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += ((q63_t)a0 * c0) >> 14;
imag_sum += ((q63_t)a0 * d0) >> 14;
real_sum -= ((q63_t)b0 * d0) >> 14;
imag_sum += ((q63_t)b0 * c0) >> 14;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
c0 = *pSrcB++;
d0 = *pSrcB++;
real_sum += ((q63_t)a0 * c0) >> 14;
imag_sum += ((q63_t)a0 * d0) >> 14;
real_sum -= ((q63_t)b0 * d0) >> 14;
imag_sum += ((q63_t)b0 * c0) >> 14;
/* Decrement loop counter */
blkCnt--;
}
/* Store real and imaginary result in 16.48 format */
*realResult = real_sum;
*imagResult = imag_sum;
}
/**
@} end of cmplx_dot_prod group
*/

View File

@ -0,0 +1,188 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_f32.c
* Description: Floating-point complex magnitude
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag Complex Magnitude
Computes the magnitude of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
@brief Floating-point complex magnitude.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
void arm_cmplx_mag_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* loop counter */
float32_t real, imag; /* Temporary variables to hold input values */
#if defined(ARM_MATH_NEON)
float32x4x2_t vecA;
float32x4_t vRealA;
float32x4_t vImagA;
float32x4_t vMagSqA;
float32x4x2_t vecB;
float32x4_t vRealB;
float32x4_t vImagB;
float32x4_t vMagSqB;
/* Loop unrolling: Compute 8 outputs at a time */
blkCnt = numSamples >> 3;
while (blkCnt > 0U)
{
/* out = sqrt((real * real) + (imag * imag)) */
vecA = vld2q_f32(pSrc);
pSrc += 8;
vecB = vld2q_f32(pSrc);
pSrc += 8;
vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
vMagSqA = vaddq_f32(vRealA, vImagA);
vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
vMagSqB = vaddq_f32(vRealB, vImagB);
/* Store the result in the destination buffer. */
vst1q_f32(pDst, __arm_vec_sqrt_f32_neon(vMagSqA));
pDst += 4;
vst1q_f32(pDst, __arm_vec_sqrt_f32_neon(vMagSqB));
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
blkCnt = numSamples & 7;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
/* store result in destination buffer. */
arm_sqrt_f32((real * real) + (imag * imag), pDst++);
real = *pSrc++;
imag = *pSrc++;
arm_sqrt_f32((real * real) + (imag * imag), pDst++);
real = *pSrc++;
imag = *pSrc++;
arm_sqrt_f32((real * real) + (imag * imag), pDst++);
real = *pSrc++;
imag = *pSrc++;
arm_sqrt_f32((real * real) + (imag * imag), pDst++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
/* store result in destination buffer. */
arm_sqrt_f32((real * real) + (imag * imag), pDst++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_mag group
*/

View File

@ -0,0 +1,162 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_q15.c
* Description: Q15 complex magnitude
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
@brief Q15 complex magnitude.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
*/
void arm_cmplx_mag_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_DSP)
q31_t in;
q31_t acc0; /* Accumulators */
#else
q15_t real, imag; /* Temporary input variables */
q31_t acc0, acc1; /* Accumulators */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
/* store result in 2.14 format in destination buffer. */
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
#else
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
/* store result in 2.14 format in destination buffer. */
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
/* store result in 2.14 format in destination buffer. */
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
#else
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
/* store result in 2.14 format in destination buffer. */
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_mag group
*/

View File

@ -0,0 +1,130 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_q31.c
* Description: Q31 complex magnitude
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
@brief Q31 complex magnitude.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
Input down scaling is not required.
*/
void arm_cmplx_mag_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q31_t real, imag; /* Temporary input variables */
q31_t acc0, acc1; /* Accumulators */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
/* store result in 2.30 format in destination buffer. */
arm_sqrt_q31(acc0 + acc1, pDst++);
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
arm_sqrt_q31(acc0 + acc1, pDst++);
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
arm_sqrt_q31(acc0 + acc1, pDst++);
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
arm_sqrt_q31(acc0 + acc1, pDst++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
/* store result in 2.30 format in destination buffer. */
arm_sqrt_q31(acc0 + acc1, pDst++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_mag group
*/

View File

@ -0,0 +1,184 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_f32.c
* Description: Floating-point complex magnitude squared
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag_squared Complex Magnitude Squared
Computes the magnitude squared of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
@brief Floating-point complex magnitude squared.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
void arm_cmplx_mag_squared_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
float32_t real, imag; /* Temporary input variables */
#if defined(ARM_MATH_NEON)
float32x4x2_t vecA;
float32x4_t vRealA;
float32x4_t vImagA;
float32x4_t vMagSqA;
float32x4x2_t vecB;
float32x4_t vRealB;
float32x4_t vImagB;
float32x4_t vMagSqB;
/* Loop unrolling: Compute 8 outputs at a time */
blkCnt = numSamples >> 3;
while (blkCnt > 0U)
{
/* out = sqrt((real * real) + (imag * imag)) */
vecA = vld2q_f32(pSrc);
pSrc += 8;
vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
vMagSqA = vaddq_f32(vRealA, vImagA);
vecB = vld2q_f32(pSrc);
pSrc += 8;
vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
vMagSqB = vaddq_f32(vRealB, vImagB);
/* Store the result in the destination buffer. */
vst1q_f32(pDst, vMagSqA);
pDst += 4;
vst1q_f32(pDst, vMagSqB);
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
blkCnt = numSamples & 7;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
*pDst++ = (real * real) + (imag * imag);
real = *pSrc++;
imag = *pSrc++;
*pDst++ = (real * real) + (imag * imag);
real = *pSrc++;
imag = *pSrc++;
*pDst++ = (real * real) + (imag * imag);
real = *pSrc++;
imag = *pSrc++;
*pDst++ = (real * real) + (imag * imag);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
/* store result in destination buffer. */
*pDst++ = (real * real) + (imag * imag);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_mag_squared group
*/

View File

@ -0,0 +1,161 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_q15.c
* Description: Q15 complex magnitude squared
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
@brief Q15 complex magnitude squared.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
*/
void arm_cmplx_mag_squared_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_DSP)
q31_t in;
q31_t acc0; /* Accumulators */
#else
q15_t real, imag; /* Temporary input variables */
q31_t acc0, acc1; /* Accumulators */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) (acc0 >> 17);
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
*pDst++ = (q15_t) (acc0 >> 17);
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
*pDst++ = (q15_t) (acc0 >> 17);
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
*pDst++ = (q15_t) (acc0 >> 17);
#else
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
acc0 = __SMUAD(in, in);
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) (acc0 >> 17);
#else
real = *pSrc++;
imag = *pSrc++;
acc0 = ((q31_t) real * real);
acc1 = ((q31_t) imag * imag);
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_mag_squared group
*/

View File

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_q31.c
* Description: Q31 complex magnitude squared
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
@brief Q31 complex magnitude squared.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
Input down scaling is not required.
*/
void arm_cmplx_mag_squared_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q31_t real, imag; /* Temporary input variables */
q31_t acc0, acc1; /* Accumulators */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
/* store the result in 3.29 format in the destination buffer. */
*pDst++ = acc0 + acc1;
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
*pDst++ = acc0 + acc1;
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
*pDst++ = acc0 + acc1;
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
*pDst++ = acc0 + acc1;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
imag = *pSrc++;
acc0 = (q31_t) (((q63_t) real * real) >> 33);
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
/* store result in 3.29 format in destination buffer. */
*pDst++ = acc0 + acc1;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of cmplx_mag_squared group
*/

View File

@ -0,0 +1,194 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_f32.c
* Description: Floating-point complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
Multiplies a complex vector by another complex vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
@brief Floating-point complex-by-complex multiplication.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
void arm_cmplx_mult_cmplx_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
float32_t a, b, c, d; /* Temporary variables to store real and imaginary values */
#if defined(ARM_MATH_NEON)
float32x4x2_t va, vb;
float32x4_t real, imag;
float32x4x2_t outCplx;
/* Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
va = vld2q_f32(pSrcA); // load & separate real/imag pSrcA (de-interleave 2)
vb = vld2q_f32(pSrcB); // load & separate real/imag pSrcB
/* Increment pointers */
pSrcA += 8;
pSrcB += 8;
/* Re{C} = Re{A}*Re{B} - Im{A}*Im{B} */
outCplx.val[0] = vmulq_f32(va.val[0], vb.val[0]);
outCplx.val[0] = vmlsq_f32(outCplx.val[0], va.val[1], vb.val[1]);
/* Im{C} = Re{A}*Im{B} + Im{A}*Re{B} */
outCplx.val[1] = vmulq_f32(va.val[0], vb.val[1]);
outCplx.val[1] = vmlaq_f32(outCplx.val[1], va.val[1], vb.val[0]);
vst2q_f32(pDst, outCplx);
/* Increment pointer */
pDst += 8;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = numSamples & 3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
/* store result in destination buffer. */
*pDst++ = (a * c) - (b * d);
*pDst++ = (a * d) + (b * c);
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (a * c) - (b * d);
*pDst++ = (a * d) + (b * c);
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (a * c) - (b * d);
*pDst++ = (a * d) + (b * c);
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (a * c) - (b * d);
*pDst++ = (a * d) + (b * c);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
/* store result in destination buffer. */
*pDst++ = (a * c) - (b * d);
*pDst++ = (a * d) + (b * c);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of CmplxByCmplxMult group
*/

View File

@ -0,0 +1,136 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_q15.c
* Description: Q15 complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
@brief Q15 complex-by-complex multiplication.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
*/
void arm_cmplx_mult_cmplx_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q15_t a, b, c, d; /* Temporary variables */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) ( (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17) );
*pDst++ = (q15_t) ( (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17) );
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (q15_t) ( (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17) );
*pDst++ = (q15_t) ( (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17) );
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (q15_t) ( (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17) );
*pDst++ = (q15_t) ( (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17) );
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (q15_t) ( (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17) );
*pDst++ = (q15_t) ( (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17) );
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) ( (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17) );
*pDst++ = (q15_t) ( (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17) );
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of CmplxByCmplxMult group
*/

View File

@ -0,0 +1,137 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_q31.c
* Description: Q31 complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
@brief Q31 complex-by-complex multiplication.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
Input down scaling is not required.
*/
void arm_cmplx_mult_cmplx_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q31_t a, b, c, d; /* Temporary variables */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
/* store result in 3.29 format in destination buffer. */
*pDst++ = (q31_t) ( (((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33) );
*pDst++ = (q31_t) ( (((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33) );
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (q31_t) ( (((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33) );
*pDst++ = (q31_t) ( (((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33) );
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (q31_t) ( (((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33) );
*pDst++ = (q31_t) ( (((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33) );
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
*pDst++ = (q31_t) ( (((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33) );
*pDst++ = (q31_t) ( (((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33) );
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
a = *pSrcA++;
b = *pSrcA++;
c = *pSrcB++;
d = *pSrcB++;
/* store result in 3.29 format in destination buffer. */
*pDst++ = (q31_t) ( (((q63_t) a * c) >> 33) - (((q63_t) b * d) >> 33) );
*pDst++ = (q31_t) ( (((q63_t) a * d) >> 33) + (((q63_t) b * c) >> 33) );
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of CmplxByCmplxMult group
*/

View File

@ -0,0 +1,169 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_f32.c
* Description: Floating-point complex by real multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByRealMult Complex-by-Real Multiplication
Multiplies a complex vector by a real vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values while the real array has a total of <code>numSamples</code>
real values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Floating-point complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
*/
void arm_cmplx_mult_real_f32(
const float32_t * pSrcCmplx,
const float32_t * pSrcReal,
float32_t * pCmplxDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
float32_t in; /* Temporary variable */
#if defined(ARM_MATH_NEON)
float32x4_t r;
float32x4x2_t ab,outCplx;
/* Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
ab = vld2q_f32(pSrcCmplx); // load & separate real/imag pSrcA (de-interleave 2)
r = vld1q_f32(pSrcReal); // load & separate real/imag pSrcB
/* Increment pointers */
pSrcCmplx += 8;
pSrcReal += 4;
outCplx.val[0] = vmulq_f32(ab.val[0], r);
outCplx.val[1] = vmulq_f32(ab.val[1], r);
vst2q_f32(pCmplxDst, outCplx);
pCmplxDst += 8;
blkCnt--;
}
/* Tail */
blkCnt = numSamples & 3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i ] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
in = *pSrcReal++;
/* store result in destination buffer. */
*pCmplxDst++ = *pSrcCmplx++ * in;
*pCmplxDst++ = *pSrcCmplx++ * in;
in = *pSrcReal++;
*pCmplxDst++ = *pSrcCmplx++ * in;
*pCmplxDst++ = *pSrcCmplx++ * in;
in = *pSrcReal++;
*pCmplxDst++ = *pSrcCmplx++ * in;
*pCmplxDst++ = *pSrcCmplx++ * in;
in = *pSrcReal++;
*pCmplxDst++ = *pSrcCmplx++* in;
*pCmplxDst++ = *pSrcCmplx++ * in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i ] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
in = *pSrcReal++;
/* store result in destination buffer. */
*pCmplxDst++ = *pSrcCmplx++ * in;
*pCmplxDst++ = *pSrcCmplx++ * in;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of CmplxByRealMult group
*/

View File

@ -0,0 +1,182 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_q15.c
* Description: Q15 complex by real multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Q15 complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_cmplx_mult_real_q15(
const q15_t * pSrcCmplx,
const q15_t * pSrcReal,
q15_t * pCmplxDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q15_t in; /* Temporary variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2; /* Temporary variables to hold input data */
q31_t inB1; /* Temporary variables to hold input data */
q15_t out1, out2, out3, out4; /* Temporary variables to hold output data */
q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i ] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
#if defined (ARM_MATH_DSP)
/* read 2 complex numbers both real and imaginary from complex input buffer */
inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
/* read 2 real values at a time from real input buffer */
inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
/* multiply complex number with real numbers */
#ifndef ARM_MATH_BIG_ENDIAN
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
#else
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* saturate the result */
out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
/* pack real and imaginary outputs and store them to destination */
write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16));
write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16));
inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
#ifndef ARM_MATH_BIG_ENDIAN
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
#else
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16));
write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16));
#else
in = *pSrcReal++;
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
in = *pSrcReal++;
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
in = *pSrcReal++;
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
in = *pSrcReal++;
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i ] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
in = *pSrcReal++;
/* store the result in the destination buffer. */
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
*pCmplxDst++ = (q15_t) __SSAT((((q31_t) *pSrcCmplx++ * in) >> 15), 16);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of CmplxByRealMult group
*/

View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_q31.c
* Description: Q31 complex by real multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Q31 complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_cmplx_mult_real_q31(
const q31_t * pSrcCmplx,
const q31_t * pSrcReal,
q31_t * pCmplxDst,
uint32_t numSamples)
{
uint32_t blkCnt; /* Loop counter */
q31_t in; /* Temporary variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = numSamples >> 2U;
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i ] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
in = *pSrcReal++;
#if defined (ARM_MATH_DSP)
/* store saturated result in 1.31 format to destination buffer */
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
#else
/* store result in destination buffer. */
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
#endif
in = *pSrcReal++;
#if defined (ARM_MATH_DSP)
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
#else
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
#endif
in = *pSrcReal++;
#if defined (ARM_MATH_DSP)
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
#else
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
#endif
in = *pSrcReal++;
#if defined (ARM_MATH_DSP)
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
#else
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = numSamples % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C[2 * i ] = A[2 * i ] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
in = *pSrcReal++;
#if defined (ARM_MATH_DSP)
/* store saturated result in 1.31 format to destination buffer */
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
*pCmplxDst++ = (__SSAT((q31_t) (((q63_t) *pSrcCmplx++ * in) >> 32), 31) << 1);
#else
/* store result in destination buffer. */
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
*pCmplxDst++ = (q31_t) clip_q63_to_q31(((q63_t) *pSrcCmplx++ * in) >> 31);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of CmplxByRealMult group
*/

View File

@ -0,0 +1,37 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPController)
add_library(CMSISDSPController STATIC)
configdsp(CMSISDSPController ..)
include(interpol)
interpol(CMSISDSPController)
if (CONFIGTABLE AND ALLFAST)
target_compile_definitions(CMSISDSPController PUBLIC ARM_ALL_FAST_TABLES)
endif()
target_sources(CMSISDSPController PRIVATE arm_pid_init_f32.c)
target_sources(CMSISDSPController PRIVATE arm_pid_init_q15.c)
target_sources(CMSISDSPController PRIVATE arm_pid_init_q31.c)
target_sources(CMSISDSPController PRIVATE arm_pid_reset_f32.c)
target_sources(CMSISDSPController PRIVATE arm_pid_reset_q15.c)
target_sources(CMSISDSPController PRIVATE arm_pid_reset_q31.c)
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_COS_F32)
target_sources(CMSISDSPController PRIVATE arm_sin_cos_f32.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_COS_Q31)
target_sources(CMSISDSPController PRIVATE arm_sin_cos_q31.c)
endif()
### Includes
target_include_directories(CMSISDSPController PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,37 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: ControllerFunctions.c
* Description: Combination of all controller function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_pid_init_f32.c"
#include "arm_pid_init_q15.c"
#include "arm_pid_init_q31.c"
#include "arm_pid_reset_f32.c"
#include "arm_pid_reset_q15.c"
#include "arm_pid_reset_q31.c"
#include "arm_sin_cos_f32.c"
#include "arm_sin_cos_q31.c"

View File

@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_pid_init_f32.c
* Description: Floating-point PID Control initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup PID
@{
*/
/**
@brief Initialization function for the floating-point PID Control.
@param[in,out] S points to an instance of the PID structure
@param[in] resetStateFlag
- value = 0: no change in state
- value = 1: reset state
@return none
@par Details
The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
also sets the state variables to all zeros.
*/
void arm_pid_init_f32(
arm_pid_instance_f32 * S,
int32_t resetStateFlag)
{
/* Derived coefficient A0 */
S->A0 = S->Kp + S->Ki + S->Kd;
/* Derived coefficient A1 */
S->A1 = (-S->Kp) - ((float32_t) 2.0 * S->Kd);
/* Derived coefficient A2 */
S->A2 = S->Kd;
/* Check whether state needs reset or not */
if (resetStateFlag)
{
/* Reset state to zero, The size will be always 3 samples */
memset(S->state, 0, 3U * sizeof(float32_t));
}
}
/**
@} end of PID group
*/

View File

@ -0,0 +1,95 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_pid_init_q15.c
* Description: Q15 PID Control initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup PID
@{
*/
/**
@brief Initialization function for the Q15 PID Control.
@param[in,out] S points to an instance of the Q15 PID structure
@param[in] resetStateFlag
- value = 0: no change in state
- value = 1: reset state
@return none
@par Details
The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
also sets the state variables to all zeros.
*/
void arm_pid_init_q15(
arm_pid_instance_q15 * S,
int32_t resetStateFlag)
{
#if defined (ARM_MATH_DSP)
/* Derived coefficient A0 */
S->A0 = __QADD16(__QADD16(S->Kp, S->Ki), S->Kd);
/* Derived coefficients and pack into A1 */
#ifndef ARM_MATH_BIG_ENDIAN
S->A1 = __PKHBT(-__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), S->Kd, 16);
#else
S->A1 = __PKHBT(S->Kd, -__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), 16);
#endif
#else
q31_t temp; /* to store the sum */
/* Derived coefficient A0 */
temp = S->Kp + S->Ki + S->Kd;
S->A0 = (q15_t) __SSAT(temp, 16);
/* Derived coefficients and pack into A1 */
temp = -(S->Kd + S->Kd + S->Kp);
S->A1 = (q15_t) __SSAT(temp, 16);
S->A2 = S->Kd;
#endif /* #if defined (ARM_MATH_DSP) */
/* Check whether state needs reset or not */
if (resetStateFlag)
{
/* Reset state to zero, The size will be always 3 samples */
memset(S->state, 0, 3U * sizeof(q15_t));
}
}
/**
@} end of PID group
*/

View File

@ -0,0 +1,92 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_pid_init_q31.c
* Description: Q31 PID Control initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup PID
@{
*/
/**
@brief Initialization function for the Q31 PID Control.
@param[in,out] S points to an instance of the Q31 PID structure
@param[in] resetStateFlag
- value = 0: no change in state
- value = 1: reset state
@return none
@par Details
The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
also sets the state variables to all zeros.
*/
void arm_pid_init_q31(
arm_pid_instance_q31 * S,
int32_t resetStateFlag)
{
#if defined (ARM_MATH_DSP)
/* Derived coefficient A0 */
S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);
/* Derived coefficient A1 */
S->A1 = -__QADD(__QADD(S->Kd, S->Kd), S->Kp);
#else
q31_t temp; /* to store the sum */
/* Derived coefficient A0 */
temp = clip_q63_to_q31((q63_t) S->Kp + S->Ki);
S->A0 = clip_q63_to_q31((q63_t) temp + S->Kd);
/* Derived coefficient A1 */
temp = clip_q63_to_q31((q63_t) S->Kd + S->Kd);
S->A1 = -clip_q63_to_q31((q63_t) temp + S->Kp);
#endif /* #if defined (ARM_MATH_DSP) */
/* Derived coefficient A2 */
S->A2 = S->Kd;
/* Check whether state needs reset or not */
if (resetStateFlag)
{
/* Reset state to zero, The size will be always 3 samples */
memset(S->state, 0, 3U * sizeof(q31_t));
}
}
/**
@} end of PID group
*/

View File

@ -0,0 +1,54 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_pid_reset_f32.c
* Description: Floating-point PID Control reset function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup PID
@{
*/
/**
@brief Reset function for the floating-point PID Control.
@param[in,out] S points to an instance of the floating-point PID structure
@return none
@par Details
The function resets the state buffer to zeros.
*/
void arm_pid_reset_f32(
arm_pid_instance_f32 * S)
{
/* Reset state to zero, The size will be always 3 samples */
memset(S->state, 0, 3U * sizeof(float32_t));
}
/**
@} end of PID group
*/

View File

@ -0,0 +1,54 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_pid_reset_q15.c
* Description: Q15 PID Control reset function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup PID
@{
*/
/**
@brief Reset function for the Q15 PID Control.
@param[in,out] S points to an instance of the Q15 PID structure
@return none
@par Details
The function resets the state buffer to zeros.
*/
void arm_pid_reset_q15(
arm_pid_instance_q15 * S)
{
/* Reset state to zero, The size will be always 3 samples */
memset(S->state, 0, 3U * sizeof(q15_t));
}
/**
@} end of PID group
*/

View File

@ -0,0 +1,54 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_pid_reset_q31.c
* Description: Q31 PID Control reset function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@addtogroup PID
@{
*/
/**
@brief Reset function for the Q31 PID Control.
@param[in,out] S points to an instance of the Q31 PID structure
@return none
@par Details
The function resets the state buffer to zeros.
*/
void arm_pid_reset_q31(
arm_pid_instance_q31 * S)
{
/* Reset state to zero, The size will be always 3 samples */
memset(S->state, 0, 3U * sizeof(q31_t));
}
/**
@} end of PID group
*/

View File

@ -0,0 +1,146 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_cos_f32.c
* Description: Sine and Cosine calculation for floating-point values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupController
*/
/**
@defgroup SinCos Sine Cosine
Computes the trigonometric sine and cosine values using a combination of table lookup
and linear interpolation.
There are separate functions for Q31 and floating-point data types.
The input to the floating-point version is in degrees while the
fixed-point Q31 have a scaled input with the range
[-1 0.9999] mapping to [-180 +180] degrees.
The floating point function also allows values that are out of the usual range. When this happens, the function will
take extra time to adjust the input value to the range of [-180 180].
The result is accurate to 5 digits after the decimal point.
The implementation is based on table lookup using 360 values together with linear interpolation.
The steps used are:
-# Calculation of the nearest integer table index.
-# Compute the fractional portion (fract) of the input.
-# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
-# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
-# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
-# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
*/
/**
@addtogroup SinCos
@{
*/
/**
@brief Floating-point sin_cos function.
@param[in] theta input value in degrees
@param[out] pSinVal points to processed sine output
@param[out] pCosVal points to processed cosine output
@return none
*/
void arm_sin_cos_f32(
float32_t theta,
float32_t * pSinVal,
float32_t * pCosVal)
{
float32_t fract, in; /* Temporary input, output variables */
uint16_t indexS, indexC; /* Index variable */
float32_t f1, f2, d1, d2; /* Two nearest output values */
float32_t Dn, Df;
float32_t temp, findex;
/* input x is in degrees */
/* Scale input, divide input by 360, for cosine add 0.25 (pi/2) to read sine table */
in = theta * 0.00277777777778f;
if (in < 0.0f)
{
in = -in;
}
in = in - (int32_t)in;
/* Calculate the nearest index */
findex = (float32_t)FAST_MATH_TABLE_SIZE * in;
indexS = ((uint16_t)findex) & 0x1ff;
indexC = (indexS + (FAST_MATH_TABLE_SIZE / 4)) & 0x1ff;
/* Calculation of fractional value */
fract = findex - (float32_t) indexS;
/* Read two nearest values of input value from the cos & sin tables */
f1 = sinTable_f32[indexC ];
f2 = sinTable_f32[indexC+1];
d1 = -sinTable_f32[indexS ];
d2 = -sinTable_f32[indexS+1];
temp = (1.0f - fract) * f1 + fract * f2;
Dn = 0.0122718463030f; /* delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE */
Df = f2 - f1; /* delta between the values of the functions */
temp = Dn * (d1 + d2) - 2 * Df;
temp = fract * temp + (3 * Df - (d2 + 2 * d1) * Dn);
temp = fract * temp + d1 * Dn;
/* Calculation of cosine value */
*pCosVal = fract * temp + f1;
/* Read two nearest values of input value from the cos & sin tables */
f1 = sinTable_f32[indexS ];
f2 = sinTable_f32[indexS+1];
d1 = sinTable_f32[indexC ];
d2 = sinTable_f32[indexC+1];
temp = (1.0f - fract) * f1 + fract * f2;
Df = f2 - f1; // delta between the values of the functions
temp = Dn * (d1 + d2) - 2 * Df;
temp = fract * temp + (3 * Df - (d2 + 2 * d1) * Dn);
temp = fract * temp + d1 * Dn;
/* Calculation of sine value */
*pSinVal = fract * temp + f1;
if (theta < 0.0f)
{
*pSinVal = -*pSinVal;
}
}
/**
@} end of SinCos group
*/

View File

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_cos_q31.c
* Description: Cosine & Sine calculation for Q31 values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupController
*/
/**
@addtogroup SinCos
@{
*/
/**
@brief Q31 sin_cos function.
@param[in] theta scaled input value in degrees
@param[out] pSinVal points to processed sine output
@param[out] pCosVal points to processed cosine output
@return none
The Q31 input value is in the range [-1 0.999999] and is mapped to a degree value in the range [-180 179].
*/
void arm_sin_cos_q31(
q31_t theta,
q31_t * pSinVal,
q31_t * pCosVal)
{
q31_t fract; /* Temporary input, output variables */
uint16_t indexS, indexC; /* Index variable */
q31_t f1, f2, d1, d2; /* Two nearest output values */
q31_t Dn, Df;
q63_t temp;
/* Calculate the nearest index */
indexS = (uint32_t)theta >> CONTROLLER_Q31_SHIFT;
indexC = (indexS + 128) & 0x1ff;
/* Calculation of fractional value */
fract = (theta - (indexS << CONTROLLER_Q31_SHIFT)) << 8;
/* Read two nearest values of input value from the cos & sin tables */
f1 = sinTable_q31[indexC ];
f2 = sinTable_q31[indexC+1];
d1 = -sinTable_q31[indexS ];
d2 = -sinTable_q31[indexS+1];
Dn = 0x1921FB5; /* delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE */
Df = f2 - f1; /* delta between the values of the functions */
temp = Dn * ((q63_t)d1 + d2);
temp = temp - ((q63_t)Df << 32);
temp = (q63_t)fract * (temp >> 31);
temp = temp + ((3 * (q63_t)Df << 31) - (d2 + ((q63_t)d1 << 1)) * Dn);
temp = (q63_t)fract * (temp >> 31);
temp = temp + (q63_t)d1 * Dn;
temp = (q63_t)fract * (temp >> 31);
/* Calculation of cosine value */
*pCosVal = clip_q63_to_q31((temp >> 31) + (q63_t)f1);
/* Read two nearest values of input value from the cos & sin tables */
f1 = sinTable_q31[indexS ];
f2 = sinTable_q31[indexS+1];
d1 = sinTable_q31[indexC ];
d2 = sinTable_q31[indexC+1];
Df = f2 - f1; // delta between the values of the functions
temp = Dn * ((q63_t)d1 + d2);
temp = temp - ((q63_t)Df << 32);
temp = (q63_t)fract * (temp >> 31);
temp = temp + ((3 * (q63_t)Df << 31) - (d2 + ((q63_t)d1 << 1)) * Dn);
temp = (q63_t)fract * (temp >> 31);
temp = temp + (q63_t)d1 * Dn;
temp = (q63_t)fract * (temp >> 31);
/* Calculation of sine value */
*pSinVal = clip_q63_to_q31((temp >> 31) + (q63_t)f1);
}
/**
@} end of SinCos group
*/

View File

@ -0,0 +1,51 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPFastMath)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPFastMath STATIC)
include(interpol)
interpol(CMSISDSPFastMath)
if (CONFIGTABLE AND ALLFAST)
target_compile_definitions(CMSISDSPFastMath PUBLIC ARM_ALL_FAST_TABLES)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_F32)
target_sources(CMSISDSPFastMath PRIVATE arm_cos_f32.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_Q15)
target_sources(CMSISDSPFastMath PRIVATE arm_cos_q15.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_Q31)
target_sources(CMSISDSPFastMath PRIVATE arm_cos_q31.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_F32)
target_sources(CMSISDSPFastMath PRIVATE arm_sin_f32.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_Q15)
target_sources(CMSISDSPFastMath PRIVATE arm_sin_q15.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_Q31)
target_sources(CMSISDSPFastMath PRIVATE arm_sin_q31.c)
endif()
target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q15.c)
target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q31.c)
configdsp(CMSISDSPFastMath ..)
### Includes
target_include_directories(CMSISDSPFastMath PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,37 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: FastMathFunctions.c
* Description: Combination of all fast math function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_cos_f32.c"
#include "arm_cos_q15.c"
#include "arm_cos_q31.c"
#include "arm_sin_f32.c"
#include "arm_sin_q15.c"
#include "arm_sin_q31.c"
#include "arm_sqrt_q15.c"
#include "arm_sqrt_q31.c"

View File

@ -0,0 +1,122 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_f32.c
* Description: Fast cosine calculation for floating-point values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@defgroup cos Cosine
Computes the trigonometric cosine function using a combination of table lookup
and linear interpolation. There are separate functions for
Q15, Q31, and floating-point data types.
The input to the floating-point version is in radians while the
fixed-point Q15 and Q31 have a scaled input with the range
[0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
value of 2*pi wraps around to 0.
The implementation is based on table lookup using 256 values together with linear interpolation.
The steps used are:
-# Calculation of the nearest integer table index
-# Compute the fractional portion (fract) of the table index.
-# The final result equals <code>(1.0f-fract)*a + fract*b;</code>
where
<pre>
b = Table[index];
c = Table[index+1];
</pre>
*/
/**
@addtogroup cos
@{
*/
/**
@brief Fast approximation to the trigonometric cosine function for floating-point data.
@param[in] x input value in radians
@return cos(x)
*/
float32_t arm_cos_f32(
float32_t x)
{
float32_t cosVal, fract, in; /* Temporary input, output variables */
uint16_t index; /* Index variable */
float32_t a, b; /* Two nearest output values */
int32_t n;
float32_t findex;
/* input x is in radians */
/* Scale input to [0 1] range from [0 2*PI] , divide input by 2*pi, add 0.25 (pi/2) to read sine table */
in = x * 0.159154943092f + 0.25f;
/* Calculation of floor value of input */
n = (int32_t) in;
/* Make negative values towards -infinity */
if (in < 0.0f)
{
n--;
}
/* Map input value to [0 1] */
in = in - (float32_t) n;
/* Calculation of index of the table */
findex = (float32_t)FAST_MATH_TABLE_SIZE * in;
index = (uint16_t)findex;
/* when "in" is exactly 1, we need to rotate the index down to 0 */
if (index >= FAST_MATH_TABLE_SIZE) {
index = 0;
findex -= (float32_t)FAST_MATH_TABLE_SIZE;
}
/* fractional value calculation */
fract = findex - (float32_t) index;
/* Read two nearest values of input value from the cos table */
a = sinTable_f32[index];
b = sinTable_f32[index+1];
/* Linear interpolation process */
cosVal = (1.0f - fract) * a + fract * b;
/* Return output value */
return (cosVal);
}
/**
@} end of cos group
*/

View File

@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_q15.c
* Description: Fast cosine calculation for Q15 values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup cos
@{
*/
/**
@brief Fast approximation to the trigonometric cosine function for Q15 data.
@param[in] x Scaled input value in radians
@return cos(x)
The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q15_t arm_cos_q15(
q15_t x)
{
q15_t cosVal; /* Temporary input, output variables */
int32_t index; /* Index variable */
q15_t a, b; /* Two nearest output values */
q15_t fract; /* Temporary values for fractional values */
/* add 0.25 (pi/2) to read sine table */
x = (uint16_t)x + 0x2000;
if (x < 0)
{ /* convert negative numbers to corresponding positive ones */
x = (uint16_t)x + 0x8000;
}
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q15[index];
b = sinTable_q15[index+1];
/* Linear interpolation process */
cosVal = (q31_t) (0x8000 - fract) * a >> 16;
cosVal = (q15_t) ((((q31_t) cosVal << 16) + ((q31_t) fract * b)) >> 16);
/* Return output value */
return (cosVal << 1);
}
/**
@} end of cos group
*/

View File

@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_q31.c
* Description: Fast cosine calculation for Q31 values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup cos
@{
*/
/**
@brief Fast approximation to the trigonometric cosine function for Q31 data.
@param[in] x Scaled input value in radians
@return cos(x)
The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q31_t arm_cos_q31(
q31_t x)
{
q31_t cosVal; /* Temporary input, output variables */
int32_t index; /* Index variable */
q31_t a, b; /* Two nearest output values */
q31_t fract; /* Temporary values for fractional values */
/* add 0.25 (pi/2) to read sine table */
x = (uint32_t)x + 0x20000000;
if (x < 0)
{ /* convert negative numbers to corresponding positive ones */
x = (uint32_t)x + 0x80000000;
}
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q31[index];
b = sinTable_q31[index+1];
/* Linear interpolation process */
cosVal = (q63_t) (0x80000000 - fract) * a >> 32;
cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) fract * b)) >> 32);
/* Return output value */
return (cosVal << 1);
}
/**
@} end of cos group
*/

View File

@ -0,0 +1,122 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_f32.c
* Description: Fast sine calculation for floating-point values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@defgroup sin Sine
Computes the trigonometric sine function using a combination of table lookup
and linear interpolation. There are separate functions for
Q15, Q31, and floating-point data types.
The input to the floating-point version is in radians while the
fixed-point Q15 and Q31 have a scaled input with the range
[0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
value of 2*pi wraps around to 0.
The implementation is based on table lookup using 256 values together with linear interpolation.
The steps used are:
-# Calculation of the nearest integer table index
-# Compute the fractional portion (fract) of the table index.
-# The final result equals <code>(1.0f-fract)*a + fract*b;</code>
where
<pre>
b = Table[index];
c = Table[index+1];
</pre>
*/
/**
@addtogroup sin
@{
*/
/**
@brief Fast approximation to the trigonometric sine function for floating-point data.
@param[in] x input value in radians.
@return sin(x)
*/
float32_t arm_sin_f32(
float32_t x)
{
float32_t sinVal, fract, in; /* Temporary input, output variables */
uint16_t index; /* Index variable */
float32_t a, b; /* Two nearest output values */
int32_t n;
float32_t findex;
/* input x is in radians */
/* Scale input to [0 1] range from [0 2*PI] , divide input by 2*pi */
in = x * 0.159154943092f;
/* Calculation of floor value of input */
n = (int32_t) in;
/* Make negative values towards -infinity */
if (in < 0.0f)
{
n--;
}
/* Map input value to [0 1] */
in = in - (float32_t) n;
/* Calculation of index of the table */
findex = (float32_t)FAST_MATH_TABLE_SIZE * in;
index = (uint16_t)findex;
/* when "in" is exactly 1, we need to rotate the index down to 0 */
if (index >= FAST_MATH_TABLE_SIZE) {
index = 0;
findex -= (float32_t)FAST_MATH_TABLE_SIZE;
}
/* fractional value calculation */
fract = findex - (float32_t) index;
/* Read two nearest values of input value from the sin table */
a = sinTable_f32[index];
b = sinTable_f32[index+1];
/* Linear interpolation process */
sinVal = (1.0f - fract) * a + fract * b;
/* Return output value */
return (sinVal);
}
/**
@} end of sin group
*/

View File

@ -0,0 +1,77 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_q15.c
* Description: Fast sine calculation for Q15 values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup sin
@{
*/
/**
@brief Fast approximation to the trigonometric sine function for Q15 data.
@param[in] x Scaled input value in radians
@return sin(x)
The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q15_t arm_sin_q15(
q15_t x)
{
q15_t sinVal; /* Temporary input, output variables */
int32_t index; /* Index variable */
q15_t a, b; /* Two nearest output values */
q15_t fract; /* Temporary values for fractional values */
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q15[index];
b = sinTable_q15[index+1];
/* Linear interpolation process */
sinVal = (q31_t) (0x8000 - fract) * a >> 16;
sinVal = (q15_t) ((((q31_t) sinVal << 16) + ((q31_t) fract * b)) >> 16);
/* Return output value */
return (sinVal << 1);
}
/**
@} end of sin group
*/

View File

@ -0,0 +1,77 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_q31.c
* Description: Fast sine calculation for Q31 values
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup sin
@{
*/
/**
@brief Fast approximation to the trigonometric sine function for Q31 data.
@param[in] x Scaled input value in radians
@return sin(x)
The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q31_t arm_sin_q31(
q31_t x)
{
q31_t sinVal; /* Temporary variables for input, output */
int32_t index; /* Index variable */
q31_t a, b; /* Two nearest output values */
q31_t fract; /* Temporary values for fractional values */
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q31[index];
b = sinTable_q31[index+1];
/* Linear interpolation process */
sinVal = (q63_t) (0x80000000 - fract) * a >> 32;
sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) fract * b)) >> 32);
/* Return output value */
return (sinVal << 1);
}
/**
@} end of sin group
*/

View File

@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sqrt_q15.c
* Description: Q15 square root function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup SQRT
@{
*/
/**
@brief Q15 square root function.
@param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF
@param[out] pOut points to square root of input value
@return execution status
- \ref ARM_MATH_SUCCESS : input value is positive
- \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
*/
arm_status arm_sqrt_q15(
q15_t in,
q15_t * pOut)
{
q31_t bits_val1;
q15_t number, temp1, var1, signBits1, half;
float32_t temp_float1;
union
{
q31_t fracval;
float32_t floatval;
} tempconv;
number = in;
/* If the input is a positive number then compute the signBits. */
if (number > 0)
{
signBits1 = __CLZ(number) - 17;
/* Shift by the number of signBits1 */
if ((signBits1 % 2) == 0)
{
number = number << signBits1;
}
else
{
number = number << (signBits1 - 1);
}
/* Calculate half value of the number */
half = number >> 1;
/* Store the number for later use */
temp1 = number;
/* Convert to float */
temp_float1 = number * 3.051757812500000e-005f;
/* Store as integer */
tempconv.floatval = temp_float1;
bits_val1 = tempconv.fracval;
/* Subtract the shifted value from the magic number to give intial guess */
bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */
/* Store as float */
tempconv.fracval = bits_val1;
temp_float1 = tempconv.floatval;
/* Convert to integer format */
var1 = (q31_t) (temp_float1 * 16384);
/* 1st iteration */
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
((q15_t)
((((q15_t)
(((q31_t) var1 * var1) >> 15)) *
(q31_t) half) >> 15))) >> 15)) << 2;
/* 2nd iteration */
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
((q15_t)
((((q15_t)
(((q31_t) var1 * var1) >> 15)) *
(q31_t) half) >> 15))) >> 15)) << 2;
/* 3rd iteration */
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
((q15_t)
((((q15_t)
(((q31_t) var1 * var1) >> 15)) *
(q31_t) half) >> 15))) >> 15)) << 2;
/* Multiply the inverse square root with the original value */
var1 = ((q15_t) (((q31_t) temp1 * var1) >> 15)) << 1;
/* Shift the output down accordingly */
if ((signBits1 % 2) == 0)
{
var1 = var1 >> (signBits1 / 2);
}
else
{
var1 = var1 >> ((signBits1 - 1) / 2);
}
*pOut = var1;
return (ARM_MATH_SUCCESS);
}
/* If the number is a negative number then store zero as its square root value */
else
{
*pOut = 0;
return (ARM_MATH_ARGUMENT_ERROR);
}
}
/**
@} end of SQRT group
*/

View File

@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sqrt_q31.c
* Description: Q31 square root function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup SQRT
@{
*/
/**
@brief Q31 square root function.
@param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF
@param[out] pOut points to square root of input value
@return execution status
- \ref ARM_MATH_SUCCESS : input value is positive
- \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
*/
arm_status arm_sqrt_q31(
q31_t in,
q31_t * pOut)
{
q31_t bits_val1;
q31_t number, temp1, var1, signBits1, half;
float32_t temp_float1;
union
{
q31_t fracval;
float32_t floatval;
} tempconv;
number = in;
/* If the input is a positive number then compute the signBits. */
if (number > 0)
{
signBits1 = __CLZ(number) - 1;
/* Shift by the number of signBits1 */
if ((signBits1 % 2) == 0)
{
number = number << signBits1;
}
else
{
number = number << (signBits1 - 1);
}
/* Calculate half value of the number */
half = number >> 1;
/* Store the number for later use */
temp1 = number;
/* Convert to float */
temp_float1 = number * 4.6566128731e-010f;
/* Store as integer */
tempconv.floatval = temp_float1;
bits_val1 = tempconv.fracval;
/* Subtract the shifted value from the magic number to give intial guess */
bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */
/* Store as float */
tempconv.fracval = bits_val1;
temp_float1 = tempconv.floatval;
/* Convert to integer format */
var1 = (q31_t) (temp_float1 * 1073741824);
/* 1st iteration */
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
((q31_t)
((((q31_t)
(((q63_t) var1 * var1) >> 31)) *
(q63_t) half) >> 31))) >> 31)) << 2;
/* 2nd iteration */
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
((q31_t)
((((q31_t)
(((q63_t) var1 * var1) >> 31)) *
(q63_t) half) >> 31))) >> 31)) << 2;
/* 3rd iteration */
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
((q31_t)
((((q31_t)
(((q63_t) var1 * var1) >> 31)) *
(q63_t) half) >> 31))) >> 31)) << 2;
/* Multiply the inverse square root with the original value */
var1 = ((q31_t) (((q63_t) temp1 * var1) >> 31)) << 1;
/* Shift the output down accordingly */
if ((signBits1 % 2) == 0)
{
var1 = var1 >> (signBits1 / 2);
}
else
{
var1 = var1 >> ((signBits1 - 1) / 2);
}
*pOut = var1;
return (ARM_MATH_SUCCESS);
}
/* If the number is a negative number then store zero as its square root value */
else
{
*pOut = 0;
return (ARM_MATH_ARGUMENT_ERROR);
}
}
/**
@} end of SQRT group
*/

View File

@ -0,0 +1,128 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPFiltering)
add_library(CMSISDSPFiltering STATIC)
include(interpol)
interpol(CMSISDSPFiltering)
configdsp(CMSISDSPFiltering ..)
if (CONFIGTABLE AND ALLFAST)
target_compile_definitions(CMSISDSPFiltering PUBLIC ARM_ALL_FAST_TABLES)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_LMS_NORM_Q31)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_init_q31.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_LMS_NORM_Q15)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_init_q15.c)
endif()
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_32x64_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_32x64_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_fast_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_fast_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df1_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df2T_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df2T_f64.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df2T_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_df2T_init_f64.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_stereo_df2T_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_biquad_cascade_stereo_df2T_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_fast_opt_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_fast_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_fast_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_opt_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_opt_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_fast_opt_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_fast_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_fast_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_opt_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_opt_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_partial_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_conv_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_fast_opt_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_fast_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_fast_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_opt_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_opt_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_correlate_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_fast_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_fast_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_decimate_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_fast_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_fast_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_init_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_interpolate_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_interpolate_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_interpolate_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_interpolate_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_interpolate_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_interpolate_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_lattice_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_lattice_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_lattice_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_lattice_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_lattice_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_lattice_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_init_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_fir_sparse_q7.c)
target_sources(CMSISDSPFiltering PRIVATE arm_iir_lattice_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_iir_lattice_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_iir_lattice_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_iir_lattice_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_iir_lattice_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_iir_lattice_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_init_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_init_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_init_f32.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_q31.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_q15.c)
target_sources(CMSISDSPFiltering PRIVATE arm_lms_q31.c)
### Includes
target_include_directories(CMSISDSPFiltering PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,127 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: FilteringFunctions.c
* Description: Combination of all filtering function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_biquad_cascade_df1_32x64_init_q31.c"
#include "arm_biquad_cascade_df1_32x64_q31.c"
#include "arm_biquad_cascade_df1_f32.c"
#include "arm_biquad_cascade_df1_fast_q15.c"
#include "arm_biquad_cascade_df1_fast_q31.c"
#include "arm_biquad_cascade_df1_init_f32.c"
#include "arm_biquad_cascade_df1_init_q15.c"
#include "arm_biquad_cascade_df1_init_q31.c"
#include "arm_biquad_cascade_df1_q15.c"
#include "arm_biquad_cascade_df1_q31.c"
#include "arm_biquad_cascade_df2T_f32.c"
#include "arm_biquad_cascade_df2T_f64.c"
#include "arm_biquad_cascade_df2T_init_f32.c"
#include "arm_biquad_cascade_df2T_init_f64.c"
#include "arm_biquad_cascade_stereo_df2T_f32.c"
#include "arm_biquad_cascade_stereo_df2T_init_f32.c"
#include "arm_conv_f32.c"
#include "arm_conv_fast_opt_q15.c"
#include "arm_conv_fast_q15.c"
#include "arm_conv_fast_q31.c"
#include "arm_conv_opt_q15.c"
#include "arm_conv_opt_q7.c"
#include "arm_conv_partial_f32.c"
#include "arm_conv_partial_fast_opt_q15.c"
#include "arm_conv_partial_fast_q15.c"
#include "arm_conv_partial_fast_q31.c"
#include "arm_conv_partial_opt_q15.c"
#include "arm_conv_partial_opt_q7.c"
#include "arm_conv_partial_q15.c"
#include "arm_conv_partial_q31.c"
#include "arm_conv_partial_q7.c"
#include "arm_conv_q15.c"
#include "arm_conv_q31.c"
#include "arm_conv_q7.c"
#include "arm_correlate_f32.c"
#include "arm_correlate_fast_opt_q15.c"
#include "arm_correlate_fast_q15.c"
#include "arm_correlate_fast_q31.c"
#include "arm_correlate_opt_q15.c"
#include "arm_correlate_opt_q7.c"
#include "arm_correlate_q15.c"
#include "arm_correlate_q31.c"
#include "arm_correlate_q7.c"
#include "arm_fir_decimate_f32.c"
#include "arm_fir_decimate_fast_q15.c"
#include "arm_fir_decimate_fast_q31.c"
#include "arm_fir_decimate_init_f32.c"
#include "arm_fir_decimate_init_q15.c"
#include "arm_fir_decimate_init_q31.c"
#include "arm_fir_decimate_q15.c"
#include "arm_fir_decimate_q31.c"
#include "arm_fir_f32.c"
#include "arm_fir_fast_q15.c"
#include "arm_fir_fast_q31.c"
#include "arm_fir_init_f32.c"
#include "arm_fir_init_q15.c"
#include "arm_fir_init_q31.c"
#include "arm_fir_init_q7.c"
#include "arm_fir_interpolate_f32.c"
#include "arm_fir_interpolate_init_f32.c"
#include "arm_fir_interpolate_init_q15.c"
#include "arm_fir_interpolate_init_q31.c"
#include "arm_fir_interpolate_q15.c"
#include "arm_fir_interpolate_q31.c"
#include "arm_fir_lattice_f32.c"
#include "arm_fir_lattice_init_f32.c"
#include "arm_fir_lattice_init_q15.c"
#include "arm_fir_lattice_init_q31.c"
#include "arm_fir_lattice_q15.c"
#include "arm_fir_lattice_q31.c"
#include "arm_fir_q15.c"
#include "arm_fir_q31.c"
#include "arm_fir_q7.c"
#include "arm_fir_sparse_f32.c"
#include "arm_fir_sparse_init_f32.c"
#include "arm_fir_sparse_init_q15.c"
#include "arm_fir_sparse_init_q31.c"
#include "arm_fir_sparse_init_q7.c"
#include "arm_fir_sparse_q15.c"
#include "arm_fir_sparse_q31.c"
#include "arm_fir_sparse_q7.c"
#include "arm_iir_lattice_f32.c"
#include "arm_iir_lattice_init_f32.c"
#include "arm_iir_lattice_init_q15.c"
#include "arm_iir_lattice_init_q31.c"
#include "arm_iir_lattice_q15.c"
#include "arm_iir_lattice_q31.c"
#include "arm_lms_f32.c"
#include "arm_lms_init_f32.c"
#include "arm_lms_init_q15.c"
#include "arm_lms_init_q31.c"
#include "arm_lms_norm_f32.c"
#include "arm_lms_norm_init_f32.c"
#include "arm_lms_norm_init_q15.c"
#include "arm_lms_norm_init_q31.c"
#include "arm_lms_norm_q15.c"
#include "arm_lms_norm_q31.c"
#include "arm_lms_q15.c"
#include "arm_lms_q31.c"

View File

@ -0,0 +1,94 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_32x64_init_q31.c
* Description: High precision Q31 Biquad cascade filter initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1_32x64
@{
*/
/**
@brief Initialization function for the Q31 Biquad cascade 32x64 filter.
@param[in,out] S points to an instance of the high precision Q31 Biquad cascade filter structure
@param[in] numStages number of 2nd order stages in the filter
@param[in] pCoeffs points to the filter coefficients
@param[in] pState points to the state buffer
@param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
@par
The <code>pState</code> points to state variables array and size of each state variable is 1.63 format.
Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
The state variables are arranged in the state array as:
<pre>
{x[n-1], x[n-2], y[n-1], y[n-2]}
</pre>
The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
The state array has a total length of <code>4*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cas_df1_32x64_init_q31(
arm_biquad_cas_df1_32x64_ins_q31 * S,
uint8_t numStages,
const q31_t * pCoeffs,
q63_t * pState,
uint8_t postShift)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign postShift to be applied to the output */
S->postShift = postShift;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q63_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF1_32x64 group
*/

View File

@ -0,0 +1,458 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_32x64_q31.c
* Description: High precision Q31 Biquad cascade filter processing function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@defgroup BiquadCascadeDF1_32x64 High Precision Q31 Biquad Cascade Filter
This function implements a high precision Biquad cascade filter which operates on
Q31 data values. The filter coefficients are in 1.31 format and the state variables
are in 1.63 format. The double precision state variables reduce quantization noise
in the filter and provide a cleaner output.
These filters are particularly useful when implementing filters in which the
singularities are close to the unit circle. This is common for low pass or high
pass filters with very low cutoff frequencies.
The function operates on blocks of input and output data
and each call to the function processes <code>blockSize</code> samples through
the filter. <code>pSrc</code> and <code>pDst</code> points to input and output arrays
containing <code>blockSize</code> Q31 values.
@par Algorithm
Each Biquad stage implements a second order filter using the difference equation:
<pre>
y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
</pre>
A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
\image html Biquad.gif "Single Biquad filter stage"
Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
Pay careful attention to the sign of the feedback coefficients.
Some design tools use the difference equation
<pre>
y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
</pre>
In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
@par
Higher order filters are realized as a cascade of second order sections.
<code>numStages</code> refers to the number of second order stages used.
For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
\image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
A 9th order filter would be realized with <code>numStages=5</code> second order stages
with the coefficients for one of the stages configured as a first order filter
(<code>b2=0</code> and <code>a2=0</code>).
@par
The <code>pState</code> points to state variables array.
Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code> and each state variable in 1.63 format to improve precision.
The state variables are arranged in the array as:
<pre>
{x[n-1], x[n-2], y[n-1], y[n-2]}
</pre>
@par
The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
The state array has a total length of <code>4*numStages</code> values of data in 1.63 format.
The state variables are updated after each block of data is processed, the coefficients are untouched.
@par Instance Structure
The coefficients and state variables for a filter are stored together in an instance data structure.
A separate instance structure must be defined for each filter.
Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
@par Init Function
There is also an associated initialization function which performs the following operations:
- Sets the values of the internal structure fields.
- Zeros out the values in the state buffer.
To do this manually without calling the init function, assign the follow subfields of the instance structure:
numStages, pCoeffs, postShift, pState. Also set all of the values in pState to zero.
@par
Use of the initialization function is optional.
However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
To place an instance structure into a const data section, the instance structure must be manually initialized.
Set the values in the state buffer to zeros before static initialization.
For example, to statically initialize the filter instance structure use
<pre>
arm_biquad_cas_df1_32x64_ins_q31 S1 = {numStages, pState, pCoeffs, postShift};
</pre>
where <code>numStages</code> is the number of Biquad stages in the filter;
<code>pState</code> is the address of the state buffer;
<code>pCoeffs</code> is the address of the coefficient buffer;
<code>postShift</code> shift to be applied which is described in detail below.
@par Fixed-Point Behavior
Care must be taken while using Biquad Cascade 32x64 filter function.
Following issues must be considered:
- Scaling of coefficients
- Filter gain
- Overflow and saturation
@par
Filter coefficients are represented as fractional values and
restricted to lie in the range <code>[-1 +1)</code>.
The processing function has an additional scaling parameter <code>postShift</code>
which allows the filter coefficients to exceed the range <code>[+1 -1)</code>.
At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
\image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
This essentially scales the filter coefficients by <code>2^postShift</code>.
For example, to realize the coefficients
<pre>
{1.5, -0.8, 1.2, 1.6, -0.9}
</pre>
set the Coefficient array to:
<pre>
{0.75, -0.4, 0.6, 0.8, -0.45}
</pre>
and set <code>postShift=1</code>
@par
The second thing to keep in mind is the gain through the filter.
The frequency response of a Biquad filter is a function of its coefficients.
It is possible for the gain through the filter to exceed 1.0 meaning that the
filter increases the amplitude of certain frequencies.
This means that an input signal with amplitude < 1.0 may result in an output > 1.0
and these are saturated or overflowed based on the implementation of the filter.
To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0
or the input signal must be scaled down so that the combination of input and filter are never overflowed.
@par
The third item to consider is the overflow and saturation behavior of the fixed-point Q31 version.
This is described in the function specific documentation below.
*/
/**
@addtogroup BiquadCascadeDF1_32x64
@{
*/
/**
@brief Processing function for the Q31 Biquad cascade 32x64 filter.
@param[in] S points to an instance of the high precision Q31 Biquad cascade filter
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process
@return none
@par Details
The function is implemented using an internal 64-bit accumulator.
The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
Thus, if the accumulator result overflows it wraps around rather than clip.
In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
1.31 format by discarding the low 32 bits.
@par
Two related functions are provided in the CMSIS DSP library.
- \ref arm_biquad_cascade_df1_q31() implements a Biquad cascade with 32-bit coefficients and state variables with a Q63 accumulator.
- \ref arm_biquad_cascade_df1_fast_q31() implements a Biquad cascade with 32-bit coefficients and state variables with a Q31 accumulator.
*/
void arm_biquad_cas_df1_32x64_q31(
const arm_biquad_cas_df1_32x64_ins_q31 * S,
q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
q31_t *pIn = pSrc; /* input pointer initialization */
q31_t *pOut = pDst; /* output pointer initialization */
q63_t *pState = S->pState; /* state pointer initialization */
const q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
q63_t acc; /* accumulator */
q31_t Xn1, Xn2; /* Input Filter state variables */
q63_t Yn1, Yn2; /* Output Filter state variables */
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
q31_t Xn; /* temporary input */
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
uint32_t sample, stage = S->numStages; /* loop counters */
q31_t acc_l, acc_h; /* temporary output */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
do
{
/* Reading the coefficients */
b0 = *pCoeffs++;
b1 = *pCoeffs++;
b2 = *pCoeffs++;
a1 = *pCoeffs++;
a2 = *pCoeffs++;
/* Reading the state values */
Xn1 = (q31_t) (pState[0]);
Xn2 = (q31_t) (pState[1]);
Yn1 = pState[2];
Yn2 = pState[3];
#if defined (ARM_MATH_LOOPUNROLL)
/* Apply loop unrolling and compute 4 output values simultaneously. */
/* Variable acc hold output value that is being computed and stored in destination buffer
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
/* Loop unrolling: Compute 4 outputs at a time */
sample = blockSize >> 2U;
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
acc = (q63_t) Xn * b0;
/* acc += b1 * x[n-1] */
acc += (q63_t) Xn1 * b1;
/* acc += b[2] * x[n-2] */
acc += (q63_t) Xn2 * b2;
/* acc += a1 * y[n-1] */
acc += mult32x64(Yn1, a1);
/* acc += a2 * y[n-2] */
acc += mult32x64(Yn2, a2);
/* The result is converted to 1.63 , Yn2 variable is reused */
Yn2 = acc << shift;
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store the output in the destination buffer in 1.31 format. */
*pOut = acc_h;
/* Read the second input into Xn2, to reuse the value */
Xn2 = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc += b1 * x[n-1] */
acc = (q63_t) Xn * b1;
/* acc = b0 * x[n] */
acc += (q63_t) Xn2 * b0;
/* acc += b[2] * x[n-2] */
acc += (q63_t) Xn1 * b2;
/* acc += a1 * y[n-1] */
acc += mult32x64(Yn2, a1);
/* acc += a2 * y[n-2] */
acc += mult32x64(Yn1, a2);
/* The result is converted to 1.63, Yn1 variable is reused */
Yn1 = acc << shift;
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Read the third input into Xn1, to reuse the value */
Xn1 = *pIn++;
/* The result is converted to 1.31 */
/* Store the output in the destination buffer. */
*(pOut + 1U) = acc_h;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
acc = (q63_t) Xn1 * b0;
/* acc += b1 * x[n-1] */
acc += (q63_t) Xn2 * b1;
/* acc += b[2] * x[n-2] */
acc += (q63_t) Xn * b2;
/* acc += a1 * y[n-1] */
acc += mult32x64(Yn1, a1);
/* acc += a2 * y[n-2] */
acc += mult32x64(Yn2, a2);
/* The result is converted to 1.63, Yn2 variable is reused */
Yn2 = acc << shift;
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store the output in the destination buffer in 1.31 format. */
*(pOut + 2U) = acc_h;
/* Read the fourth input into Xn, to reuse the value */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
acc = (q63_t) Xn * b0;
/* acc += b1 * x[n-1] */
acc += (q63_t) Xn1 * b1;
/* acc += b[2] * x[n-2] */
acc += (q63_t) Xn2 * b2;
/* acc += a1 * y[n-1] */
acc += mult32x64(Yn2, a1);
/* acc += a2 * y[n-2] */
acc += mult32x64(Yn1, a2);
/* The result is converted to 1.63, Yn1 variable is reused */
Yn1 = acc << shift;
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store the output in the destination buffer in 1.31 format. */
*(pOut + 3U) = acc_h;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
/* update output pointer */
pOut += 4U;
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = blockSize & 0x3U;
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
acc = (q63_t) Xn * b0;
/* acc += b1 * x[n-1] */
acc += (q63_t) Xn1 * b1;
/* acc += b[2] * x[n-2] */
acc += (q63_t) Xn2 * b2;
/* acc += a1 * y[n-1] */
acc += mult32x64(Yn1, a1);
/* acc += a2 * y[n-2] */
acc += mult32x64(Yn2, a2);
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
Yn2 = Yn1;
/* The result is converted to 1.63, Yn1 variable is reused */
Yn1 = acc << shift;
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store the output in the destination buffer in 1.31 format. */
*pOut++ = acc_h;
/* Yn1 = acc << shift; */
/* Store the output in the destination buffer in 1.31 format. */
/* *pOut++ = (q31_t) (acc >> (32 - shift)); */
/* decrement loop counter */
sample--;
}
/* The first stage output is given as input to the second stage. */
pIn = pDst;
/* Reset to destination buffer working pointer */
pOut = pDst;
/* Store the updated state variables back into the pState array */
*pState++ = (q63_t) Xn1;
*pState++ = (q63_t) Xn2;
*pState++ = Yn1;
*pState++ = Yn2;
} while (--stage);
}
/**
@} end of BiquadCascadeDF1_32x64 group
*/

View File

@ -0,0 +1,495 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_f32.c
* Description: Processing function for the floating-point Biquad cascade DirectFormI(DF1) filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure
This set of functions implements arbitrary order recursive (IIR) filters.
The filters are implemented as a cascade of second order Biquad sections.
The functions support Q15, Q31 and floating-point data types.
Fast version of Q15 and Q31 also available.
The functions operate on blocks of input and output data and each call to the function
processes <code>blockSize</code> samples through the filter.
<code>pSrc</code> points to the array of input data and
<code>pDst</code> points to the array of output data.
Both arrays contain <code>blockSize</code> values.
@par Algorithm
Each Biquad stage implements a second order filter using the difference equation:
<pre>
y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
</pre>
A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
\image html Biquad.gif "Single Biquad filter stage"
Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
Pay careful attention to the sign of the feedback coefficients.
Some design tools use the difference equation
<pre>
y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
</pre>
In this case the feedback coefficients <code>a1</code> and <code>a2</code>
must be negated when used with the CMSIS DSP Library.
@par
Higher order filters are realized as a cascade of second order sections.
<code>numStages</code> refers to the number of second order stages used.
For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
\image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
@par
The <code>pState</code> points to state variables array.
Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
The state variables are arranged in the <code>pState</code> array as:
<pre>
{x[n-1], x[n-2], y[n-1], y[n-2]}
</pre>
@par
The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
The state array has a total length of <code>4*numStages</code> values.
The state variables are updated after each block of data is processed, the coefficients are untouched.
@par Instance Structure
The coefficients and state variables for a filter are stored together in an instance data structure.
A separate instance structure must be defined for each filter.
Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
There are separate instance structure declarations for each of the 3 supported data types.
@par Init Function
There is also an associated initialization function for each data type.
The initialization function performs following operations:
- Sets the values of the internal structure fields.
- Zeros out the values in the state buffer.
To do this manually without calling the init function, assign the follow subfields of the instance structure:
numStages, pCoeffs, pState. Also set all of the values in pState to zero.
@par
Use of the initialization function is optional.
However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
To place an instance structure into a const data section, the instance structure must be manually initialized.
Set the values in the state buffer to zeros before static initialization.
The code below statically initializes each of the 3 different data type filter instance structures
<pre>
arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};
arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};
arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};
</pre>
where <code>numStages</code> is the number of Biquad stages in the filter;
<code>pState</code> is the address of the state buffer;
<code>pCoeffs</code> is the address of the coefficient buffer;
<code>postShift</code> shift to be applied.
@par Fixed-Point Behavior
Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.
Following issues must be considered:
- Scaling of coefficients
- Filter gain
- Overflow and saturation
@par Scaling of coefficients
Filter coefficients are represented as fractional values and
coefficients are restricted to lie in the range <code>[-1 +1)</code>.
The fixed-point functions have an additional scaling parameter <code>postShift</code>
which allow the filter coefficients to exceed the range <code>[+1 -1)</code>.
At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
\image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
This essentially scales the filter coefficients by <code>2^postShift</code>.
For example, to realize the coefficients
<pre>
{1.5, -0.8, 1.2, 1.6, -0.9}
</pre>
set the pCoeffs array to:
<pre>
{0.75, -0.4, 0.6, 0.8, -0.45}
</pre>
and set <code>postShift=1</code>
@par Filter gain
The frequency response of a Biquad filter is a function of its coefficients.
It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.
To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.
@par Overflow and saturation
For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Processing function for the floating-point Biquad cascade filter.
@param[in] S points to an instance of the floating-point Biquad cascade structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process
@return none
*/
#if defined(ARM_MATH_NEON)
void arm_biquad_cascade_df1_f32(
const arm_biquad_casd_df1_inst_f32 * S,
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
const float32_t *pIn = pSrc; /* source pointer */
float32_t *pOut = pDst; /* destination pointer */
float32_t *pState = S->pState; /* pState pointer */
const float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
float32_t acc; /* Simulates the accumulator */
uint32_t sample, stage = S->numStages; /* loop counters */
float32x4_t Xn;
float32x2_t Yn;
float32x2_t a;
float32x4_t b;
float32x4_t x,tmp;
float32x2_t t;
float32x2x2_t y;
float32_t Xns;
while (stage > 0U)
{
/* Reading the coefficients */
Xn = vld1q_f32(pState);
Yn = vld1_f32(pState + 2);
b = vld1q_f32(pCoeffs);
b = vrev64q_f32(b);
b = vcombine_f32(vget_high_f32(b), vget_low_f32(b));
a = vld1_f32(pCoeffs + 3);
a = vrev64_f32(a);
b[0] = 0.0;
pCoeffs += 5;
/* Reading the pState values */
/* Apply loop unrolling and compute 4 output values simultaneously. */
/* The variable acc hold output values that are being computed:
*
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
sample = blockSize >> 2U;
while (sample > 0U)
{
/* Read the first 4 inputs */
x = vld1q_f32(pIn);
pIn += 4;
tmp = vextq_f32(Xn, x, 1);
t = vmul_f32(vget_high_f32(b), vget_high_f32(tmp));
t = vmla_f32(t, vget_low_f32(b), vget_low_f32(tmp));
t = vmla_f32(t, a, Yn);
t = vpadd_f32(t, t);
Yn = vext_f32(Yn, t, 1);
tmp = vextq_f32(Xn, x, 2);
t = vmul_f32(vget_high_f32(b), vget_high_f32(tmp));
t = vmla_f32(t, vget_low_f32(b), vget_low_f32(tmp));
t = vmla_f32(t, a, Yn);
t = vpadd_f32(t, t);
Yn = vext_f32(Yn, t, 1);
y.val[0] = Yn;
tmp = vextq_f32(Xn, x, 3);
t = vmul_f32(vget_high_f32(b), vget_high_f32(tmp));
t = vmla_f32(t, vget_low_f32(b), vget_low_f32(tmp));
t = vmla_f32(t, a, Yn);
t = vpadd_f32(t, t);
Yn = vext_f32(Yn, t, 1);
Xn = x;
t = vmul_f32(vget_high_f32(b), vget_high_f32(Xn));
t = vmla_f32(t, vget_low_f32(b), vget_low_f32(Xn));
t = vmla_f32(t, a, Yn);
t = vpadd_f32(t, t);
Yn = vext_f32(Yn, t, 1);
y.val[1] = Yn;
tmp = vcombine_f32(y.val[0], y.val[1]);
/* Store the 4 outputs and increment the pointer */
vst1q_f32(pOut, tmp);
pOut += 4;
/* Decrement the loop counter */
sample--;
}
/* If the block size is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
sample = blockSize & 0x3U;
while (sample > 0U)
{
/* Read the input */
Xns = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = (b[1] * Xn[2]) + (b[2] * Xn[3]) + (b[3] * Xns) + (a[0] * Yn[0]) + (a[1] * Yn[1]);
/* Store the result in the accumulator in the destination buffer. */
*pOut++ = acc;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn[2] = Xn[3];
Xn[3] = Xns;
Yn[0] = Yn[1];
Yn[1] = acc;
/* Decrement the loop counter */
sample--;
}
vst1q_f32(pState,vcombine_f32(vrev64_f32(vget_high_f32(Xn)),vrev64_f32(Yn)));
pState += 4;
/* Store the updated state variables back into the pState array */
/* The first stage goes from the input buffer to the output buffer. */
/* Subsequent numStages occur in-place in the output buffer */
pIn = pDst;
/* Reset the output pointer */
pOut = pDst;
/* Decrement the loop counter */
stage--;
}
}
#else
void arm_biquad_cascade_df1_f32(
const arm_biquad_casd_df1_inst_f32 * S,
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
const float32_t *pIn = pSrc; /* Source pointer */
float32_t *pOut = pDst; /* Destination pointer */
float32_t *pState = S->pState; /* pState pointer */
const float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
float32_t acc; /* Accumulator */
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
float32_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */
float32_t Xn; /* Temporary input */
uint32_t sample, stage = S->numStages; /* Loop counters */
do
{
/* Reading the coefficients */
b0 = *pCoeffs++;
b1 = *pCoeffs++;
b2 = *pCoeffs++;
a1 = *pCoeffs++;
a2 = *pCoeffs++;
/* Reading the pState values */
Xn1 = pState[0];
Xn2 = pState[1];
Yn1 = pState[2];
Yn2 = pState[3];
#if defined (ARM_MATH_LOOPUNROLL)
/* Apply loop unrolling and compute 4 output values simultaneously. */
/* Variable acc hold output values that are being computed:
*
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
/* Loop unrolling: Compute 4 outputs at a time */
sample = blockSize >> 2U;
while (sample > 0U)
{
/* Read the first input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
Yn2 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
/* Store output in destination buffer. */
*pOut++ = Yn2;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* Read the second input */
Xn2 = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
Yn1 = (b0 * Xn2) + (b1 * Xn) + (b2 * Xn1) + (a1 * Yn2) + (a2 * Yn1);
/* Store output in destination buffer. */
*pOut++ = Yn1;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* Read the third input */
Xn1 = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
Yn2 = (b0 * Xn1) + (b1 * Xn2) + (b2 * Xn) + (a1 * Yn1) + (a2 * Yn2);
/* Store output in destination buffer. */
*pOut++ = Yn2;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* Read the forth input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
Yn1 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn2) + (a2 * Yn1);
/* Store output in destination buffer. */
*pOut++ = Yn1;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = blockSize & 0x3U;
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
/* Store output in destination buffer. */
*pOut++ = acc;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
Yn2 = Yn1;
Yn1 = acc;
/* decrement loop counter */
sample--;
}
/* Store the updated state variables back into the pState array */
*pState++ = Xn1;
*pState++ = Xn2;
*pState++ = Yn1;
*pState++ = Yn2;
/* The first stage goes from the input buffer to the output buffer. */
/* Subsequent numStages occur in-place in the output buffer */
pIn = pDst;
/* Reset output pointer */
pOut = pDst;
/* decrement loop counter */
stage--;
} while (stage > 0U);
}
#endif /* #if defined(ARM_MATH_NEON) */
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,250 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_fast_q15.c
* Description: Fast processing function for the Q15 Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Processing function for the Q15 Biquad cascade filter (fast variant).
@param[in] S points to an instance of the Q15 Biquad cascade structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process per call
@return none
@par Scaling and Overflow Behavior
This fast version uses a 32-bit accumulator with 2.30 format.
The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
Thus, if the accumulator result overflows it wraps around and distorts the result.
In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits.
@remark
Refer to \ref arm_biquad_cascade_df1_q15() for a slower implementation of this function
which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure.
Use the function \ref arm_biquad_cascade_df1_init_q15() to initialize the filter structure.
*/
void arm_biquad_cascade_df1_fast_q15(
const arm_biquad_casd_df1_inst_q15 * S,
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
const q15_t *pIn = pSrc; /* Source pointer */
q15_t *pOut = pDst; /* Destination pointer */
q15_t *pState = S->pState; /* State pointer */
const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
q31_t acc; /* Accumulator */
q31_t in; /* Temporary variable to hold input value */
q31_t out; /* Temporary variable to hold output value */
q31_t b0; /* Temporary variable to hold bo value */
q31_t b1, a1; /* Filter coefficients */
q31_t state_in, state_out; /* Filter state variables */
int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */
uint32_t sample, stage = S->numStages; /* Loop counters */
do
{
/* Read the b0 and 0 coefficients using SIMD */
b0 = read_q15x2_ia ((q15_t **) &pCoeffs);
/* Read the b1 and b2 coefficients using SIMD */
b1 = read_q15x2_ia ((q15_t **) &pCoeffs);
/* Read the a1 and a2 coefficients using SIMD */
a1 = read_q15x2_ia ((q15_t **) &pCoeffs);
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
state_in = read_q15x2_ia (&pState);
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
state_out = read_q15x2_da (&pState);
#if defined (ARM_MATH_LOOPUNROLL)
/* Apply loop unrolling and compute 2 output values simultaneously. */
/* Variable acc hold output values that are being computed:
*
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
/* Loop unrolling: Compute 2 outputs at a time */
sample = blockSize >> 1U;
while (sample > 0U)
{
/* Read the input */
in = read_q15x2_ia ((q15_t **) &pIn);
/* out = b0 * x[n] + 0 * 0 */
out = __SMUAD(b0, in);
/* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */
acc = __SMLAD(b1, state_in, out);
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
acc = __SMLAD(a1, state_out, acc);
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
out = __SSAT((acc >> shift), 16);
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
#ifndef ARM_MATH_BIG_ENDIAN
state_in = __PKHBT(in, state_in, 16);
state_out = __PKHBT(out, state_out, 16);
#else
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
state_out = __PKHBT(state_out >> 16, (out), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* out = b0 * x[n] + 0 * 0 */
out = __SMUADX(b0, in);
/* acc0 = b1 * x[n-1] , acc0 += b2 * x[n-2] + out */
acc = __SMLAD(b1, state_in, out);
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
acc = __SMLAD(a1, state_out, acc);
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
out = __SSAT((acc >> shift), 16);
/* Store the output in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pOut, __PKHBT(state_out, out, 16));
#else
write_q15x2_ia (&pOut, __PKHBT(out, state_out >> 16, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
#ifndef ARM_MATH_BIG_ENDIAN
state_in = __PKHBT(in >> 16, state_in, 16);
state_out = __PKHBT(out, state_out, 16);
#else
state_in = __PKHBT(state_in >> 16, in, 16);
state_out = __PKHBT(state_out >> 16, out, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = (blockSize & 0x1U);
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U)
{
/* Read the input */
in = *pIn++;
/* out = b0 * x[n] + 0 * 0 */
#ifndef ARM_MATH_BIG_ENDIAN
out = __SMUAD(b0, in);
#else
out = __SMUADX(b0, in);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* acc = b1 * x[n-1], acc += b2 * x[n-2] + out */
acc = __SMLAD(b1, state_in, out);
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
acc = __SMLAD(a1, state_out, acc);
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
out = __SSAT((acc >> shift), 16);
/* Store the output in the destination buffer. */
*pOut++ = (q15_t) out;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
#ifndef ARM_MATH_BIG_ENDIAN
state_in = __PKHBT(in, state_in, 16);
state_out = __PKHBT(out, state_out, 16);
#else
state_in = __PKHBT(state_in >> 16, in, 16);
state_out = __PKHBT(state_out >> 16, out, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* decrement loop counter */
sample--;
}
/* The first stage goes from the input buffer to the output buffer. */
/* Subsequent (numStages - 1) occur in-place in the output buffer */
pIn = pDst;
/* Reset the output pointer */
pOut = pDst;
/* Store the updated state variables back into the state array */
write_q15x2_ia(&pState, state_in);
write_q15x2_ia(&pState, state_out);
/* Decrement loop counter */
stage--;
} while (stage > 0U);
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,296 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_fast_q31.c
* Description: Processing function for the Q31 Fast Biquad cascade DirectFormI(DF1) filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Processing function for the Q31 Biquad cascade filter (fast variant).
@param[in] S points to an instance of the Q31 Biquad cascade structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process per call
@return none
@par Scaling and Overflow Behavior
This function is optimized for speed at the expense of fixed-point precision and overflow protection.
The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
These intermediate results are added to a 2.30 accumulator.
Finally, the accumulator is saturated and converted to a 1.31 result.
The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function
arm_biquad_cascade_df1_init_q31() to initialize filter structure.
@remark
Refer to \ref arm_biquad_cascade_df1_q31() for a slower implementation of this function
which uses 64-bit accumulation to provide higher precision. Both the slow and the fast versions use the same instance structure.
Use the function \ref arm_biquad_cascade_df1_init_q31() to initialize the filter structure.
*/
void arm_biquad_cascade_df1_fast_q31(
const arm_biquad_casd_df1_inst_q31 * S,
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
const q31_t *pIn = pSrc; /* Source pointer */
q31_t *pOut = pDst; /* Destination pointer */
q31_t *pState = S->pState; /* pState pointer */
const q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
q31_t acc = 0; /* Accumulator */
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */
q31_t Xn; /* Temporary input */
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
uint32_t sample, stage = S->numStages; /* Loop counters */
do
{
/* Reading the coefficients */
b0 = *pCoeffs++;
b1 = *pCoeffs++;
b2 = *pCoeffs++;
a1 = *pCoeffs++;
a2 = *pCoeffs++;
/* Reading the pState values */
Xn1 = pState[0];
Xn2 = pState[1];
Yn1 = pState[2];
Yn2 = pState[3];
#if defined (ARM_MATH_LOOPUNROLL)
/* Apply loop unrolling and compute 4 output values simultaneously. */
/* Variables acc ... acc3 hold output values that are being computed:
*
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
/* Loop unrolling: Compute 4 outputs at a time */
sample = blockSize >> 2U;
while (sample > 0U)
{
/* Read the input */
Xn = *pIn;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
/* acc = (q31_t) (((q63_t) b1 * Xn1) >> 32);*/
mult_32x32_keep32_R(acc, b1, Xn1);
/* acc += b1 * x[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32);*/
multAcc_32x32_keep32_R(acc, b0, Xn);
/* acc += b[2] * x[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, b2, Xn2);
/* acc += a1 * y[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, a1, Yn1);
/* acc += a2 * y[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, a2, Yn2);
/* The result is converted to 1.31 , Yn2 variable is reused */
Yn2 = acc << shift;
/* Read the second input */
Xn2 = *(pIn + 1U);
/* Store the output in the destination buffer. */
*pOut = Yn2;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
/* acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);*/
mult_32x32_keep32_R(acc, b0, Xn2);
/* acc += b1 * x[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);*/
multAcc_32x32_keep32_R(acc, b1, Xn);
/* acc += b[2] * x[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, b2, Xn1);
/* acc += a1 * y[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, a1, Yn2);
/* acc += a2 * y[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, a2, Yn1);
/* The result is converted to 1.31, Yn1 variable is reused */
Yn1 = acc << shift;
/* Read the third input */
Xn1 = *(pIn + 2U);
/* Store the output in the destination buffer. */
*(pOut + 1U) = Yn1;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
/* acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);*/
mult_32x32_keep32_R(acc, b0, Xn1);
/* acc += b1 * x[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, b1, Xn2);
/* acc += b[2] * x[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);*/
multAcc_32x32_keep32_R(acc, b2, Xn);
/* acc += a1 * y[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, a1, Yn1);
/* acc += a2 * y[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, a2, Yn2);
/* The result is converted to 1.31, Yn2 variable is reused */
Yn2 = acc << shift;
/* Read the forth input */
Xn = *(pIn + 3U);
/* Store the output in the destination buffer. */
*(pOut + 2U) = Yn2;
pIn += 4U;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
/* acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);*/
mult_32x32_keep32_R(acc, b0, Xn);
/* acc += b1 * x[n-1] */
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, b1, Xn1);
/* acc += b[2] * x[n-2] */
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, b2, Xn2);
/* acc += a1 * y[n-1] */
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, a1, Yn2);
/* acc += a2 * y[n-2] */
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, a2, Yn1);
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
Xn2 = Xn1;
/* The result is converted to 1.31, Yn1 variable is reused */
Yn1 = acc << shift;
/* Xn1 = Xn */
Xn1 = Xn;
/* Store the output in the destination buffer. */
*(pOut + 3U) = Yn1;
pOut += 4U;
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = (blockSize & 0x3U);
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
/* acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);*/
mult_32x32_keep32_R(acc, b0, Xn);
/* acc += b1 * x[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, b1, Xn1);
/* acc += b[2] * x[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, b2, Xn2);
/* acc += a1 * y[n-1] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
multAcc_32x32_keep32_R(acc, a1, Yn1);
/* acc += a2 * y[n-2] */
/* acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
multAcc_32x32_keep32_R(acc, a2, Yn2);
/* The result is converted to 1.31 */
acc = acc << shift;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
Yn2 = Yn1;
Yn1 = acc;
/* Store the output in the destination buffer. */
*pOut++ = acc;
/* decrement loop counter */
sample--;
}
/* The first stage goes from the input buffer to the output buffer. */
/* Subsequent stages occur in-place in the output buffer */
pIn = pDst;
/* Reset to destination pointer */
pOut = pDst;
/* Store the updated state variables back into the pState array */
*pState++ = Xn1;
*pState++ = Xn2;
*pState++ = Yn1;
*pState++ = Yn2;
} while (--stage);
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,91 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_init_f32.c
* Description: Floating-point Biquad cascade DirectFormI(DF1) filter initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Initialization function for the floating-point Biquad cascade filter.
@param[in,out] S points to an instance of the floating-point Biquad cascade structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
The state variables are arranged in the <code>pState</code> array as:
<pre>
{x[n-1], x[n-2], y[n-1], y[n-2]}
</pre>
The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
The state array has a total length of <code>4*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_df1_init_f32(
arm_biquad_casd_df1_inst_f32 * S,
uint8_t numStages,
const float32_t * pCoeffs,
float32_t * pState)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,96 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_init_q15.c
* Description: Q15 Biquad cascade DirectFormI(DF1) filter initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Initialization function for the Q15 Biquad cascade filter.
@param[in,out] S points to an instance of the Q15 Biquad cascade structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@param[in] postShift Shift to be applied to the accumulator result. Varies according to the coefficients format
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order:
<pre>
{b10, 0, b11, b12, a11, a12, b20, 0, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>6*numStages</code> values.
The zero coefficient between <code>b1</code> and <code>b2</code> facilities use of 16-bit SIMD instructions on the Cortex-M4.
@par
The state variables are stored in the array <code>pState</code>.
Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
The state variables are arranged in the <code>pState</code> array as:
<pre>
{x[n-1], x[n-2], y[n-1], y[n-2]}
</pre>
The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
The state array has a total length of <code>4*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_df1_init_q15(
arm_biquad_casd_df1_inst_q15 * S,
uint8_t numStages,
const q15_t * pCoeffs,
q15_t * pState,
int8_t postShift)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign postShift to be applied to the output */
S->postShift = postShift;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q15_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,95 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_init_q31.c
* Description: Q31 Biquad cascade DirectFormI(DF1) filter initialization function
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Initialization function for the Q31 Biquad cascade filter.
@param[in,out] S points to an instance of the Q31 Biquad cascade structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
@par
The <code>pState</code> points to state variables array.
Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
The state variables are arranged in the <code>pState</code> array as:
<pre>
{x[n-1], x[n-2], y[n-1], y[n-2]}
</pre>
The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
The state array has a total length of <code>4*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_df1_init_q31(
arm_biquad_casd_df1_inst_q31 * S,
uint8_t numStages,
const q31_t * pCoeffs,
q31_t * pState,
int8_t postShift)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign postShift to be applied to the output */
S->postShift = postShift;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q31_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,363 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_q15.c
* Description: Processing function for the Q15 Biquad cascade DirectFormI(DF1) filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Processing function for the Q15 Biquad cascade filter.
@param[in] S points to an instance of the Q15 Biquad cascade structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the location where the output result is written
@param[in] blockSize number of samples to process
@return none
@par Scaling and Overflow Behavior
The function is implemented using a 64-bit internal accumulator.
Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.
Finally, the result is saturated to 1.15 format.
@remark
Refer to \ref arm_biquad_cascade_df1_fast_q15() for a faster but less precise implementation of this filter.
*/
void arm_biquad_cascade_df1_q15(
const arm_biquad_casd_df1_inst_q15 * S,
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
#if defined (ARM_MATH_DSP)
const q15_t *pIn = pSrc; /* Source pointer */
q15_t *pOut = pDst; /* Destination pointer */
q31_t in; /* Temporary variable to hold input value */
q31_t out; /* Temporary variable to hold output value */
q31_t b0; /* Temporary variable to hold bo value */
q31_t b1, a1; /* Filter coefficients */
q31_t state_in, state_out; /* Filter state variables */
q31_t acc_l, acc_h;
q63_t acc; /* Accumulator */
q15_t *pState = S->pState; /* State pointer */
const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
int32_t uShift = (32 - lShift);
do
{
/* Read the b0 and 0 coefficients using SIMD */
b0 = read_q15x2_ia ((q15_t **) &pCoeffs);
/* Read the b1 and b2 coefficients using SIMD */
b1 = read_q15x2_ia ((q15_t **) &pCoeffs);
/* Read the a1 and a2 coefficients using SIMD */
a1 = read_q15x2_ia ((q15_t **) &pCoeffs);
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
state_in = read_q15x2_ia (&pState);
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
state_out = read_q15x2_da (&pState);
/* Apply loop unrolling and compute 2 output values simultaneously. */
/* The variable acc hold output values that are being computed:
*
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 1U;
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
** a second loop below computes the remaining 1 sample. */
while (sample > 0U)
{
/* Read the input */
in = read_q15x2_ia ((q15_t **) &pIn);
/* out = b0 * x[n] + 0 * 0 */
out = __SMUAD(b0, in);
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
acc = __SMLALD(b1, state_in, out);
/* acc += a1 * y[n-1] + a2 * y[n-2] */
acc = __SMLALD(a1, state_out, acc);
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
out = __SSAT(out, 16);
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
#ifndef ARM_MATH_BIG_ENDIAN
state_in = __PKHBT(in, state_in, 16);
state_out = __PKHBT(out, state_out, 16);
#else
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
state_out = __PKHBT(state_out >> 16, (out), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* out = b0 * x[n] + 0 * 0 */
out = __SMUADX(b0, in);
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
acc = __SMLALD(b1, state_in, out);
/* acc += a1 * y[n-1] + a2 * y[n-2] */
acc = __SMLALD(a1, state_out, acc);
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
out = __SSAT(out, 16);
/* Store the output in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pOut, __PKHBT(state_out, out, 16));
#else
write_q15x2_ia (&pOut, __PKHBT(out, state_out >> 16, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
#ifndef ARM_MATH_BIG_ENDIAN
state_in = __PKHBT(in >> 16, state_in, 16);
state_out = __PKHBT(out, state_out, 16);
#else
state_in = __PKHBT(state_in >> 16, in, 16);
state_out = __PKHBT(state_out >> 16, out, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Decrement loop counter */
sample--;
}
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
** No loop unrolling is used. */
if ((blockSize & 0x1U) != 0U)
{
/* Read the input */
in = *pIn++;
/* out = b0 * x[n] + 0 * 0 */
#ifndef ARM_MATH_BIG_ENDIAN
out = __SMUAD(b0, in);
#else
out = __SMUADX(b0, in);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* acc = b1 * x[n-1] + b2 * x[n-2] + out */
acc = __SMLALD(b1, state_in, out);
/* acc += a1 * y[n-1] + a2 * y[n-2] */
acc = __SMLALD(a1, state_out, acc);
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
/* Calc lower part of acc */
acc_l = acc & 0xffffffff;
/* Calc upper part of acc */
acc_h = (acc >> 32) & 0xffffffff;
/* Apply shift for lower part of acc and upper part of acc */
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
out = __SSAT(out, 16);
/* Store the output in the destination buffer. */
*pOut++ = (q15_t) out;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
#ifndef ARM_MATH_BIG_ENDIAN
state_in = __PKHBT(in, state_in, 16);
state_out = __PKHBT(out, state_out, 16);
#else
state_in = __PKHBT(state_in >> 16, in, 16);
state_out = __PKHBT(state_out >> 16, out, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
}
/* The first stage goes from the input wire to the output wire. */
/* Subsequent numStages occur in-place in the output wire */
pIn = pDst;
/* Reset the output pointer */
pOut = pDst;
/* Store the updated state variables back into the state array */
write_q15x2_ia (&pState, state_in);
write_q15x2_ia (&pState, state_out);
/* Decrement loop counter */
stage--;
} while (stage > 0U);
#else
const q15_t *pIn = pSrc; /* Source pointer */
q15_t *pOut = pDst; /* Destination pointer */
q15_t b0, b1, b2, a1, a2; /* Filter coefficients */
q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
q15_t Xn; /* temporary input */
q63_t acc; /* Accumulator */
int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */
q15_t *pState = S->pState; /* State pointer */
const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
do
{
/* Reading the coefficients */
b0 = *pCoeffs++;
pCoeffs++; // skip the 0 coefficient
b1 = *pCoeffs++;
b2 = *pCoeffs++;
a1 = *pCoeffs++;
a2 = *pCoeffs++;
/* Reading the state values */
Xn1 = pState[0];
Xn2 = pState[1];
Yn1 = pState[2];
Yn2 = pState[3];
/* The variables acc holds the output value that is computed:
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize;
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
acc = (q31_t) b0 *Xn;
/* acc += b1 * x[n-1] */
acc += (q31_t) b1 *Xn1;
/* acc += b[2] * x[n-2] */
acc += (q31_t) b2 *Xn2;
/* acc += a1 * y[n-1] */
acc += (q31_t) a1 *Yn1;
/* acc += a2 * y[n-2] */
acc += (q31_t) a2 *Yn2;
/* The result is converted to 1.31 */
acc = __SSAT((acc >> shift), 16);
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
Yn2 = Yn1;
Yn1 = (q15_t) acc;
/* Store the output in the destination buffer. */
*pOut++ = (q15_t) acc;
/* decrement the loop counter */
sample--;
}
/* The first stage goes from the input buffer to the output buffer. */
/* Subsequent stages occur in-place in the output buffer */
pIn = pDst;
/* Reset to destination pointer */
pOut = pDst;
/* Store the updated state variables back into the pState array */
*pState++ = Xn1;
*pState++ = Xn2;
*pState++ = Yn1;
*pState++ = Yn2;
} while (--stage);
#endif /* #if defined (ARM_MATH_DSP) */
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,247 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df1_q31.c
* Description: Processing function for the Q31 Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF1
@{
*/
/**
@brief Processing function for the Q31 Biquad cascade filter.
@param[in] S points to an instance of the Q31 Biquad cascade structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process
@return none
@par Scaling and Overflow Behavior
The function is implemented using an internal 64-bit accumulator.
The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
Thus, if the accumulator result overflows it wraps around rather than clip.
In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
1.31 format by discarding the low 32 bits.
@remark
Refer to \ref arm_biquad_cascade_df1_fast_q31() for a faster but less precise implementation of this filter.
*/
void arm_biquad_cascade_df1_q31(
const arm_biquad_casd_df1_inst_q31 * S,
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
const q31_t *pIn = pSrc; /* Source pointer */
q31_t *pOut = pDst; /* Destination pointer */
q31_t *pState = S->pState; /* pState pointer */
const q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
q63_t acc; /* Accumulator */
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */
q31_t Xn; /* Temporary input */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
uint32_t sample, stage = S->numStages; /* Loop counters */
#if defined (ARM_MATH_LOOPUNROLL)
q31_t acc_l, acc_h; /* temporary output variables */
#endif
do
{
/* Reading the coefficients */
b0 = *pCoeffs++;
b1 = *pCoeffs++;
b2 = *pCoeffs++;
a1 = *pCoeffs++;
a2 = *pCoeffs++;
/* Reading the pState values */
Xn1 = pState[0];
Xn2 = pState[1];
Yn1 = pState[2];
Yn2 = pState[3];
#if defined (ARM_MATH_LOOPUNROLL)
/* Apply loop unrolling and compute 4 output values simultaneously. */
/* Variable acc hold output values that are being computed:
*
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
/* Loop unrolling: Compute 4 outputs at a time */
sample = blockSize >> 2U;
while (sample > 0U)
{
/* Read the first input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = ((q63_t) b0 * Xn) + ((q63_t) b1 * Xn1) + ((q63_t) b2 * Xn2) + ((q63_t) a1 * Yn1) + ((q63_t) a2 * Yn2);
/* The result is converted to 1.31 , Yn2 variable is reused */
acc_l = (acc ) & 0xffffffff; /* Calc lower part of acc */
acc_h = (acc >> 32) & 0xffffffff; /* Calc upper part of acc */
/* Apply shift for lower part of acc and upper part of acc */
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store output in destination buffer. */
*pOut++ = Yn2;
/* Read the second input */
Xn2 = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = ((q63_t) b0 * Xn2) + ((q63_t) b1 * Xn) + ((q63_t) b2 * Xn1) + ((q63_t) a1 * Yn2) + ((q63_t) a2 * Yn1);
/* The result is converted to 1.31, Yn1 variable is reused */
acc_l = (acc ) & 0xffffffff; /* Calc lower part of acc */
acc_h = (acc >> 32) & 0xffffffff; /* Calc upper part of acc */
/* Apply shift for lower part of acc and upper part of acc */
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store output in destination buffer. */
*pOut++ = Yn1;
/* Read the third input */
Xn1 = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = ((q63_t) b0 * Xn1) + ((q63_t) b1 * Xn2) + ((q63_t) b2 * Xn) + ((q63_t) a1 * Yn1) + ((q63_t) a2 * Yn2);
/* The result is converted to 1.31, Yn2 variable is reused */
acc_l = (acc ) & 0xffffffff; /* Calc lower part of acc */
acc_h = (acc >> 32) & 0xffffffff; /* Calc upper part of acc */
/* Apply shift for lower part of acc and upper part of acc */
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store output in destination buffer. */
*pOut++ = Yn2;
/* Read the forth input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = ((q63_t) b0 * Xn) + ((q63_t) b1 * Xn1) + ((q63_t) b2 * Xn2) + ((q63_t) a1 * Yn2) + ((q63_t) a2 * Yn1);
/* The result is converted to 1.31, Yn1 variable is reused */
acc_l = (acc ) & 0xffffffff; /* Calc lower part of acc */
acc_h = (acc >> 32) & 0xffffffff; /* Calc upper part of acc */
/* Apply shift for lower part of acc and upper part of acc */
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store output in destination buffer. */
*pOut++ = Yn1;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = blockSize & 0x3U;
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
acc = ((q63_t) b0 * Xn) + ((q63_t) b1 * Xn1) + ((q63_t) b2 * Xn2) + ((q63_t) a1 * Yn1) + ((q63_t) a2 * Yn2);
/* The result is converted to 1.31 */
acc = acc >> lShift;
/* Store output in destination buffer. */
*pOut++ = (q31_t) acc;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
/* Xn2 = Xn1 */
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
Xn2 = Xn1;
Xn1 = Xn;
Yn2 = Yn1;
Yn1 = (q31_t) acc;
/* decrement loop counter */
sample--;
}
/* Store the updated state variables back into the pState array */
*pState++ = Xn1;
*pState++ = Xn2;
*pState++ = Yn1;
*pState++ = Yn2;
/* The first stage goes from the input buffer to the output buffer. */
/* Subsequent numStages occur in-place in the output buffer */
pIn = pDst;
/* Reset output pointer */
pOut = pDst;
/* decrement loop counter */
stage--;
} while (stage > 0U);
}
/**
@} end of BiquadCascadeDF1 group
*/

View File

@ -0,0 +1,523 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df2T_f32.c
* Description: Processing function for floating-point transposed direct form II Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF2T
@{
*/
/**
@brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
@param[in] S points to an instance of the filter data structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process
@return none
*/
#if defined(ARM_MATH_NEON)
void arm_biquad_cascade_df2T_f32(
const arm_biquad_cascade_df2T_instance_f32 * S,
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
const float32_t *pIn = pSrc; /* source pointer */
float32_t *pOut = pDst; /* destination pointer */
float32_t *pState = S->pState; /* State pointer */
const float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
float32_t acc1; /* accumulator */
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
float32_t Xn1; /* temporary input */
float32_t d1, d2; /* state variables */
uint32_t sample, stageCnt,stage = S->numStages; /* loop counters */
float32_t Xn2, Xn3, Xn4; /* Input State variables */
float32_t acc2, acc3, acc4; /* accumulator */
float32_t p0, p1, p2, p3, p4, A1;
float32x4_t XnV, YnV;
float32x4x2_t dV;
float32x4_t zeroV = vdupq_n_f32(0.0);
float32x4_t t1,t2,t3,t4,b1V,b2V,a1V,a2V,s;
/* Loop unrolling. Compute 4 outputs at a time */
stageCnt = stage >> 2;
while (stageCnt > 0U)
{
/* Reading the coefficients */
t1 = vld1q_f32(pCoeffs);
pCoeffs += 4;
t2 = vld1q_f32(pCoeffs);
pCoeffs += 4;
t3 = vld1q_f32(pCoeffs);
pCoeffs += 4;
t4 = vld1q_f32(pCoeffs);
pCoeffs += 4;
b1V = vld1q_f32(pCoeffs);
pCoeffs += 4;
b2V = vld1q_f32(pCoeffs);
pCoeffs += 4;
a1V = vld1q_f32(pCoeffs);
pCoeffs += 4;
a2V = vld1q_f32(pCoeffs);
pCoeffs += 4;
/* Reading the state values */
dV = vld2q_f32(pState);
sample = blockSize;
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
/* d2 = b2 * x[n] + a2 * y[n] */
XnV = vdupq_n_f32(*pIn++);
s = dV.val[0];
YnV = s;
s = vextq_f32(zeroV,dV.val[0],3);
YnV = vmlaq_f32(YnV, t1, s);
s = vextq_f32(zeroV,dV.val[0],2);
YnV = vmlaq_f32(YnV, t2, s);
s = vextq_f32(zeroV,dV.val[0],1);
YnV = vmlaq_f32(YnV, t3, s);
YnV = vmlaq_f32(YnV, t4, XnV);
s = vextq_f32(XnV,YnV,3);
dV.val[0] = vmlaq_f32(dV.val[1], s, b1V);
dV.val[0] = vmlaq_f32(dV.val[0], YnV, a1V);
dV.val[1] = vmulq_f32(s, b2V);
dV.val[1] = vmlaq_f32(dV.val[1], YnV, a2V);
*pOut++ = YnV[3];
sample--;
}
/* Store the updated state variables back into the state array */
vst2q_f32(pState,dV);
pState += 8;
/* The current stage input is given as the output to the next stage */
pIn = pDst;
/*Reset the output working pointer */
pOut = pDst;
/* decrement the loop counter */
stageCnt--;
}
/* Tail */
stageCnt = stage & 3;
while (stageCnt > 0U)
{
/* Reading the coefficients */
b0 = *pCoeffs++;
b1 = *pCoeffs++;
b2 = *pCoeffs++;
a1 = *pCoeffs++;
a2 = *pCoeffs++;
/*Reading the state values */
d1 = pState[0];
d2 = pState[1];
sample = blockSize;
while (sample > 0U)
{
/* Read the input */
Xn1 = *pIn++;
/* y[n] = b0 * x[n] + d1 */
acc1 = (b0 * Xn1) + d1;
/* Store the result in the accumulator in the destination buffer. */
*pOut++ = acc1;
/* Every time after the output is computed state should be updated. */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
d1 = ((b1 * Xn1) + (a1 * acc1)) + d2;
/* d2 = b2 * x[n] + a2 * y[n] */
d2 = (b2 * Xn1) + (a2 * acc1);
/* decrement the loop counter */
sample--;
}
/* Store the updated state variables back into the state array */
*pState++ = d1;
*pState++ = d2;
/* The current stage input is given as the output to the next stage */
pIn = pDst;
/*Reset the output working pointer */
pOut = pDst;
/* decrement the loop counter */
stageCnt--;
}
}
#else
LOW_OPTIMIZATION_ENTER
void arm_biquad_cascade_df2T_f32(
const arm_biquad_cascade_df2T_instance_f32 * S,
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
const float32_t *pIn = pSrc; /* Source pointer */
float32_t *pOut = pDst; /* Destination pointer */
float32_t *pState = S->pState; /* State pointer */
const float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
float32_t acc1; /* Accumulator */
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
float32_t Xn1; /* Temporary input */
float32_t d1, d2; /* State variables */
uint32_t sample, stage = S->numStages; /* Loop counters */
do
{
/* Reading the coefficients */
b0 = pCoeffs[0];
b1 = pCoeffs[1];
b2 = pCoeffs[2];
a1 = pCoeffs[3];
a2 = pCoeffs[4];
/* Reading the state values */
d1 = pState[0];
d2 = pState[1];
pCoeffs += 5U;
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 16 outputs at a time */
sample = blockSize >> 4U;
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
/* d2 = b2 * x[n] + a2 * y[n] */
/* 1 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 2 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 3 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 4 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 5 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 6 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 7 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 8 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 9 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 10 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 11 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 12 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 13 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 14 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 15 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 16 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = blockSize & 0xFU;
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U) {
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* decrement loop counter */
sample--;
}
/* Store the updated state variables back into the state array */
pState[0] = d1;
pState[1] = d2;
pState += 2U;
/* The current stage input is given as the output to the next stage */
pIn = pDst;
/* Reset the output working pointer */
pOut = pDst;
/* decrement loop counter */
stage--;
} while (stage > 0U);
}
LOW_OPTIMIZATION_EXIT
#endif /* #if defined(ARM_MATH_NEON) */
/**
@} end of BiquadCascadeDF2T group
*/

View File

@ -0,0 +1,443 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df2T_f64.c
* Description: Processing function for floating-point transposed direct form II Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
The filters are implemented as a cascade of second order Biquad sections.
These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
Only floating-point data is supported.
This function operate on blocks of input and output data and each call to the function
processes <code>blockSize</code> samples through the filter.
<code>pSrc</code> points to the array of input data and
<code>pDst</code> points to the array of output data.
Both arrays contain <code>blockSize</code> values.
@par Algorithm
Each Biquad stage implements a second order filter using the difference equation:
<pre>
y[n] = b0 * x[n] + d1
d1 = b1 * x[n] + a1 * y[n] + d2
d2 = b2 * x[n] + a2 * y[n]
</pre>
where d1 and d2 represent the two state values.
@par
A Biquad filter using a transposed Direct Form II structure is shown below.
\image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
Pay careful attention to the sign of the feedback coefficients.
Some design tools flip the sign of the feedback coefficients:
<pre>
y[n] = b0 * x[n] + d1;
d1 = b1 * x[n] - a1 * y[n] + d2;
d2 = b2 * x[n] - a2 * y[n];
</pre>
In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
@par
Higher order filters are realized as a cascade of second order sections.
<code>numStages</code> refers to the number of second order stages used.
For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
@par
<code>pState</code> points to the state variable array.
Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
The state variables are arranged in the <code>pState</code> array as:
<pre>
{d11, d12, d21, d22, ...}
</pre>
where <code>d1x</code> refers to the state variables for the first Biquad and
<code>d2x</code> refers to the state variables for the second Biquad.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
@par
The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
That is why the Direct Form I structure supports Q15 and Q31 data types.
The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
@par Instance Structure
The coefficients and state variables for a filter are stored together in an instance data structure.
A separate instance structure must be defined for each filter.
Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
@par Init Functions
There is also an associated initialization function.
The initialization function performs following operations:
- Sets the values of the internal structure fields.
- Zeros out the values in the state buffer.
To do this manually without calling the init function, assign the follow subfields of the instance structure:
numStages, pCoeffs, pState. Also set all of the values in pState to zero.
@par
Use of the initialization function is optional.
However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
To place an instance structure into a const data section, the instance structure must be manually initialized.
Set the values in the state buffer to zeros before static initialization.
For example, to statically initialize the instance structure use
<pre>
arm_biquad_cascade_df2T_instance_f64 S1 = {numStages, pState, pCoeffs};
arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
</pre>
where <code>numStages</code> is the number of Biquad stages in the filter;
<code>pState</code> is the address of the state buffer.
<code>pCoeffs</code> is the address of the coefficient buffer;
*/
/**
@addtogroup BiquadCascadeDF2T
@{
*/
/**
@brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
@param[in] S points to an instance of the filter data structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process
@return none
*/
LOW_OPTIMIZATION_ENTER
void arm_biquad_cascade_df2T_f64(
const arm_biquad_cascade_df2T_instance_f64 * S,
float64_t * pSrc,
float64_t * pDst,
uint32_t blockSize)
{
float64_t *pIn = pSrc; /* Source pointer */
float64_t *pOut = pDst; /* Destination pointer */
float64_t *pState = S->pState; /* State pointer */
float64_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
float64_t acc1; /* Accumulator */
float64_t b0, b1, b2, a1, a2; /* Filter coefficients */
float64_t Xn1; /* Temporary input */
float64_t d1, d2; /* State variables */
uint32_t sample, stage = S->numStages; /* Loop counters */
do
{
/* Reading the coefficients */
b0 = pCoeffs[0];
b1 = pCoeffs[1];
b2 = pCoeffs[2];
a1 = pCoeffs[3];
a2 = pCoeffs[4];
/* Reading the state values */
d1 = pState[0];
d2 = pState[1];
pCoeffs += 5U;
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 16 outputs at a time */
sample = blockSize >> 4U;
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
/* d2 = b2 * x[n] + a2 * y[n] */
/* 1 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 2 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 3 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 4 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 5 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 6 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 7 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 8 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 9 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 10 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 11 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 12 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 13 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 14 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 15 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* 16 */
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = blockSize & 0xFU;
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U) {
Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1;
d1 = b1 * Xn1 + d2;
d1 += a1 * acc1;
d2 = b2 * Xn1;
d2 += a2 * acc1;
*pOut++ = acc1;
/* decrement loop counter */
sample--;
}
/* Store the updated state variables back into the state array */
pState[0] = d1;
pState[1] = d2;
pState += 2U;
/* The current stage input is given as the output to the next stage */
pIn = pDst;
/* Reset the output working pointer */
pOut = pDst;
/* decrement loop counter */
stage--;
} while (stage > 0U);
}
LOW_OPTIMIZATION_EXIT
/**
@} end of BiquadCascadeDF2T group
*/

View File

@ -0,0 +1,211 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df2T_init_f32.c
* Description: Initialization function for floating-point transposed direct form II Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF2T
@{
*/
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order
in the not Neon version.
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
For Neon version, this array is bigger. If numstages = 4x + y, then the array has size:
32*x + 5*y
and it must be initialized using the function
arm_biquad_cascade_df2T_compute_coefs_f32 which is taking the
standard array coefficient as parameters.
But, an array of 8*numstages is a good approximation.
Then, the initialization can be done with:
<pre>
arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, neonCoefs, stateNeon);
arm_biquad_cascade_df2T_compute_coefs_f32(&SNeon,nbCascade,coefs);
</pre>
@par In this example, neonCoefs is a bigger array of size 8 * numStages.
coefs is the standard array:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
#if defined(ARM_MATH_NEON)
/*
Must be called after initializing the biquad instance.
pCoeffs has size 5 * nbCascade
Whereas the pCoeffs for the init has size (4*4 + 4*4)* nbCascade
So this pCoeffs is the one which would be used for the not Neon version.
The pCoeffs passed in init is bigger than the one for the not Neon version.
*/
void arm_biquad_cascade_df2T_compute_coefs_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
float32_t * pCoeffs)
{
uint8_t cnt;
float32_t *pDstCoeffs;
float32_t b0[4],b1[4],b2[4],a1[4],a2[4];
pDstCoeffs = S->pCoeffs;
cnt = numStages >> 2;
while(cnt > 0)
{
for(int i=0;i<4;i++)
{
b0[i] = pCoeffs[0];
b1[i] = pCoeffs[1];
b2[i] = pCoeffs[2];
a1[i] = pCoeffs[3];
a2[i] = pCoeffs[4];
pCoeffs += 5;
}
/* Vec 1 */
*pDstCoeffs++ = 0;
*pDstCoeffs++ = b0[1];
*pDstCoeffs++ = b0[2];
*pDstCoeffs++ = b0[3];
/* Vec 2 */
*pDstCoeffs++ = 0;
*pDstCoeffs++ = 0;
*pDstCoeffs++ = b0[1] * b0[2];
*pDstCoeffs++ = b0[2] * b0[3];
/* Vec 3 */
*pDstCoeffs++ = 0;
*pDstCoeffs++ = 0;
*pDstCoeffs++ = 0;
*pDstCoeffs++ = b0[1] * b0[2] * b0[3];
/* Vec 4 */
*pDstCoeffs++ = b0[0];
*pDstCoeffs++ = b0[0] * b0[1];
*pDstCoeffs++ = b0[0] * b0[1] * b0[2];
*pDstCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
/* Vec 5 */
*pDstCoeffs++ = b1[0];
*pDstCoeffs++ = b1[1];
*pDstCoeffs++ = b1[2];
*pDstCoeffs++ = b1[3];
/* Vec 6 */
*pDstCoeffs++ = b2[0];
*pDstCoeffs++ = b2[1];
*pDstCoeffs++ = b2[2];
*pDstCoeffs++ = b2[3];
/* Vec 7 */
*pDstCoeffs++ = a1[0];
*pDstCoeffs++ = a1[1];
*pDstCoeffs++ = a1[2];
*pDstCoeffs++ = a1[3];
/* Vec 8 */
*pDstCoeffs++ = a2[0];
*pDstCoeffs++ = a2[1];
*pDstCoeffs++ = a2[2];
*pDstCoeffs++ = a2[3];
cnt--;
}
cnt = numStages & 0x3;
while(cnt > 0)
{
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
cnt--;
}
}
#endif
void arm_biquad_cascade_df2T_init_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
const float32_t * pCoeffs,
float32_t * pState)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 2 * numStages */
memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float32_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF2T group
*/

View File

@ -0,0 +1,86 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_df2T_init_f64.c
* Description: Initialization function for floating-point transposed direct form II Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF2T
@{
*/
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure
@param[in] numStages number of 2nd order stages in the filter
@param[in] pCoeffs points to the filter coefficients
@param[in] pState points to the state buffer
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_df2T_init_f64(
arm_biquad_cascade_df2T_instance_f64 * S,
uint8_t numStages,
float64_t * pCoeffs,
float64_t * pState)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 2 * numStages */
memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float64_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF2T group
*/

View File

@ -0,0 +1,285 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_stereo_df2T_f32.c
* Description: Processing function for floating-point transposed direct form II Biquad cascade filter. 2 channels
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF2T
@{
*/
/**
@brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
@param[in] S points to an instance of the filter data structure
@param[in] pSrc points to the block of input data
@param[out] pDst points to the block of output data
@param[in] blockSize number of samples to process
@return none
*/
LOW_OPTIMIZATION_ENTER
void arm_biquad_cascade_stereo_df2T_f32(
const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
const float32_t *pIn = pSrc; /* Source pointer */
float32_t *pOut = pDst; /* Destination pointer */
float32_t *pState = S->pState; /* State pointer */
const float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
float32_t acc1a, acc1b; /* Accumulator */
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
float32_t Xn1a, Xn1b; /* Temporary input */
float32_t d1a, d2a, d1b, d2b; /* State variables */
uint32_t sample, stage = S->numStages; /* Loop counters */
do
{
/* Reading the coefficients */
b0 = pCoeffs[0];
b1 = pCoeffs[1];
b2 = pCoeffs[2];
a1 = pCoeffs[3];
a2 = pCoeffs[4];
/* Reading the state values */
d1a = pState[0];
d2a = pState[1];
d1b = pState[2];
d2b = pState[3];
pCoeffs += 5U;
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 8 outputs at a time */
sample = blockSize >> 3U;
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
/* d2 = b2 * x[n] + a2 * y[n] */
/* 1 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 2 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 3 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 4 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 5 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 6 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 7 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* 8 */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
*pOut++ = acc1a;
*pOut++ = acc1b;
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* decrement loop counter */
sample--;
}
/* Loop unrolling: Compute remaining outputs */
sample = blockSize & 0x7U;
#else
/* Initialize blkCnt with number of samples */
sample = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (sample > 0U) {
/* Read the input */
Xn1a = *pIn++; /* Channel a */
Xn1b = *pIn++; /* Channel b */
/* y[n] = b0 * x[n] + d1 */
acc1a = (b0 * Xn1a) + d1a;
acc1b = (b0 * Xn1b) + d1b;
/* Store the result in the accumulator in the destination buffer. */
*pOut++ = acc1a;
*pOut++ = acc1b;
/* Every time after the output is computed state should be updated. */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
/* d2 = b2 * x[n] + a2 * y[n] */
d2a = (b2 * Xn1a) + (a2 * acc1a);
d2b = (b2 * Xn1b) + (a2 * acc1b);
/* decrement loop counter */
sample--;
}
/* Store the updated state variables back into the state array */
pState[0] = d1a;
pState[1] = d2a;
pState[2] = d1b;
pState[3] = d2b;
pState += 4U;
/* The current stage input is given as the output to the next stage */
pIn = pDst;
/* Reset the output working pointer */
pOut = pDst;
/* Decrement the loop counter */
stage--;
} while (stage > 0U);
}
LOW_OPTIMIZATION_EXIT
/**
@} end of BiquadCascadeDF2T group
*/

View File

@ -0,0 +1,86 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_biquad_cascade_stereo_df2T_init_f32.c
* Description: Initialization function for floating-point transposed direct form II Biquad cascade filter
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupFilters
*/
/**
@addtogroup BiquadCascadeDF2T
@{
*/
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code> for each channel.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_stereo_df2T_init_f32(
arm_biquad_cascade_stereo_df2T_instance_f32 * S,
uint8_t numStages,
const float32_t * pCoeffs,
float32_t * pState)
{
/* Assign filter stages */
S->numStages = numStages;
/* Assign coefficient pointer */
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));
/* Assign state pointer */
S->pState = pState;
}
/**
@} end of BiquadCascadeDF2T group
*/

Some files were not shown because too many files have changed in this diff Show More