Camera_driver: refactored version of camera driver

This commit is contained in:
Petr Malanik
2023-01-23 19:36:57 +01:00
parent 34b9eaafc2
commit d99e4b27e6
1292 changed files with 1100905 additions and 0 deletions

View File

@ -0,0 +1,16 @@
# Builds the CMSISDSPComplexMath static library from the C sources in this directory.
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPComplexMath)
# Collect every C file in this directory whose name contains an underscore
# (pattern "*_*.c"); file names without an underscore do not match.
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPComplexMath STATIC ${SRC})
# NOTE(review): configdsp is not defined in this file; presumably a shared
# helper from the enclosing build that applies common DSP settings - confirm.
configdsp(CMSISDSPComplexMath ..)
### Includes
# NOTE(review): DSP is not set in this file; it must be provided by the
# enclosing build for this include path to resolve - confirm.
target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,46 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: ComplexMathFunctions.c
* Description: Combination of all complex math function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_cmplx_conj_f32.c"
#include "arm_cmplx_conj_q15.c"
#include "arm_cmplx_conj_q31.c"
#include "arm_cmplx_dot_prod_f32.c"
#include "arm_cmplx_dot_prod_q15.c"
#include "arm_cmplx_dot_prod_q31.c"
#include "arm_cmplx_mag_f32.c"
#include "arm_cmplx_mag_q15.c"
#include "arm_cmplx_mag_q31.c"
#include "arm_cmplx_mag_squared_f32.c"
#include "arm_cmplx_mag_squared_q15.c"
#include "arm_cmplx_mag_squared_q31.c"
#include "arm_cmplx_mult_cmplx_f32.c"
#include "arm_cmplx_mult_cmplx_q15.c"
#include "arm_cmplx_mult_cmplx_q31.c"
#include "arm_cmplx_mult_real_f32.c"
#include "arm_cmplx_mult_real_q15.c"
#include "arm_cmplx_mult_real_q31.c"

View File

@ -0,0 +1,161 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_f32.c
* Description: Floating-point complex conjugate
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_conj Complex Conjugate
Conjugates the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the destination data where the result should be written.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n) ] = pSrc[(2*n) ]; // real part
pDst[(2*n)+1] = -pSrc[(2*n)+1]; // imag part
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
  @brief         Conjugates every element of a floating-point complex vector.
  @param[in]     pSrc        points to the input vector, interleaved as (real, imag, real, imag, ...)
  @param[out]    pDst        points to the output vector, interleaved the same way
  @param[in]     numSamples  number of complex samples in each vector
  @return        none
 */
void arm_cmplx_conj_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Iterations left in the current loop */

#if defined(ARM_MATH_NEON)
  float32x4_t zeroVec;                           /* All-zero vector, minuend of the negation */
  float32x4x2_t cplx;                            /* val[1] is the part that gets negated */

  zeroVec = vdupq_n_f32(0.0);

  /* Vector loop: four complex samples (eight floats) per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[0]+jC[1] = A[0]+(-1)*jA[1] */
    cplx = vld2q_f32(pSrc);
    cplx.val[1] = vsubq_f32(zeroVec, cplx.val[1]);
    vst2q_f32(pDst, cplx);

    pSrc += 8;
    pDst += 8;
  }

  /* Samples left over for the scalar loop */
  count = numSamples & 0x3;

#elif defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled scalar loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[0] + jC[1] = A[0] + j(-1)A[1] */
    pDst[0] =  pSrc[0];
    pDst[1] = -pSrc[1];

    pDst[2] =  pSrc[2];
    pDst[3] = -pSrc[3];

    pDst[4] =  pSrc[4];
    pDst[5] = -pSrc[5];

    pDst[6] =  pSrc[6];
    pDst[7] = -pSrc[7];

    pSrc += 8;
    pDst += 8;
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No vector or unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* ARM_MATH_NEON / ARM_MATH_LOOPUNROLL */

  /* Scalar loop: one complex sample per pass */
  for (; count > 0U; count--)
  {
    /* Copy the real part, negate the imaginary part */
    pDst[0] =  pSrc[0];
    pDst[1] = -pSrc[1];

    pSrc += 2;
    pDst += 2;
  }
}
/**
@} end of cmplx_conj group
*/

View File

@ -0,0 +1,157 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_q15.c
* Description: Q15 complex conjugate
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
  @brief         Conjugates every element of a Q15 complex vector.
  @param[in]     pSrc        points to the input vector, interleaved as (real, imag, real, imag, ...)
  @param[out]    pDst        points to the output vector, interleaved the same way
  @param[in]     numSamples  number of complex samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   Negation of the imaginary part saturates:
                   the Q15 value -1 (0x8000) becomes the largest positive value 0x7FFF.
 */
void arm_cmplx_conj_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Iterations left in the current loop */
  q31_t imag;                                    /* Imaginary part of the sample being processed */

#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
  q31_t w0, w1, w2, w3;                          /* Packed pairs of Q15 values, one complex sample each */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[0] + jC[1] = A[0] + j(-1)A[1] */

#if defined (ARM_MATH_DSP)

    /* Fetch four packed samples before anything is written */
    w0 = read_q15x2_ia ((q15_t **) &pSrc);
    w1 = read_q15x2_ia ((q15_t **) &pSrc);
    w2 = read_q15x2_ia ((q15_t **) &pSrc);
    w3 = read_q15x2_ia ((q15_t **) &pSrc);

    /* Saturating exchange-and-negate; the intrinsic depends on endianness */
#ifndef ARM_MATH_BIG_ENDIAN
    w0 = __QASX(0, w0);
    w1 = __QASX(0, w1);
    w2 = __QASX(0, w2);
    w3 = __QASX(0, w3);
#else
    w0 = __QSAX(0, w0);
    w1 = __QSAX(0, w1);
    w2 = __QSAX(0, w2);
    w3 = __QSAX(0, w3);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* Swap the two halfwords back into place while storing */
    write_q15x2_ia (&pDst, (q31_t) (((uint32_t) w0 >> 16) | ((uint32_t) w0 << 16)));
    write_q15x2_ia (&pDst, (q31_t) (((uint32_t) w1 >> 16) | ((uint32_t) w1 << 16)));
    write_q15x2_ia (&pDst, (q31_t) (((uint32_t) w2 >> 16) | ((uint32_t) w2 << 16)));
    write_q15x2_ia (&pDst, (q31_t) (((uint32_t) w3 >> 16) | ((uint32_t) w3 << 16)));

#else

    pDst[0] = pSrc[0];
    imag = pSrc[1];
    pDst[1] = (imag != (q15_t) 0x8000) ? -imag : (q15_t) 0x7fff;

    pDst[2] = pSrc[2];
    imag = pSrc[3];
    pDst[3] = (imag != (q15_t) 0x8000) ? -imag : (q15_t) 0x7fff;

    pDst[4] = pSrc[4];
    imag = pSrc[5];
    pDst[5] = (imag != (q15_t) 0x8000) ? -imag : (q15_t) 0x7fff;

    pDst[6] = pSrc[6];
    imag = pSrc[7];
    pDst[7] = (imag != (q15_t) 0x8000) ? -imag : (q15_t) 0x7fff;

    pSrc += 8;
    pDst += 8;

#endif /* #if defined (ARM_MATH_DSP) */
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Scalar loop: one complex sample per pass */
  for (; count > 0U; count--)
  {
    /* Real part is copied unchanged */
    *pDst++ = *pSrc++;

    /* Imaginary part is negated with saturation */
    imag = *pSrc++;
#if defined (ARM_MATH_DSP)
    *pDst++ = __SSAT(-imag, 16);
#else
    *pDst++ = (imag != (q15_t) 0x8000) ? -imag : (q15_t) 0x7fff;
#endif
  }
}
/**
@} end of cmplx_conj group
*/

View File

@ -0,0 +1,137 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_q31.c
* Description: Q31 complex conjugate
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
  @brief         Conjugates every element of a Q31 complex vector.
  @param[in]     pSrc        points to the input vector, interleaved as (real, imag, real, imag, ...)
  @param[out]    pDst        points to the output vector, interleaved the same way
  @param[in]     numSamples  number of complex samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   Negation of the imaginary part saturates:
                   the Q31 value -1 (0x80000000) becomes the largest positive value 0x7FFFFFFF.
 */
void arm_cmplx_conj_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Iterations left in the current loop */
  q31_t imag;                                    /* Imaginary part of the sample being processed */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[0] + jC[1] = A[0] + j(-1)A[1] */

#if defined (ARM_MATH_DSP)

    /* Saturating subtract from zero performs the negation */
    pDst[0] = pSrc[0];
    imag = pSrc[1];
    pDst[1] = __QSUB(0, imag);

    pDst[2] = pSrc[2];
    imag = pSrc[3];
    pDst[3] = __QSUB(0, imag);

    pDst[4] = pSrc[4];
    imag = pSrc[5];
    pDst[5] = __QSUB(0, imag);

    pDst[6] = pSrc[6];
    imag = pSrc[7];
    pDst[7] = __QSUB(0, imag);

#else

    /* INT32_MIN has no positive counterpart, so it maps to INT32_MAX */
    pDst[0] = pSrc[0];
    imag = pSrc[1];
    pDst[1] = (imag != INT32_MIN) ? -imag : INT32_MAX;

    pDst[2] = pSrc[2];
    imag = pSrc[3];
    pDst[3] = (imag != INT32_MIN) ? -imag : INT32_MAX;

    pDst[4] = pSrc[4];
    imag = pSrc[5];
    pDst[5] = (imag != INT32_MIN) ? -imag : INT32_MAX;

    pDst[6] = pSrc[6];
    imag = pSrc[7];
    pDst[7] = (imag != INT32_MIN) ? -imag : INT32_MAX;

#endif /* #if defined (ARM_MATH_DSP) */

    pSrc += 8;
    pDst += 8;
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Scalar loop: one complex sample per pass */
  for (; count > 0U; count--)
  {
    /* Real part is copied unchanged */
    pDst[0] = pSrc[0];

    /* Imaginary part is negated with saturation */
    imag = pSrc[1];
#if defined (ARM_MATH_DSP)
    pDst[1] = __QSUB(0, imag);
#else
    pDst[1] = (imag != INT32_MIN) ? -imag : INT32_MAX;
#endif

    pSrc += 2;
    pDst += 2;
  }
}
/**
@} end of cmplx_conj group
*/

View File

@ -0,0 +1,233 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_f32.c
* Description: Floating-point complex dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_dot_prod Complex Dot Product
Computes the dot product of two complex vectors.
The vectors are multiplied element-by-element and then summed.
The <code>pSrcA</code> points to the first complex input vector and
<code>pSrcB</code> points to the second complex input vector.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
realResult = 0;
imagResult = 0;
for (n = 0; n < numSamples; n++) {
realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
  @brief         Computes the dot product of two floating-point complex vectors.
  @param[in]     pSrcA       points to the first input vector, interleaved as (real, imag, ...)
  @param[in]     pSrcB       points to the second input vector, interleaved the same way
  @param[in]     numSamples  number of complex samples in each vector
  @param[out]    realResult  receives the real part of the sum of products
  @param[out]    imagResult  receives the imaginary part of the sum of products
  @return        none
 */
void arm_cmplx_dot_prod_f32(
  const float32_t * pSrcA,
  const float32_t * pSrcB,
        uint32_t numSamples,
        float32_t * realResult,
        float32_t * imagResult)
{
  uint32_t count;                                /* Iterations left in the current loop */
  float32_t accRe = 0.0f, accIm = 0.0f;          /* Running real / imaginary sums */
  float32_t aRe, aIm, bRe, bIm;                  /* Current sample of A and of B */

#if defined(ARM_MATH_NEON)
  float32x4x2_t vA, vB;                          /* De-interleaved chunks of A and B */
  float32x4_t vAccRe, vAccIm;                    /* Per-lane partial sums */
  float32x2_t pairSum = vdup_n_f32(0);           /* Scratch for the horizontal reduction */

  vAccRe = vdupq_n_f32(0.0);
  vAccIm = vdupq_n_f32(0.0);

  /* Vector loop: eight complex samples per pass, taken as two chunks of four */
  for (count = numSamples >> 3U; count > 0U; count--)
  {
    /* First chunk of four samples */
    vA = vld2q_f32(pSrcA);
    vB = vld2q_f32(pSrcB);
    pSrcA += 8;
    pSrcB += 8;

    /* Re{C} = Re{A}*Re{B} - Im{A}*Im{B} */
    vAccRe = vmlaq_f32(vAccRe, vA.val[0], vB.val[0]);
    vAccRe = vmlsq_f32(vAccRe, vA.val[1], vB.val[1]);

    /* Im{C} = Re{A}*Im{B} + Im{A}*Re{B} */
    vAccIm = vmlaq_f32(vAccIm, vA.val[1], vB.val[0]);
    vAccIm = vmlaq_f32(vAccIm, vA.val[0], vB.val[1]);

    /* Second chunk of four samples */
    vA = vld2q_f32(pSrcA);
    vB = vld2q_f32(pSrcB);
    pSrcA += 8;
    pSrcB += 8;

    vAccRe = vmlaq_f32(vAccRe, vA.val[0], vB.val[0]);
    vAccRe = vmlsq_f32(vAccRe, vA.val[1], vB.val[1]);

    vAccIm = vmlaq_f32(vAccIm, vA.val[1], vB.val[0]);
    vAccIm = vmlaq_f32(vAccIm, vA.val[0], vB.val[1]);
  }

  /* Fold the four lanes of each accumulator into the scalar sums */
  pairSum = vpadd_f32(vget_low_f32(vAccRe), vget_high_f32(vAccRe));
  accRe += pairSum[0] + pairSum[1];

  pairSum = vpadd_f32(vget_low_f32(vAccIm), vget_high_f32(vAccIm));
  accIm += pairSum[0] + pairSum[1];

  /* Samples left over for the scalar loop */
  count = numSamples & 0x7;

#elif defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled scalar loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    accRe += aRe * bRe;
    accRe -= aIm * bIm;
    accIm += aRe * bIm;
    accIm += aIm * bRe;

    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    accRe += aRe * bRe;
    accRe -= aIm * bIm;
    accIm += aRe * bIm;
    accIm += aIm * bRe;

    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    accRe += aRe * bRe;
    accRe -= aIm * bIm;
    accIm += aRe * bIm;
    accIm += aIm * bRe;

    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    accRe += aRe * bRe;
    accRe -= aIm * bIm;
    accIm += aRe * bIm;
    accIm += aIm * bRe;

    pSrcA += 8;
    pSrcB += 8;
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No vector or unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* ARM_MATH_NEON / ARM_MATH_LOOPUNROLL */

  /* Scalar loop: one complex sample per pass */
  for (; count > 0U; count--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];

    /* Each product is applied to its sum separately, in a fixed order */
    accRe += aRe * bRe;
    accRe -= aIm * bIm;
    accIm += aRe * bIm;
    accIm += aIm * bRe;

    pSrcA += 2;
    pSrcB += 2;
  }

  /* Hand the two sums back to the caller */
  *realResult = accRe;
  *imagResult = accIm;
}
/**
@} end of cmplx_dot_prod group
*/

View File

@ -0,0 +1,154 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_q15.c
* Description: Processing function for the Q15 Complex Dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
  @brief         Computes the dot product of two Q15 complex vectors.
  @param[in]     pSrcA       points to the first input vector, interleaved as (real, imag, ...)
  @param[in]     pSrcB       points to the second input vector, interleaved the same way
  @param[in]     numSamples  number of complex samples in each vector
  @param[out]    realResult  real part of the result returned here
  @param[out]    imagResult  imaginary part of the result returned here
  @return        none

  @par           Scaling and Overflow Behavior
                   The function is implemented using an internal 64-bit accumulator.
                   The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
                   These are accumulated in a 64-bit accumulator with 34.30 precision.
                   As a final step, the accumulators are shifted right by 6 bits to give 8.24 format.
                   The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
 */
void arm_cmplx_dot_prod_q15(
  const q15_t * pSrcA,
  const q15_t * pSrcB,
        uint32_t numSamples,
        q31_t * realResult,
        q31_t * imagResult)
{
  uint32_t count;                                /* Iterations left in the current loop */
  q63_t accRe = 0, accIm = 0;                    /* 64-bit running real / imaginary sums */
  q15_t aRe, aIm, bRe, bIm;                      /* Current sample of A and of B */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    accRe += (q31_t) aRe * bRe;
    accRe -= (q31_t) aIm * bIm;
    accIm += (q31_t) aRe * bIm;
    accIm += (q31_t) aIm * bRe;

    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    accRe += (q31_t) aRe * bRe;
    accRe -= (q31_t) aIm * bIm;
    accIm += (q31_t) aRe * bIm;
    accIm += (q31_t) aIm * bRe;

    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    accRe += (q31_t) aRe * bRe;
    accRe -= (q31_t) aIm * bIm;
    accIm += (q31_t) aRe * bIm;
    accIm += (q31_t) aIm * bRe;

    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    accRe += (q31_t) aRe * bRe;
    accRe -= (q31_t) aIm * bIm;
    accIm += (q31_t) aRe * bIm;
    accIm += (q31_t) aIm * bRe;

    pSrcA += 8;
    pSrcB += 8;
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Scalar loop: one complex sample per pass */
  for (; count > 0U; count--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];

    /* Products are formed in 32 bits and summed in 64 bits */
    accRe += (q31_t) aRe * bRe;
    accRe -= (q31_t) aIm * bIm;
    accIm += (q31_t) aRe * bIm;
    accIm += (q31_t) aIm * bRe;

    pSrcA += 2;
    pSrcB += 2;
  }

  /* Convert both sums from 34.30 to 8.24 with a 6-bit right shift */
  *realResult = (q31_t) (accRe >> 6);
  *imagResult = (q31_t) (accIm >> 6);
}
/**
@} end of cmplx_dot_prod group
*/

View File

@ -0,0 +1,153 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_q31.c
* Description: Q31 complex dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
  @brief         Computes the dot product of two Q31 complex vectors.
  @param[in]     pSrcA       points to the first input vector, interleaved as (real, imag, ...)
  @param[in]     pSrcB       points to the second input vector, interleaved the same way
  @param[in]     numSamples  number of complex samples in each vector
  @param[out]    realResult  real part of the result returned here
  @param[out]    imagResult  imaginary part of the result returned here
  @return        none

  @par           Scaling and Overflow Behavior
                   The function is implemented using an internal 64-bit accumulator.
                   The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision
                   and each product is shifted right by 14 bits to 16.48 format before it is accumulated.
                   The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
                   Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
                   The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
                   Input down scaling is not required.
 */
void arm_cmplx_dot_prod_q31(
  const q31_t * pSrcA,
  const q31_t * pSrcB,
        uint32_t numSamples,
        q63_t * realResult,
        q63_t * imagResult)
{
  uint32_t count;                                /* Iterations left in the current loop */
  q63_t accRe = 0, accIm = 0;                    /* 64-bit running real / imaginary sums */
  q31_t aRe, aIm, bRe, bIm;                      /* Current sample of A and of B */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    accRe += ((q63_t) aRe * bRe) >> 14;
    accRe -= ((q63_t) aIm * bIm) >> 14;
    accIm += ((q63_t) aRe * bIm) >> 14;
    accIm += ((q63_t) aIm * bRe) >> 14;

    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    accRe += ((q63_t) aRe * bRe) >> 14;
    accRe -= ((q63_t) aIm * bIm) >> 14;
    accIm += ((q63_t) aRe * bIm) >> 14;
    accIm += ((q63_t) aIm * bRe) >> 14;

    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    accRe += ((q63_t) aRe * bRe) >> 14;
    accRe -= ((q63_t) aIm * bIm) >> 14;
    accIm += ((q63_t) aRe * bIm) >> 14;
    accIm += ((q63_t) aIm * bRe) >> 14;

    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    accRe += ((q63_t) aRe * bRe) >> 14;
    accRe -= ((q63_t) aIm * bIm) >> 14;
    accIm += ((q63_t) aRe * bIm) >> 14;
    accIm += ((q63_t) aIm * bRe) >> 14;

    pSrcA += 8;
    pSrcB += 8;
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Scalar loop: one complex sample per pass */
  for (; count > 0U; count--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];

    /* Each 64-bit product is shifted individually before it is accumulated */
    accRe += ((q63_t) aRe * bRe) >> 14;
    accRe -= ((q63_t) aIm * bIm) >> 14;
    accIm += ((q63_t) aRe * bIm) >> 14;
    accIm += ((q63_t) aIm * bRe) >> 14;

    pSrcA += 2;
    pSrcB += 2;
  }

  /* Results are returned in 16.48 format, exactly as accumulated */
  *realResult = accRe;
  *imagResult = accIm;
}
/**
@} end of cmplx_dot_prod group
*/

View File

@ -0,0 +1,188 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_f32.c
* Description: Floating-point complex magnitude
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag Complex Magnitude
Computes the magnitude of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
  @brief         Computes the magnitude of every element of a floating-point complex vector.
  @param[in]     pSrc        points to the input vector, interleaved as (real, imag, real, imag, ...)
  @param[out]    pDst        points to the output vector, one value per complex sample
  @param[in]     numSamples  number of complex samples in the input vector
  @return        none
 */
void arm_cmplx_mag_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Iterations left in the current loop */
  float32_t re, im;                              /* Real / imaginary part of the current sample */

#if defined(ARM_MATH_NEON)
  float32x4x2_t chunkA, chunkB;                  /* Two de-interleaved chunks of four samples */
  float32x4_t sqReA, sqImA, magSqA;              /* Squares and squared magnitude, chunk A */
  float32x4_t sqReB, sqImB, magSqB;              /* Squares and squared magnitude, chunk B */

  /* Vector loop: eight complex samples per pass */
  for (count = numSamples >> 3; count > 0U; count--)
  {
    /* out = sqrt((real * real) + (imag * imag)) */
    chunkA = vld2q_f32(pSrc);
    pSrc += 8;
    chunkB = vld2q_f32(pSrc);
    pSrc += 8;

    sqReA = vmulq_f32(chunkA.val[0], chunkA.val[0]);
    sqImA = vmulq_f32(chunkA.val[1], chunkA.val[1]);
    magSqA = vaddq_f32(sqReA, sqImA);

    sqReB = vmulq_f32(chunkB.val[0], chunkB.val[0]);
    sqImB = vmulq_f32(chunkB.val[1], chunkB.val[1]);
    magSqB = vaddq_f32(sqReB, sqImB);

    /* Both loads are complete before the first store */
    vst1q_f32(pDst, __arm_vec_sqrt_f32_neon(magSqA));
    pDst += 4;
    vst1q_f32(pDst, __arm_vec_sqrt_f32_neon(magSqB));
    pDst += 4;
  }

  /* Samples left over for the scalar loop */
  count = numSamples & 7;

#elif defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled scalar loop: four complex samples per pass */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
    re = pSrc[0];
    im = pSrc[1];
    arm_sqrt_f32((re * re) + (im * im), &pDst[0]);

    re = pSrc[2];
    im = pSrc[3];
    arm_sqrt_f32((re * re) + (im * im), &pDst[1]);

    re = pSrc[4];
    im = pSrc[5];
    arm_sqrt_f32((re * re) + (im * im), &pDst[2]);

    re = pSrc[6];
    im = pSrc[7];
    arm_sqrt_f32((re * re) + (im * im), &pDst[3]);

    pSrc += 8;
    pDst += 4;
  }

  /* Samples left over for the scalar loop */
  count = numSamples % 0x4U;

#else

  /* No vector or unrolled stage: the scalar loop handles everything */
  count = numSamples;

#endif /* ARM_MATH_NEON / ARM_MATH_LOOPUNROLL */

  /* Scalar loop: one complex sample in, one magnitude out */
  for (; count > 0U; count--)
  {
    re = pSrc[0];
    im = pSrc[1];
    pSrc += 2;

    /* arm_sqrt_f32 writes the root through its second argument */
    arm_sqrt_f32((re * re) + (im * im), pDst);
    pDst += 1;
  }
}
/**
@} end of cmplx_mag group
*/

View File

@ -0,0 +1,162 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_q15.c
* Description: Q15 complex magnitude
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
  @brief         Q15 complex magnitude.
  @param[in]     pSrc        points to input vector, interleaved as (real, imag, real, imag, ...)
  @param[out]    pDst        points to output vector, one value per complex sample
  @param[in]     numSamples  number of complex samples in the input vector
  @return        none

  @par           Scaling and Overflow Behavior
                   The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
                   The sum of the two squares is formed in 64 bits in every build configuration,
                   so the input (0x8000, 0x8000), whose squared magnitude is 2^31, does not wrap negative
                   before the square root is taken.
 */
void arm_cmplx_mag_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
  uint32_t blkCnt;                               /* Loop counter */

#if defined (ARM_MATH_DSP)
  q31_t in;                                      /* One packed complex sample (two Q15 values) */
  q63_t acc0;                                    /* 64-bit sum of squares */
#else
  q15_t real, imag;                              /* Temporary input variables */
  q31_t acc0, acc1;                              /* Squares of the real and imaginary parts */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    /* __SMLALD adds both halfword products to a 64-bit accumulator;
       the 32-bit __SMUAD sum would wrap when both parts are 0x8000. */
    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = (q63_t) __SMLALD(in, in, 0);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = (q63_t) __SMLALD(in, in, 0);
    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = (q63_t) __SMLALD(in, in, 0);
    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = (q63_t) __SMLALD(in, in, 0);
    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    /* 64-bit sum of squares, matching the portable branch below */
    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = (q63_t) __SMLALD(in, in, 0);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif

    /* Decrement loop counter */
    blkCnt--;
  }
}
/**
@} end of cmplx_mag group
*/

View File

@ -0,0 +1,130 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_q31.c
* Description: Q31 complex magnitude
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
  @brief         Q31 complex magnitude.
  @param[in]     pSrc        points to input vector (values are read as real, imag, real, imag, ...)
  @param[out]    pDst        points to output vector (one value written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
                   Input down scaling is not required.
 */
void arm_cmplx_mag_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  q31_t re, im;                                  /* Parts of the current complex sample */
  q31_t reSq, imSq;                              /* 64-bit squares shifted right by 33 bits */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass consumes 8 inputs and produces 4 outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[k] = sqrt(A[2k] * A[2k] + A[2k+1] * A[2k+1]), result in 2.30 format */
    re = pSrc[0];
    im = pSrc[1];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[0]);

    re = pSrc[2];
    im = pSrc[3];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[1]);

    re = pSrc[4];
    im = pSrc[5];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[2]);

    re = pSrc[6];
    im = pSrc[7];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    arm_sqrt_q31(reSq + imSq, &pDst[3]);

    pSrc += 8;
    pDst += 4;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; count > 0U; count--)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]), result in 2.30 format */
    re = pSrc[0];
    im = pSrc[1];
    pSrc += 2;

    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);

    arm_sqrt_q31(reSq + imSq, pDst);
    pDst++;
  }
}
/**
@} end of cmplx_mag group
*/

View File

@ -0,0 +1,184 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_f32.c
* Description: Floating-point complex magnitude squared
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag_squared Complex Magnitude Squared
Computes the magnitude squared of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
  @brief         Floating-point complex magnitude squared.
  @param[in]     pSrc        points to input vector (values are read as real, imag, real, imag, ...)
  @param[out]    pDst        points to output vector (one value written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
void arm_cmplx_mag_squared_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  float32_t re, im;                              /* Parts of the current complex sample */

#if defined(ARM_MATH_NEON)
  float32x4x2_t pairsLo, pairsHi;                /* vld2q_f32 results: val[0] and val[1] lanes */
  float32x4_t reSqLo, imSqLo, magSqLo;           /* Products and sum for the first 4 samples */
  float32x4_t reSqHi, imSqHi, magSqHi;           /* Products and sum for the next 4 samples */

  /* Vector section: each pass consumes 16 floats and produces 8 outputs */
  for (count = numSamples >> 3; count > 0U; count--)
  {
    /* out = (real * real) + (imag * imag) -- no square root is taken here */
    pairsLo = vld2q_f32(pSrc);
    pSrc += 8;
    reSqLo = vmulq_f32(pairsLo.val[0], pairsLo.val[0]);
    imSqLo = vmulq_f32(pairsLo.val[1], pairsLo.val[1]);
    magSqLo = vaddq_f32(reSqLo, imSqLo);

    pairsHi = vld2q_f32(pSrc);
    pSrc += 8;
    reSqHi = vmulq_f32(pairsHi.val[0], pairsHi.val[0]);
    imSqHi = vmulq_f32(pairsHi.val[1], pairsHi.val[1]);
    magSqHi = vaddq_f32(reSqHi, imSqHi);

    /* Both groups are stored only after both loads have been issued */
    vst1q_f32(pDst, magSqLo);
    pDst += 4;
    vst1q_f32(pDst, magSqHi);
    pDst += 4;
  }

  /* Samples left over after the groups of eight */
  count = numSamples & 7;

#else
#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass consumes 8 inputs and produces 4 outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[k] = (A[2k] * A[2k] + A[2k+1] * A[2k+1]) */
    re = pSrc[0];
    im = pSrc[1];
    pDst[0] = (re * re) + (im * im);

    re = pSrc[2];
    im = pSrc[3];
    pDst[1] = (re * re) + (im * im);

    re = pSrc[4];
    im = pSrc[5];
    pDst[2] = (re * re) + (im * im);

    re = pSrc[6];
    im = pSrc[7];
    pDst[3] = (re * re) + (im * im);

    pSrc += 8;
    pDst += 4;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */

  for (; count > 0U; count--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = pSrc[0];
    im = pSrc[1];
    pSrc += 2;

    *pDst = (re * re) + (im * im);
    pDst++;
  }
}
/**
@} end of cmplx_mag_squared group
*/

View File

@ -0,0 +1,161 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_q15.c
* Description: Q15 complex magnitude squared
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
  @brief         Q15 complex magnitude squared.
  @param[in]     pSrc        points to input vector (values are read as real, imag, real, imag, ...)
  @param[out]    pDst        points to output vector (one value written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
                   The sum of the two squares equals 2^31 when both parts are 0x8000. On the
                   ARM_MATH_DSP path that sum is held in a 32-bit word, so it is shifted as an
                   unsigned value; the other path already forms the sum in 64 bits.
 */
void arm_cmplx_mag_squared_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
  uint32_t blkCnt;                               /* Loop counter */

#if defined (ARM_MATH_DSP)
  q31_t in;                                      /* Word fetched by read_q15x2_ia() */
  q31_t acc0;                                    /* Result of __SMUAD(in, in) */
#else
  q15_t real, imag;                              /* Temporary input variables */
  q31_t acc0, acc1;                              /* Squared real / squared imaginary part */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    /* The sum of squares is never negative, but its top bit is set for the
       (0x8000, 0x8000) input. Shifting it as uint32_t keeps that case positive
       (assumes __SMUAD returns the wrapped 32-bit sum -- TODO confirm). */
    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    /* store result in 3.13 format in destination buffer. */
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    /* The sum is formed in 64 bits so it cannot wrap.
       store result in 3.13 format in destination buffer. */
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    /* Unsigned shift: the sum is non-negative but may have its top bit set.
       store result in 3.13 format in destination buffer. */
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    /* store result in 3.13 format in destination buffer. */
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
#endif

    /* Decrement loop counter */
    blkCnt--;
  }
}
/**
@} end of cmplx_mag_squared group
*/

View File

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_q31.c
* Description: Q31 complex magnitude squared
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
  @brief         Q31 complex magnitude squared.
  @param[in]     pSrc        points to input vector (values are read as real, imag, real, imag, ...)
  @param[out]    pDst        points to output vector (one value written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
                   Input down scaling is not required.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  q31_t re, im;                                  /* Parts of the current complex sample */
  q31_t reSq, imSq;                              /* 64-bit squares shifted right by 33 bits */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass consumes 8 inputs and produces 4 outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[k] = (A[2k] * A[2k] + A[2k+1] * A[2k+1]), stored in 3.29 format */
    re = pSrc[0];
    im = pSrc[1];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[0] = reSq + imSq;

    re = pSrc[2];
    im = pSrc[3];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[1] = reSq + imSq;

    re = pSrc[4];
    im = pSrc[5];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[2] = reSq + imSq;

    re = pSrc[6];
    im = pSrc[7];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[3] = reSq + imSq;

    pSrc += 8;
    pDst += 4;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; count > 0U; count--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]), stored in 3.29 format */
    re = pSrc[0];
    im = pSrc[1];
    pSrc += 2;

    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);

    *pDst = reSq + imSq;
    pDst++;
  }
}
/**
@} end of cmplx_mag_squared group
*/

View File

@ -0,0 +1,194 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_f32.c
* Description: Floating-point complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
Multiplies a complex vector by another complex vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values.
The underlying algorithm is:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
  @brief         Floating-point complex-by-complex multiplication.
  @param[in]     pSrcA       points to first input vector (values are read as real, imag, real, imag, ...)
  @param[in]     pSrcB       points to second input vector (same layout as pSrcA)
  @param[out]    pDst        points to output vector (two values written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
void arm_cmplx_mult_cmplx_f32(
  const float32_t * pSrcA,
  const float32_t * pSrcB,
        float32_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  float32_t aRe, aIm, bRe, bIm;                  /* Parts of the current A and B samples */

#if defined(ARM_MATH_NEON)
  float32x4x2_t vecA, vecB;                      /* vld2q_f32 results: val[0] and val[1] lanes */
  float32x4x2_t product;                         /* Result lanes handed to vst2q_f32 */

  /* Vector section: each pass produces 4 complex outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* Two-way de-interleaving loads: val[0] is used as the real lanes, val[1] as the imaginary lanes */
    vecA = vld2q_f32(pSrcA);
    vecB = vld2q_f32(pSrcB);

    pSrcA += 8;
    pSrcB += 8;

    /* Re{C} = Re{A}*Re{B} - Im{A}*Im{B} */
    product.val[0] = vmulq_f32(vecA.val[0], vecB.val[0]);
    product.val[0] = vmlsq_f32(product.val[0], vecA.val[1], vecB.val[1]);

    /* Im{C} = Re{A}*Im{B} + Im{A}*Re{B} */
    product.val[1] = vmulq_f32(vecA.val[0], vecB.val[1]);
    product.val[1] = vmlaq_f32(product.val[1], vecA.val[1], vecB.val[0]);

    vst2q_f32(pDst, product);
    pDst += 8;
  }

  /* Samples left over after the groups of four */
  count = numSamples & 3;

#else
#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass produces 4 complex outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pDst[0] = (aRe * bRe) - (aIm * bIm);
    pDst[1] = (aRe * bIm) + (aIm * bRe);

    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    pDst[2] = (aRe * bRe) - (aIm * bIm);
    pDst[3] = (aRe * bIm) + (aIm * bRe);

    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    pDst[4] = (aRe * bRe) - (aIm * bIm);
    pDst[5] = (aRe * bIm) + (aIm * bRe);

    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    pDst[6] = (aRe * bRe) - (aIm * bIm);
    pDst[7] = (aRe * bIm) + (aIm * bRe);

    pSrcA += 8;
    pSrcB += 8;
    pDst += 8;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */

  for (; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    pSrcA += 2;
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pSrcB += 2;

    pDst[0] = (aRe * bRe) - (aIm * bIm);
    pDst[1] = (aRe * bIm) + (aIm * bRe);
    pDst += 2;
  }
}
/**
@} end of CmplxByCmplxMult group
*/

View File

@ -0,0 +1,136 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_q15.c
* Description: Q15 complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
  @brief         Q15 complex-by-complex multiplication.
  @param[in]     pSrcA       points to first input vector (values are read as real, imag, real, imag, ...)
  @param[in]     pSrcB       points to second input vector (same layout as pSrcA)
  @param[out]    pDst        points to output vector (two values written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
 */
void arm_cmplx_mult_cmplx_q15(
  const q15_t * pSrcA,
  const q15_t * pSrcB,
        q15_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  q15_t aRe, aIm, bRe, bIm;                      /* Parts of the current A and B samples */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass produces 4 complex outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
    /* Each product is shifted right by 17 before combining; result is stored in 3.13 format. */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pDst[0] = (q15_t) ( (((q31_t) aRe * bRe) >> 17) - (((q31_t) aIm * bIm) >> 17) );
    pDst[1] = (q15_t) ( (((q31_t) aRe * bIm) >> 17) + (((q31_t) aIm * bRe) >> 17) );

    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    pDst[2] = (q15_t) ( (((q31_t) aRe * bRe) >> 17) - (((q31_t) aIm * bIm) >> 17) );
    pDst[3] = (q15_t) ( (((q31_t) aRe * bIm) >> 17) + (((q31_t) aIm * bRe) >> 17) );

    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    pDst[4] = (q15_t) ( (((q31_t) aRe * bRe) >> 17) - (((q31_t) aIm * bIm) >> 17) );
    pDst[5] = (q15_t) ( (((q31_t) aRe * bIm) >> 17) + (((q31_t) aIm * bRe) >> 17) );

    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    pDst[6] = (q15_t) ( (((q31_t) aRe * bRe) >> 17) - (((q31_t) aIm * bIm) >> 17) );
    pDst[7] = (q15_t) ( (((q31_t) aRe * bIm) >> 17) + (((q31_t) aIm * bRe) >> 17) );

    pSrcA += 8;
    pSrcB += 8;
    pDst += 8;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    pSrcA += 2;
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pSrcB += 2;

    /* store result in 3.13 format in destination buffer. */
    pDst[0] = (q15_t) ( (((q31_t) aRe * bRe) >> 17) - (((q31_t) aIm * bIm) >> 17) );
    pDst[1] = (q15_t) ( (((q31_t) aRe * bIm) >> 17) + (((q31_t) aIm * bRe) >> 17) );
    pDst += 2;
  }
}
/**
@} end of CmplxByCmplxMult group
*/

View File

@ -0,0 +1,137 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_q31.c
* Description: Q31 complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
  @brief         Q31 complex-by-complex multiplication.
  @param[in]     pSrcA       points to first input vector (values are read as real, imag, real, imag, ...)
  @param[in]     pSrcB       points to second input vector (same layout as pSrcA)
  @param[out]    pDst        points to output vector (two values written per complex sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none

  @par           Scaling and Overflow Behavior
                   The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
                   Input down scaling is not required.
 */
void arm_cmplx_mult_cmplx_q31(
  const q31_t * pSrcA,
  const q31_t * pSrcB,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  q31_t aRe, aIm, bRe, bIm;                      /* Parts of the current A and B samples */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass produces 4 complex outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
    /* Each 64-bit product is shifted right by 33 before combining; result is stored in 3.29 format. */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pDst[0] = (q31_t) ( (((q63_t) aRe * bRe) >> 33) - (((q63_t) aIm * bIm) >> 33) );
    pDst[1] = (q31_t) ( (((q63_t) aRe * bIm) >> 33) + (((q63_t) aIm * bRe) >> 33) );

    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    pDst[2] = (q31_t) ( (((q63_t) aRe * bRe) >> 33) - (((q63_t) aIm * bIm) >> 33) );
    pDst[3] = (q31_t) ( (((q63_t) aRe * bIm) >> 33) + (((q63_t) aIm * bRe) >> 33) );

    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    pDst[4] = (q31_t) ( (((q63_t) aRe * bRe) >> 33) - (((q63_t) aIm * bIm) >> 33) );
    pDst[5] = (q31_t) ( (((q63_t) aRe * bIm) >> 33) + (((q63_t) aIm * bRe) >> 33) );

    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    pDst[6] = (q31_t) ( (((q63_t) aRe * bRe) >> 33) - (((q63_t) aIm * bIm) >> 33) );
    pDst[7] = (q31_t) ( (((q63_t) aRe * bIm) >> 33) + (((q63_t) aIm * bRe) >> 33) );

    pSrcA += 8;
    pSrcB += 8;
    pDst += 8;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    pSrcA += 2;
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pSrcB += 2;

    /* store result in 3.29 format in destination buffer. */
    pDst[0] = (q31_t) ( (((q63_t) aRe * bRe) >> 33) - (((q63_t) aIm * bIm) >> 33) );
    pDst[1] = (q31_t) ( (((q63_t) aRe * bIm) >> 33) + (((q63_t) aIm * bRe) >> 33) );
    pDst += 2;
  }
}
/**
@} end of CmplxByCmplxMult group
*/

View File

@ -0,0 +1,169 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_f32.c
* Description: Floating-point complex by real multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByRealMult Complex-by-Real Multiplication
Multiplies a complex vector by a real vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values while the real array has a total of <code>numSamples</code>
real values.
The underlying algorithm is:
<pre>
for (n = 0; n < numSamples; n++) {
pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
  @brief         Floating-point complex-by-real multiplication.
  @param[in]     pSrcCmplx   points to complex input vector (two values read per sample)
  @param[in]     pSrcReal    points to real input vector (one value read per sample)
  @param[out]    pCmplxDst   points to complex output vector (two values written per sample)
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
void arm_cmplx_mult_real_f32(
  const float32_t * pSrcCmplx,
  const float32_t * pSrcReal,
        float32_t * pCmplxDst,
        uint32_t numSamples)
{
  uint32_t count;                                /* Passes still to run in the current loop */
  float32_t scale;                               /* Current value taken from pSrcReal */

#if defined(ARM_MATH_NEON)
  float32x4_t realVec;                           /* Four consecutive values from pSrcReal */
  float32x4x2_t cmplxVec, product;               /* De-interleaved input and result lanes */

  /* Vector section: each pass produces 4 complex outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* Two-way de-interleaving load of the complex input */
    cmplxVec = vld2q_f32(pSrcCmplx);
    /* Plain load of four real factors (nothing to separate here) */
    realVec = vld1q_f32(pSrcReal);

    pSrcCmplx += 8;
    pSrcReal += 4;

    /* Both lane sets are multiplied by the same real factors */
    product.val[0] = vmulq_f32(cmplxVec.val[0], realVec);
    product.val[1] = vmulq_f32(cmplxVec.val[1], realVec);

    vst2q_f32(pCmplxDst, product);
    pCmplxDst += 8;
  }

  /* Samples left over after the groups of four */
  count = numSamples & 3;

#else
#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: each pass produces 4 complex outputs */
  for (count = numSamples >> 2U; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i    ] * B[i]. */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
    scale = pSrcReal[0];
    pCmplxDst[0] = pSrcCmplx[0] * scale;
    pCmplxDst[1] = pSrcCmplx[1] * scale;

    scale = pSrcReal[1];
    pCmplxDst[2] = pSrcCmplx[2] * scale;
    pCmplxDst[3] = pSrcCmplx[3] * scale;

    scale = pSrcReal[2];
    pCmplxDst[4] = pSrcCmplx[4] * scale;
    pCmplxDst[5] = pSrcCmplx[5] * scale;

    scale = pSrcReal[3];
    pCmplxDst[6] = pSrcCmplx[6] * scale;
    pCmplxDst[7] = pSrcCmplx[7] * scale;

    pSrcReal += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;
  }

  /* Samples left over after the groups of four */
  count = numSamples % 0x4U;

#else

  /* No unrolling: every sample goes through the loop below */
  count = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */

  for (; count > 0U; count--)
  {
    /* C[2 * i    ] = A[2 * i    ] * B[i]. */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
    scale = *pSrcReal;
    pSrcReal++;

    pCmplxDst[0] = pSrcCmplx[0] * scale;
    pCmplxDst[1] = pSrcCmplx[1] * scale;

    pSrcCmplx += 2;
    pCmplxDst += 2;
  }
}
/**
@} end of CmplxByRealMult group
*/

View File

@ -0,0 +1,182 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_q15.c
* Description: Q15 complex by real multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Q15 complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_cmplx_mult_real_q15(
  const q15_t * pSrcCmplx,
  const q15_t * pSrcReal,
        q15_t * pCmplxDst,
        uint32_t numSamples)
{
  /*
   * Multiplies every complex sample (stored as interleaved real, imag) of
   * pSrcCmplx by the matching real value of pSrcReal:
   *
   *   C[2 * i    ] = A[2 * i    ] * B[i]
   *   C[2 * i + 1] = A[2 * i + 1] * B[i]
   *
   * Each product is shifted right by 15 bits and saturated to 16 bits
   * before it is stored in pCmplxDst.
   */
  uint32_t remaining;                            /* Complex samples left to process */
  q15_t scale;                                   /* Real factor of the current sample */

#if defined (ARM_MATH_LOOPUNROLL)

#if defined (ARM_MATH_DSP)
  q31_t sample0, sample1;                        /* Two packed complex samples (2 x q15 each) */
  q31_t factors;                                 /* Two packed real factors */
  q31_t prod1, prod2, prod3, prod4;              /* Full-width products */
  q15_t res1, res2, res3, res4;                  /* Shifted and saturated products */
#endif

  /* Unrolled path: four complex samples per iteration */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
#if defined (ARM_MATH_DSP)

    /* ---- first pair of complex samples ---- */

    /* Fetch two packed complex samples, then two packed real factors */
    sample0 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    sample1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    factors = read_q15x2_ia ((q15_t **) &pSrcReal);

#if defined (ARM_MATH_BIG_ENDIAN)
    /* Real factors come from the opposite halfwords compared with the
       little-endian branch below */
    prod1 = (q31_t) ((q15_t) sample0         * (q15_t) (factors >> 16));
    prod2 = (q31_t) ((q15_t) (sample0 >> 16) * (q15_t) (factors >> 16));
    prod3 = (q31_t) ((q15_t) sample1         * (q15_t) factors);
    prod4 = (q31_t) ((q15_t) (sample1 >> 16) * (q15_t) factors);
#else
    prod1 = (q31_t) ((q15_t) (sample0)       * (q15_t) (factors));
    prod2 = (q31_t) ((q15_t) (sample0 >> 16) * (q15_t) (factors));
    prod3 = (q31_t) ((q15_t) (sample1)       * (q15_t) (factors >> 16));
    prod4 = (q31_t) ((q15_t) (sample1 >> 16) * (q15_t) (factors >> 16));
#endif /* #if defined (ARM_MATH_BIG_ENDIAN) */

    /* Drop 15 fractional bits and saturate to 16 bits */
    res1 = (q15_t) __SSAT(prod1 >> 15U, 16);
    res2 = (q15_t) __SSAT(prod2 >> 15U, 16);
    res3 = (q15_t) __SSAT(prod3 >> 15U, 16);
    res4 = (q15_t) __SSAT(prod4 >> 15U, 16);

    /* Pack two results per word and store them */
    write_q15x2_ia (&pCmplxDst, __PKHBT(res1, res2, 16));
    write_q15x2_ia (&pCmplxDst, __PKHBT(res3, res4, 16));

    /* ---- second pair of complex samples ---- */

    sample0 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    sample1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
    factors = read_q15x2_ia ((q15_t **) &pSrcReal);

#if defined (ARM_MATH_BIG_ENDIAN)
    prod1 = (q31_t) ((q15_t) sample0         * (q15_t) (factors >> 16));
    prod2 = (q31_t) ((q15_t) (sample0 >> 16) * (q15_t) (factors >> 16));
    prod3 = (q31_t) ((q15_t) sample1         * (q15_t) factors);
    prod4 = (q31_t) ((q15_t) (sample1 >> 16) * (q15_t) factors);
#else
    prod1 = (q31_t) ((q15_t) (sample0)       * (q15_t) (factors));
    prod2 = (q31_t) ((q15_t) (sample0 >> 16) * (q15_t) (factors));
    prod3 = (q31_t) ((q15_t) (sample1)       * (q15_t) (factors >> 16));
    prod4 = (q31_t) ((q15_t) (sample1 >> 16) * (q15_t) (factors >> 16));
#endif /* #if defined (ARM_MATH_BIG_ENDIAN) */

    res1 = (q15_t) __SSAT(prod1 >> 15U, 16);
    res2 = (q15_t) __SSAT(prod2 >> 15U, 16);
    res3 = (q15_t) __SSAT(prod3 >> 15U, 16);
    res4 = (q15_t) __SSAT(prod4 >> 15U, 16);

    write_q15x2_ia (&pCmplxDst, __PKHBT(res1, res2, 16));
    write_q15x2_ia (&pCmplxDst, __PKHBT(res3, res4, 16));

#else

    /* Scalar unrolled variant: one real factor per complex sample */
    scale = pSrcReal[0];
    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * scale) >> 15), 16);
    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * scale) >> 15), 16);

    scale = pSrcReal[1];
    pCmplxDst[2] = (q15_t) __SSAT((((q31_t) pSrcCmplx[2] * scale) >> 15), 16);
    pCmplxDst[3] = (q15_t) __SSAT((((q31_t) pSrcCmplx[3] * scale) >> 15), 16);

    scale = pSrcReal[2];
    pCmplxDst[4] = (q15_t) __SSAT((((q31_t) pSrcCmplx[4] * scale) >> 15), 16);
    pCmplxDst[5] = (q15_t) __SSAT((((q31_t) pSrcCmplx[5] * scale) >> 15), 16);

    scale = pSrcReal[3];
    pCmplxDst[6] = (q15_t) __SSAT((((q31_t) pSrcCmplx[6] * scale) >> 15), 16);
    pCmplxDst[7] = (q15_t) __SSAT((((q31_t) pSrcCmplx[7] * scale) >> 15), 16);

    /* Step past the four samples just handled */
    pSrcReal += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;

#endif /* #if defined (ARM_MATH_DSP) */
  }

  /* Samples left over after the unrolled loop */
  remaining = numSamples % 0x4U;

#else

  /* No unrolling: the generic loop below handles every sample */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Generic loop: one complex sample per iteration */
  for (; remaining > 0U; remaining--)
  {
    scale = pSrcReal[0];
    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * scale) >> 15), 16);
    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * scale) >> 15), 16);

    pSrcReal += 1;
    pSrcCmplx += 2;
    pCmplxDst += 2;
  }
}
/**
@} end of CmplxByRealMult group
*/

View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_q31.c
* Description: Q31 complex by real multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupCmplxMath
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Q31 complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_cmplx_mult_real_q31(
  const q31_t * pSrcCmplx,
  const q31_t * pSrcReal,
        q31_t * pCmplxDst,
        uint32_t numSamples)
{
  /*
   * Multiplies every complex sample (stored as interleaved real, imag) of
   * pSrcCmplx by the matching real value of pSrcReal:
   *
   *   C[2 * i    ] = A[2 * i    ] * B[i]
   *   C[2 * i + 1] = A[2 * i + 1] * B[i]
   *
   * With ARM_MATH_DSP the 64-bit product is shifted right by 32, saturated
   * to 31 bits with __SSAT and then shifted left by one. Otherwise the
   * product is shifted right by 31 and clipped with clip_q63_to_q31().
   */
  uint32_t remaining;                            /* Complex samples left to process */
  q31_t scale;                                   /* Real factor of the current sample */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled path: four complex samples per iteration */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
#if defined (ARM_MATH_DSP)

    scale = pSrcReal[0];
    pCmplxDst[0] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[0] * scale) >> 32), 31) << 1);
    pCmplxDst[1] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[1] * scale) >> 32), 31) << 1);

    scale = pSrcReal[1];
    pCmplxDst[2] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[2] * scale) >> 32), 31) << 1);
    pCmplxDst[3] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[3] * scale) >> 32), 31) << 1);

    scale = pSrcReal[2];
    pCmplxDst[4] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[4] * scale) >> 32), 31) << 1);
    pCmplxDst[5] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[5] * scale) >> 32), 31) << 1);

    scale = pSrcReal[3];
    pCmplxDst[6] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[6] * scale) >> 32), 31) << 1);
    pCmplxDst[7] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[7] * scale) >> 32), 31) << 1);

#else

    scale = pSrcReal[0];
    pCmplxDst[0] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[0] * scale) >> 31);
    pCmplxDst[1] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[1] * scale) >> 31);

    scale = pSrcReal[1];
    pCmplxDst[2] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[2] * scale) >> 31);
    pCmplxDst[3] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[3] * scale) >> 31);

    scale = pSrcReal[2];
    pCmplxDst[4] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[4] * scale) >> 31);
    pCmplxDst[5] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[5] * scale) >> 31);

    scale = pSrcReal[3];
    pCmplxDst[6] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[6] * scale) >> 31);
    pCmplxDst[7] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[7] * scale) >> 31);

#endif /* #if defined (ARM_MATH_DSP) */

    /* Step past the four samples just handled */
    pSrcReal += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;
  }

  /* Samples left over after the unrolled loop */
  remaining = numSamples % 0x4U;

#else

  /* No unrolling: the generic loop below handles every sample */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  /* Generic loop: one complex sample per iteration */
  for (; remaining > 0U; remaining--)
  {
    scale = pSrcReal[0];

#if defined (ARM_MATH_DSP)
    pCmplxDst[0] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[0] * scale) >> 32), 31) << 1);
    pCmplxDst[1] = (__SSAT((q31_t) (((q63_t) pSrcCmplx[1] * scale) >> 32), 31) << 1);
#else
    pCmplxDst[0] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[0] * scale) >> 31);
    pCmplxDst[1] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[1] * scale) >> 31);
#endif /* #if defined (ARM_MATH_DSP) */

    pSrcReal += 1;
    pSrcCmplx += 2;
    pCmplxDst += 2;
  }
}
/**
@} end of CmplxByRealMult group
*/