Camera_driver: initialize project and libraries

This commit is contained in:
Petr Malanik
2022-08-15 18:21:50 +02:00
parent 4da5aefb5a
commit 34b9eaafc2
1280 changed files with 1099270 additions and 0 deletions

View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: BasicMathFunctions.c
* Description: Combination of all basic math function source files.
*
* $Date: 18. March 2019
* $Revision: V1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_abs_f32.c"
#include "arm_abs_q15.c"
#include "arm_abs_q31.c"
#include "arm_abs_q7.c"
#include "arm_add_f32.c"
#include "arm_add_q15.c"
#include "arm_add_q31.c"
#include "arm_add_q7.c"
#include "arm_dot_prod_f32.c"
#include "arm_dot_prod_q15.c"
#include "arm_dot_prod_q31.c"
#include "arm_dot_prod_q7.c"
#include "arm_mult_f32.c"
#include "arm_mult_q15.c"
#include "arm_mult_q31.c"
#include "arm_mult_q7.c"
#include "arm_negate_f32.c"
#include "arm_negate_q15.c"
#include "arm_negate_q31.c"
#include "arm_negate_q7.c"
#include "arm_offset_f32.c"
#include "arm_offset_q15.c"
#include "arm_offset_q31.c"
#include "arm_offset_q7.c"
#include "arm_scale_f32.c"
#include "arm_scale_q15.c"
#include "arm_scale_q31.c"
#include "arm_scale_q7.c"
#include "arm_shift_q15.c"
#include "arm_shift_q31.c"
#include "arm_shift_q7.c"
#include "arm_sub_f32.c"
#include "arm_sub_q15.c"
#include "arm_sub_q31.c"
#include "arm_sub_q7.c"

View File

@ -0,0 +1,16 @@
cmake_minimum_required (VERSION 3.6)
project(CMSISDSPBasicMath)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPBasicMath STATIC ${SRC})
configdsp(CMSISDSPBasicMath ..)
### Includes
target_include_directories(CMSISDSPBasicMath PUBLIC "${DSP}/../../Include")

View File

@ -0,0 +1,146 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_f32.c
* Description: Floating-point vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include <math.h>
/**
@ingroup groupMath
*/
/**
@defgroup BasicAbs Vector Absolute Value
Computes the absolute value of a vector on an element-by-element basis.
<pre>
pDst[n] = abs(pSrc[n]), 0 <= n < blockSize.
</pre>
The functions support in-place computation allowing the source and
destination pointers to reference the same memory buffer.
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Floating-point vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_abs_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute values and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vabsq_f32(vec1);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute and store result in destination buffer. */
*pDst++ = fabsf(*pSrc++);
*pDst++ = fabsf(*pSrc++);
*pDst++ = fabsf(*pSrc++);
*pDst++ = fabsf(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute and store result in destination buffer. */
*pDst++ = fabsf(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_q15.c
* Description: Q15 vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Q15 vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_abs_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q15_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q15_t)__QSUB16(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_q31.c
* Description: Q31 vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Q31 vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_abs_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t in; /* Temporary variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q31_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,134 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_abs_q7.c
* Description: Q7 vector absolute value
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAbs
@{
*/
/**
@brief Q7 vector absolute value.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Conditions for optimum performance
Input and output buffers should be aligned by 32-bit
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
*/
void arm_abs_q7(
const q7_t * pSrc,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q7_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t)__QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7f) and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (in > 0) ? in : (q7_t) __QSUB(0, in);
#else
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? (q7_t) 0x7f : -in);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAbs group
*/

View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_f32.c
* Description: Floating-point vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicAdd Vector Addition
Element-by-element addition of two vectors.
<pre>
pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Floating-point vector addition.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_add_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
res = vaddq_f32(vec1, vec2);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = (*pSrcA++) + (*pSrcB++);
*pDst++ = (*pSrcA++) + (*pSrcB++);
*pDst++ = (*pSrcA++) + (*pSrcB++);
*pDst++ = (*pSrcA++) + (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = (*pSrcA++) + (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_q15.c
* Description: Q15 vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Q15 vector addition.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_add_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2;
q31_t inB1, inB2;
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 times 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
/* Add and store 2 times 2 samples at a time */
write_q15x2_ia (&pDst, __QADD16(inA1, inB1));
write_q15x2_ia (&pDst, __QADD16(inA2, inB2));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ + *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,108 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_q31.c
* Description: Q31 vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Q31 vector addition.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_add_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,109 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_add_q7.c
* Description: Q7 vector addition
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicAdd
@{
*/
/**
@brief Q7 vector addition.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_add_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + B */
#if defined (ARM_MATH_DSP)
/* Add and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
#else
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ + *pSrcB++, 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicAdd group
*/

View File

@ -0,0 +1,163 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_f32.c
* Description: Floating-point dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicDotProd Vector Dot Product
Computes the dot product of two vectors.
The vectors are multiplied element-by-element and then summed.
<pre>
sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of floating-point vectors.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[in] blockSize number of samples in each vector.
@param[out] result output result returned here.
@return none
*/
void arm_dot_prod_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary return variable */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
float32x4_t accum = vdupq_n_f32(0);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
while (blkCnt > 0U)
{
/* C = A[0]*B[0] + A[1]*B[1] + A[2]*B[2] + ... + A[blockSize-1]*B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
accum = vmlaq_f32(accum, vec1, vec2);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
/* Decrement the loop counter */
blkCnt--;
}
#if __aarch64__
sum = vpadds_f32(vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)));
#else
sum = (vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)))[0] + (vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)))[1];
#endif
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,120 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q15.c
* Description: Q15 dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in each vector
@param[out] result output result returned here
@return none
@par Scaling and Overflow Behavior
The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
results are added to a 64-bit accumulator in 34.30 format.
Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
there is no risk of overflow.
The return result is in 34.30 format.
*/
void arm_dot_prod_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q63_t * result)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
#if defined (ARM_MATH_DSP)
/* Calculate dot product and store result in a temporary buffer. */
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
#else
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
//#if defined (ARM_MATH_DSP)
// sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
//#else
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
//#endif
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer in 34.30 format */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,115 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q31.c
* Description: Q31 dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of Q31 vectors.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[in] blockSize number of samples in each vector.
@param[out] result output result returned here.
@return none
@par Scaling and Overflow Behavior
The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
are truncated to 2.48 format by discarding the lower 14 bits.
The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
There are 15 guard bits in the accumulator and there is no risk of overflow as long as
the length of the vectors is less than 2^16 elements.
The return result is in 16.48 format.
*/
void arm_dot_prod_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q63_t * result)
{
uint32_t blkCnt; /* Loop counter */
q63_t sum = 0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
sum += ((q63_t) *pSrcA++ * *pSrcB++) >> 14U;
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer in 16.48 format */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,139 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q7.c
* Description: Q7 dot product
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicDotProd
@{
*/
/**
@brief Dot product of Q7 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in each vector
@param[out] result output result returned here
@return none
@par Scaling and Overflow Behavior
The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
results are added to an accumulator in 18.14 format.
Nonsaturating additions are used and there is no danger of wrap around as long as
the vectors are less than 2^18 elements long.
The return result is in 18.14 format.
*/
void arm_dot_prod_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
uint32_t blockSize,
q31_t * result)
{
uint32_t blkCnt; /* Loop counter */
q31_t sum = 0; /* Temporary return variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t input1, input2; /* Temporary variables */
q31_t inA1, inA2, inB1, inB2; /* Temporary variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
#if defined (ARM_MATH_DSP)
/* read 4 samples at a time from sourceA */
input1 = read_q7x4_ia ((q7_t **) &pSrcA);
/* read 4 samples at a time from sourceB */
input2 = read_q7x4_ia ((q7_t **) &pSrcB);
/* extract two q7_t samples to q15_t samples */
inA1 = __SXTB16(__ROR(input1, 8));
/* extract reminaing two samples */
inA2 = __SXTB16(input1);
/* extract two q7_t samples to q15_t samples */
inB1 = __SXTB16(__ROR(input2, 8));
/* extract reminaing two samples */
inB2 = __SXTB16(input2);
/* multiply and accumulate two samples at a time */
sum = __SMLAD(inA1, inB1, sum);
sum = __SMLAD(inA2, inB2, sum);
#else
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and store result in a temporary buffer. */
//#if defined (ARM_MATH_DSP)
// sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
//#else
sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
//#endif
/* Decrement loop counter */
blkCnt--;
}
/* Store result in destination buffer in 18.14 format */
*result = sum;
}
/**
@} end of BasicDotProd group
*/

View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_f32.c
* Description: Floating-point vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicMult Vector Multiplication
Element-by-element multiplication of two vectors.
<pre>
pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Floating-point vector multiplication.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
*/
void arm_mult_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
res = vmulq_f32(vec1, vec2);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
*pDst++ = (*pSrcA++) * (*pSrcB++);
*pDst++ = (*pSrcA++) * (*pSrcB++);
*pDst++ = (*pSrcA++) * (*pSrcB++);
*pDst++ = (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply input and store result in destination buffer. */
*pDst++ = (*pSrcA++) * (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_q15.c
* Description: Q15 vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Q15 vector multiplication
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_mult_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2, inB1, inB2; /* Temporary input variables */
q15_t out1, out2, out3, out4; /* Temporary output variables */
q31_t mul1, mul2, mul3, mul4; /* Temporary variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
#if defined (ARM_MATH_DSP)
/* read 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
/* read 2 samples at a time from sourceA */
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 samples at a time from sourceB */
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
/* multiply mul = sourceA * sourceB */
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
mul2 = (q31_t) ((q15_t) (inA1 ) * (q15_t) (inB1 ));
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
mul4 = (q31_t) ((q15_t) (inA2 ) * (q15_t) (inB2 ));
/* saturate result to 16 bit */
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
/* store result to destination */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT(out2, out1, 16));
write_q15x2_ia (&pDst, __PKHBT(out4, out3, 16));
#else
write_q15x2_ia (&pDst, __PKHBT(out1, out2, 16));
write_q15x2_ia (&pDst, __PKHBT(out3, out4, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_q31.c
* Description: Q31 vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Q31 vector multiplication.
@param[in] pSrcA points to the first input vector.
@param[in] pSrcB points to the second input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_mult_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t out; /* Temporary output variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply inputs and store result in destination buffer. */
out = ((q63_t) *pSrcA++ * *pSrcB++) >> 32;
out = __SSAT(out, 31);
*pDst++ = out << 1U;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mult_q7.c
* Description: Q7 vector multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicMult
@{
*/
/**
@brief Q7 vector multiplication
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_mult_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q7_t out1, out2, out3, out4; /* Temporary output variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * B */
#if defined (ARM_MATH_DSP)
/* Multiply inputs and store results in temporary variables */
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
#else
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply input and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicMult group
*/

View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_f32.c
* Description: Negates floating-point vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicNegate Vector Negate
Negates the elements of a vector.
<pre>
pDst[n] = -pSrc[n], 0 <= n < blockSize.
</pre>
The functions support in-place computation allowing the source and
destination pointers to reference the same memory buffer.
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a floating-point vector.
@param[in] pSrc points to input vector.
@param[out] pDst points to output vector.
@param[in] blockSize number of samples in each vector.
@return none
*/
void arm_negate_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vnegq_f32(vec1);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
*pDst++ = -*pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = -*pSrc++;
*pDst++ = -*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
*pDst++ = -*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_q15.c
* Description: Negates Q15 vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a Q15 vector.
@param[in] pSrc points to the input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Conditions for optimum performance
Input and output buffers should be aligned by 32-bit
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_negate_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q15_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t in1; /* Temporary input variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
#if defined (ARM_MATH_DSP)
/* Negate and store result in destination buffer (2 samples at a time). */
in1 = read_q15x2_ia ((q15_t **) &pSrc);
write_q15x2_ia (&pDst, __QSUB16(0, in1));
in1 = read_q15x2_ia ((q15_t **) &pSrc);
write_q15x2_ia (&pDst, __QSUB16(0, in1));
#else
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? (q15_t) 0x7fff : -in;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_q31.c
* Description: Negates Q31 vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a Q31 vector.
@param[in] pSrc points to the input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_negate_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = __QSUB(0, in);
#else
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_negate_q7.c
* Description: Negates Q7 vectors
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicNegate
@{
*/
/**
@brief Negates the elements of a Q7 vector.
@param[in] pSrc points to the input vector.
@param[out] pDst points to the output vector.
@param[in] blockSize number of samples in each vector.
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
*/
void arm_negate_q7(
const q7_t * pSrc,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q7_t in; /* Temporary input variable */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t in1; /* Temporary input variable */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = -A */
#if defined (ARM_MATH_DSP)
/* Negate and store result in destination buffer (4 samples at a time). */
in1 = read_q7x4_ia ((q7_t **) &pSrc);
write_q7x4_ia (&pDst, __QSUB8(0, in1));
#else
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and store result in destination buffer. */
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (q7_t) __QSUB(0, in);
#else
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicNegate group
*/

View File

@ -0,0 +1,147 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_f32.c
* Description: Floating-point vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicOffset Vector Offset
Adds a constant offset to each element of a vector.
<pre>
pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
</pre>
The functions support in-place computation allowing the source and
destination pointers to reference the same memory buffer.
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_offset_f32(
const float32_t * pSrc,
float32_t offset,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vaddq_f32(vec1,vdupq_n_f32(offset));
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
*pDst++ = (*pSrc++) + offset;
*pDst++ = (*pSrc++) + offset;
*pDst++ = (*pSrc++) + offset;
*pDst++ = (*pSrc++) + offset;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
*pDst++ = (*pSrc++) + offset;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,121 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_q15.c
* Description: Q15 vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_offset_q15(
const q15_t * pSrc,
q15_t offset,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t offset_packed; /* Offset packed to 32 bit */
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PKHBT(offset, offset, 16);
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
#if defined (ARM_MATH_DSP)
/* Add offset and store result in destination buffer (2 samples at a time). */
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,128 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_q31.c
* Description: Q31 vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_offset_q31(
const q31_t * pSrc,
q31_t offset,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = __QADD(*pSrc++, offset);
#else
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,116 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_offset_q7.c
* Description: Q7 vector offset
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicOffset
@{
*/
/**
@brief Adds a constant offset to a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] offset is the offset to be added
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_offset_q7(
const q7_t * pSrc,
q7_t offset,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t offset_packed; /* Offset packed to 32 bit */
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PACKq7(offset, offset, offset, offset);
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A + offset */
#if defined (ARM_MATH_DSP)
/* Add offset and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia ((q7_t **) &pSrc), offset_packed));
#else
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicOffset group
*/

View File

@ -0,0 +1,159 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_f32.c
* Description: Multiplies a floating-point vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicScale Vector Scale
Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
<pre>
pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
</pre>
In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
The shift allows the gain of the scaling operation to exceed 1.0.
The algorithm used with fixed-point data is:
<pre>
pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
</pre>
The overall scale factor applied to the fixed-point data is
<pre>
scale = scaleFract * 2^shift.
</pre>
The functions support in-place computation allowing the source and destination
pointers to reference the same memory buffer.
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a floating-point vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scale scale factor to be applied
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_scale_f32(
const float32_t *pSrc,
float32_t scale,
float32_t *pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrc);
res = vmulq_f32(vec1, vdupq_n_f32(scale));
vst1q_f32(pDst, res);
/* Increment pointers */
pSrc += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (*pSrc++) * scale;
*pDst++ = (*pSrc++) * scale;
*pDst++ = (*pSrc++) * scale;
*pDst++ = (*pSrc++) * scale;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (*pSrc++) * scale;
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_q15.c
* Description: Multiplies a Q15 vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a Q15 vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scaleFract fractional portion of the scale value
@param[in] shift number of bits to shift the result by
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
*/
void arm_scale_q15(
const q15_t *pSrc,
q15_t scaleFract,
int8_t shift,
q15_t *pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
int8_t kShift = 15 - shift; /* Shift to apply after scaling */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2;
q31_t out1, out2, out3, out4; /* Temporary output variables */
q15_t in1, in2, in3, in4; /* Temporary input variables */
#endif
#endif
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from source */
inA1 = read_q15x2_ia ((q15_t **) &pSrc);
inA2 = read_q15x2_ia ((q15_t **) &pSrc);
/* Scale inputs and store result in temporary variables
* in single cycle by packing the outputs */
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
out2 = (q31_t) ((q15_t) (inA1 ) * scaleFract);
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
out4 = (q31_t) ((q15_t) (inA2 ) * scaleFract);
/* apply shifting */
out1 = out1 >> kShift;
out2 = out2 >> kShift;
out3 = out3 >> kShift;
out4 = out4 >> kShift;
/* saturate the output */
in1 = (q15_t) (__SSAT(out1, 16));
in2 = (q15_t) (__SSAT(out2, 16));
in3 = (q15_t) (__SSAT(out3, 16));
in4 = (q15_t) (__SSAT(out4, 16));
/* store result to destination */
write_q15x2_ia (&pDst, __PKHBT(in2, in1, 16));
write_q15x2_ia (&pDst, __PKHBT(in4, in3, 16));
#else
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (q15_t) (__SSAT(((q31_t) *pSrc++ * scaleFract) >> kShift, 16));
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,191 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_q31.c
* Description: Multiplies a Q31 vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a Q31 vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scaleFract fractional portion of the scale value
@param[in] shift number of bits to shift the result by
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
*/
void arm_scale_q31(
const q31_t *pSrc,
q31_t scaleFract,
int8_t shift,
q31_t *pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
q31_t in, out; /* Temporary variables */
int8_t kShift = shift + 1; /* Shift to apply after scaling */
int8_t sign = (kShift & 0x80);
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++; /* read input from source */
in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
out = in << kShift; /* apply shifting */
if (in != (out >> kShift)) /* saturate the result */
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out; /* Store result destination */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++; /* read four inputs from source */
in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
out = in >> -kShift; /* apply shifting */
*pDst++ = out; /* Store result destination */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if (in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_scale_q7.c
* Description: Multiplies a Q7 vector by a scalar
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicScale
@{
*/
/**
@brief Multiplies a Q7 vector by a scalar.
@param[in] pSrc points to the input vector
@param[in] scaleFract fractional portion of the scale value
@param[in] shift number of bits to shift the result by
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
*/
void arm_scale_q7(
const q7_t * pSrc,
q7_t scaleFract,
int8_t shift,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
int8_t kShift = 7 - shift; /* Shift to apply after scaling */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q7_t in1, in2, in3, in4; /* Temporary input variables */
q7_t out1, out2, out3, out4; /* Temporary output variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * scale */
#if defined (ARM_MATH_DSP)
/* Reading 4 inputs from memory */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* Scale inputs and store result in the temporary variable. */
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
#else
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale input and store result in destination buffer. */
*pDst++ = (q7_t) (__SSAT((((q15_t) *pSrc++ * scaleFract) >> kShift), 8));
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicScale group
*/

View File

@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_shift_q15.c
* Description: Shifts the elements of a Q15 vector by a specified number of bits
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicShift
@{
*/
/**
@brief Shifts the elements of a Q15 vector a specified number of bits
@param[in] pSrc points to the input vector
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_shift_q15(
const q15_t * pSrc,
int8_t shiftBits,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q15_t in1, in2; /* Temporary input variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
#if defined (ARM_MATH_DSP)
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
/* Shift the inputs and then store the results in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in1 << shiftBits), 16),
__SSAT((in2 << shiftBits), 16), 16));
#else
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in2 << shiftBits), 16),
__SSAT((in1 << shiftBits), 16), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in1 << shiftBits), 16),
__SSAT((in2 << shiftBits), 16), 16));
#else
write_q15x2_ia (&pDst, __PKHBT(__SSAT((in2 << shiftBits), 16),
__SSAT((in1 << shiftBits), 16), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
#if defined (ARM_MATH_DSP)
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
/* Shift the inputs and then store the results in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT((in1 >> -shiftBits),
(in2 >> -shiftBits), 16));
#else
write_q15x2_ia (&pDst, __PKHBT((in2 >> -shiftBits),
(in1 >> -shiftBits), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* read 2 samples from source */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pDst, __PKHBT((in1 >> -shiftBits),
(in2 >> -shiftBits), 16));
#else
write_q15x2_ia (&pDst, __PKHBT((in2 >> -shiftBits),
(in1 >> -shiftBits), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
#else
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = __SSAT(((q31_t) *pSrc++ << shiftBits), 16);
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicShift group
*/

View File

@ -0,0 +1,181 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_shift_q31.c
* Description: Shifts the elements of a Q31 vector by a specified number of bits
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicShift Vector Shift
Shifts the elements of a fixed-point vector by a specified number of bits.
There are separate functions for Q7, Q15, and Q31 data types.
The underlying algorithm used is:
<pre>
pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
</pre>
If <code>shift</code> is positive then the elements of the vector are shifted to the left.
If <code>shift</code> is negative then the elements of the vector are shifted to the right.
The functions support in-place computation allowing the source and destination
pointers to reference the same memory buffer.
*/
/**
@addtogroup BasicShift
@{
*/
/**
@brief Shifts the elements of a Q31 vector a specified number of bits.
@param[in] pSrc points to the input vector
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in the vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_shift_q31(
const q31_t * pSrc,
int8_t shiftBits,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#if defined (ARM_MATH_LOOPUNROLL)
q31_t in, out; /* Temporary variables */
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
in = *pSrc++;
out = in << shiftBits;
if (in != (out >> shiftBits))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store results in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = clip_q63_to_q31((q63_t) *pSrc++ << shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicShift group
*/

View File

@ -0,0 +1,175 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_shift_q7.c
* Description: Processing function for the Q7 Shifting
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicShift
@{
*/
/**
@brief Shifts the elements of a Q7 vector a specified number of bits
@param[in] pSrc points to the input vector
@param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par onditions for optimum performance
Input and output buffers should be aligned by 32-bit
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_shift_q7(
const q7_t * pSrc,
int8_t shiftBits,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q7_t in1, in2, in3, in4; /* Temporary input variables */
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
#if defined (ARM_MATH_DSP)
/* Read 4 inputs */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7(__SSAT((in1 << shiftBits), 8),
__SSAT((in2 << shiftBits), 8),
__SSAT((in3 << shiftBits), 8),
__SSAT((in4 << shiftBits), 8) ));
#else
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
#if defined (ARM_MATH_DSP)
/* Read 4 inputs */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* Pack and store result in destination buffer (in single write) */
write_q7x4_ia (&pDst, __PACKq7((in1 >> -shiftBits),
(in2 >> -shiftBits),
(in3 >> -shiftBits),
(in4 >> -shiftBits) ));
#else
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
*pDst++ = (*pSrc++ >> -shiftBits);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
/* If the shift value is positive then do right shift else left shift */
if (sign == 0U)
{
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
/* Decrement loop counter */
blkCnt--;
}
}
else
{
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift input and store result in destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement loop counter */
blkCnt--;
}
}
}
/**
@} end of BasicShift group
*/

View File

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_f32.c
* Description: Floating-point vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@defgroup BasicSub Vector Subtraction
Element-by-element subtraction of two vectors.
<pre>
pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
</pre>
There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Floating-point vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_sub_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */
vec1 = vld1q_f32(pSrcA);
vec2 = vld1q_f32(pSrcB);
res = vsubq_f32(vec1, vec2);
vst1q_f32(pDst, res);
/* Increment pointers */
pSrcA += 4;
pSrcB += 4;
pDst += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* Tail */
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = (*pSrcA++) - (*pSrcB++);
*pDst++ = (*pSrcA++) - (*pSrcB++);
*pDst++ = (*pSrcA++) - (*pSrcB++);
*pDst++ = (*pSrcA++) - (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined(ARM_MATH_NEON) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = (*pSrcA++) - (*pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_q15.c
* Description: Q15 vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Q15 vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_sub_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_DSP)
q31_t inA1, inA2;
q31_t inB1, inB2;
#endif
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
/* read 2 times 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
/* Subtract and store 2 times 2 samples at a time */
write_q15x2_ia (&pDst, __QSUB16(inA1, inB1));
write_q15x2_ia (&pDst, __QSUB16(inA2, inB2));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrcA++ - *pSrcB++), 16);
#endif
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,108 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_q31.c
* Description: Q31 vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Q31 vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_sub_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/

View File

@ -0,0 +1,109 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sub_q7.c
* Description: Q7 vector subtraction
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMath
*/
/**
@addtogroup BasicSub
@{
*/
/**
@brief Q7 vector subtraction.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
@par Scaling and Overflow Behavior
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
void arm_sub_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A - B */
#if defined (ARM_MATH_DSP)
/* Subtract and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
#else
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
#endif
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
#else
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and store result in destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of BasicSub group
*/