GNU Radio 3.5.3.2 C++ API
volk_32f_x2_multiply_32f_a.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
2 #define INCLUDED_volk_32f_x2_multiply_32f_a_H
3 
4 #include <inttypes.h>
5 #include <stdio.h>
6 
7 #ifdef LV_HAVE_SSE
8 #include <xmmintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  \note The vector loop uses _mm_load_ps and _mm_store_ps, which require
  16-byte aligned addresses, so cVector, aVector and bVector must all be
  16-byte aligned whenever num_points >= 4.
*/
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  /* Vector part: four products per iteration. */
  for(number = 0; number < quarterPoints; number++){
    const __m128 aVal = _mm_load_ps(aPtr);
    const __m128 bVal = _mm_load_ps(bPtr);

    _mm_store_ps(cPtr, _mm_mul_ps(aVal, bVal)); /* Store the results back into the C container */

    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the remaining (num_points % 4) values, continuing from
     where the vector loop left the three pointers. */
  for(number = quarterPoints * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
44 #endif /* LV_HAVE_SSE */
45 
46 #ifdef LV_HAVE_AVX
47 #include <immintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  \note The vector loop uses _mm256_load_ps and _mm256_store_ps, which
  require 32-byte aligned addresses, so cVector, aVector and bVector must
  all be 32-byte aligned whenever num_points >= 8.
*/
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const unsigned int eighthPoints = num_points / 8;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  /* Vector part: eight products per iteration. */
  for(number = 0; number < eighthPoints; number++){
    const __m256 aVal = _mm256_load_ps(aPtr);
    const __m256 bVal = _mm256_load_ps(bPtr);

    _mm256_store_ps(cPtr, _mm256_mul_ps(aVal, bVal)); /* Store the results back into the C container */

    aPtr += 8;
    bPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the remaining (num_points % 8) values, continuing from
     where the vector loop left the three pointers. */
  for(number = eighthPoints * 8; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
83 #endif /* LV_HAVE_AVX */
84 
85 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Plain scalar loop with no SIMD intrinsics. Elements are processed in
  ascending index order, one read of each input followed by one write of the
  output per index.
*/
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int number;

  for(number = 0; number < num_points; number++){
    cVector[number] = aVector[number] * bVector[number];
  }
}
103 #endif /* LV_HAVE_GENERIC */
104 
105 #ifdef LV_HAVE_ORC
/*!
  \brief Multiplies the two input vectors and stores their results in the third vector
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  This wrapper does no work of its own: it forwards its four arguments,
  unchanged and in the same order, to volk_32f_x2_multiply_32f_a_orc_impl,
  which is only declared here and must be provided by another translation
  unit at link time.
*/
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);

static inline void volk_32f_x2_multiply_32f_a_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
117 #endif /* LV_HAVE_ORC */
118 
119 
120 #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */