1 #ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H
2 #define INCLUDED_volk_16i_s32f_convert_32f_u_H
/*!
  \brief Converts 16-bit signed integers to floats and divides each by a scalar
         (SSE4.1 implementation using unaligned loads and stores).

  Computes outputVector[i] = (float)inputVector[i] / scalar for every
  i in [0, num_points).  Eight samples are handled per vector iteration; any
  remaining (num_points % 8) samples are handled by a scalar tail loop.

  The vector body multiplies by a precomputed reciprocal of \p scalar, while
  the tail loop performs a true division.

  Uses SSE4.1 intrinsics (declared in <smmintrin.h>); the translation unit
  must be built with SSE4.1 code generation enabled.

  \param outputVector Destination buffer; receives num_points floats.
  \param inputVector  Source buffer of num_points 16-bit signed integers.
  \param scalar       Value every converted sample is divided by.
  \param num_points   Number of samples to convert.
*/
static inline void volk_16i_s32f_convert_32f_u_sse4_1(
    float* outputVector,
    const int16_t* inputVector,
    const float scalar,
    unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  const __m128 invScalar = _mm_set_ps1(1.0f / scalar);
  __m128i inputVal;
  __m128i inputVal2;
  __m128 ret;

  for(; number < eighthPoints; number++){
    /* Load eight 16-bit samples (no alignment requirement). */
    inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

    /* Move the upper four samples (8 bytes) down into the low half. */
    inputVal2 = _mm_srli_si128(inputVal, 8);

    /* Sign-extend the low four 16-bit lanes of each register to 32 bits. */
    inputVal = _mm_cvtepi16_epi32(inputVal);
    inputVal2 = _mm_cvtepi16_epi32(inputVal2);

    /* Samples 0..3 of this group. */
    ret = _mm_cvtepi32_ps(inputVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Samples 4..7 of this group; must land after the first four. */
    ret = _mm_cvtepi32_ps(inputVal2);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputPtr += 8;
  }

  /* Scalar tail for the samples that do not fill a group of eight. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    outputVector[number] = ((float)(inputVector[number])) / scalar;
  }
}
63 #include <xmmintrin.h>
/*!
  \brief Converts 16-bit signed integers to floats and divides each by a scalar
         (SSE implementation using unaligned stores).

  Computes outputVector[i] = (float)inputVector[i] / scalar for every
  i in [0, num_points).  Four samples are handled per vector iteration; any
  remaining (num_points % 4) samples are handled by a scalar tail loop.

  The vector body multiplies by a precomputed reciprocal of \p scalar, while
  the tail loop performs a true division.

  \param outputVector Destination buffer; receives num_points floats.
  \param inputVector  Source buffer of num_points 16-bit signed integers.
  \param scalar       Value every converted sample is divided by.
  \param num_points   Number of samples to convert.
*/
static inline void volk_16i_s32f_convert_32f_u_sse(
    float* outputVector,
    const int16_t* inputVector,
    const float scalar,
    unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* outputVectorPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  const __m128 invScalar = _mm_set_ps1(1.0f / scalar);
  __m128 ret;

  for(; number < quarterPoints; number++){
    /* _mm_set_ps takes lanes highest-first, so inputPtr[0] ends up in lane 0. */
    ret = _mm_set_ps((float)(inputPtr[3]),
                     (float)(inputPtr[2]),
                     (float)(inputPtr[1]),
                     (float)(inputPtr[0]));

    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);

    /* Step both cursors to the next group of four samples. */
    inputPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail for the samples that do not fill a group of four. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) / scalar;
  }
}
99 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 16-bit signed integers to floats and divides each by a scalar
         (plain C implementation, no SIMD intrinsics).

  For each of the num_points samples, the input value is cast to float,
  divided by \p scalar, and written to the output buffer in order.

  \param outputVector Destination buffer; one float is written per sample.
  \param inputVector  Source buffer of 16-bit signed integers.
  \param scalar       Value every converted sample is divided by.
  \param num_points   Number of samples to convert.

  NOTE(review): the closing braces of the loop and of the function are not
  visible in this excerpt - confirm they follow the last line shown here.
*/
108 static inline void volk_16i_s32f_convert_32f_u_generic(
float* outputVector,
const int16_t* inputVector,
const float scalar,
unsigned int num_points){
/* Cursors that walk the output and input buffers in lockstep. */
109 float* outputVectorPtr = outputVector;
110 const int16_t* inputVectorPtr = inputVector;
/* The loop header below resets this counter to 0 again. */
111 unsigned int number = 0;
/* One sample per iteration: read, convert, divide, store, advance both cursors. */
113 for(number = 0; number < num_points; number++){
114 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;