RuhNetConsulting
/
rtpengine
mirror of https://github.com/sipwise/rtpengine


								#if defined(__linux__) && defined(__ELF__)

								# Empty .note.GNU-stack section with no flags: by GNU toolchain convention
								# this marks the object as not needing an executable stack.
								.section	.note.GNU-stack,"",%progbits

								#endif


								#if defined(__x86_64__)


								.global mvr2s_avx512
								#if defined(__ELF__)
								.type mvr2s_avx512, @function
								#endif

								.text

									# void mvr2s_avx512(float *in, const uint16_t len, int16_t *out);
									#
									# Converts the first len floats of in[] to int16 values in out[],
									# rounding to nearest (ties to even) and saturating to the int16 range.
									#
									# ABI:      System V AMD64, leaf function
									# In:       rdi = in, si = len, rdx = out
									# Clobbers: rax, rcx, rsi, zmm0, zmm1, flags
									# MXCSR:    saved on entry and restored before returning
									# Requires: AVX-512F
									#
									# Note: vcvtps2dq yields 0x80000000 for NaN and for inputs outside the
									# int32 range, so such samples come out as -32768 regardless of sign.
								mvr2s_avx512:
									# MXCSR control bits are callee-saved, so keep the caller value in the
									# red zone (leaf function, rsp is never moved) and put it back on exit.
									stmxcsr -4(%rsp)
									ldmxcsr csr(%rip)	# set "round to nearest"

									movzwl %si, %esi	# len is uint16_t: bits above 15 are unspecified
									mov %esi, %eax
									and $-16, %eax		# rax = len rounded down to a multiple of 16
									xor %ecx, %ecx		# rcx = index of the next sample

								.Lvector:
									cmp %rax, %rcx
									jae .Ltail		# unsigned: rcx and rax are element counts

									vmovups (%rdi,%rcx,4), %zmm0	# load, 32-bit size

									# float -> int32 using the MXCSR rounding mode, for example
									# {-2.2, -1.7, -1.5, 0, 1.5, 15405.2158} -> {-2, -2, -2, 0, 2, 15405}
									vcvtps2dq %zmm0, %zmm1

									# int32 -> int16 with signed saturation
									vpmovsdw %zmm1, %ymm0

									vmovdqu %ymm0, (%rdx,%rcx,2)	# store, 16-bit size

									add $16, %rcx		# 16 samples at a time
									jmp .Lvector

								.Ltail:
									# Remaining len % 16 samples, one per iteration.
									cmp %rsi, %rcx
									jae .Ldone

									# vmovss zeroes every lane above the loaded one, so the same 512-bit
									# conversion as above can be reused and only lane 0 is meaningful.
									vmovss (%rdi,%rcx,4), %xmm0
									vcvtps2dq %zmm0, %zmm1
									vpmovsdw %zmm1, %ymm0
									vpextrw $0, %xmm0, (%rdx,%rcx,2)

									inc %rcx
									jmp .Ltail

								.Ldone:
									ldmxcsr -4(%rsp)	# restore the caller MXCSR
									vzeroupper		# leave no dirty upper vector state behind
									ret
								#if defined(__ELF__)
								.size mvr2s_avx512, . - mvr2s_avx512
								#endif


								#if defined(__ELF__)
								.section .rodata
								#else
								.data
								#endif

								.p2align 2

								# MXCSR image loaded by mvr2s_avx512. Bits 7-12 set masks all six SIMD
								# floating-point exceptions; the rounding-control field (bits 13-14) is
								# zero, which selects round to nearest. The value is never written, so
								# it is kept in read-only data where the object format provides it.
								csr:
									.long 0x00001f80	# [ IM DM ZM OM UM PM ]


								#endif