#if defined(__linux__) && defined(__ELF__)
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
.global mvr2s_avx512
|
|
|
|
.text
|
|
|
|
# ---------------------------------------------------------------------
# void mvr2s_avx512(float *in, const uint16_t len, int16_t *out);
#
# Convert a float array to an int16 array with round-to-nearest and
# signed 16-bit saturation.
#
# ABI:       System V AMD64
# In:        rdi = in, si = len (number of samples), rdx = out
# Out:       nothing
# Clobbers:  rax, rcx, rsi, zmm0, zmm1, flags
# Requires:  AVX512F
#
# MXCSR is switched to the csr image (round to nearest, all exceptions
# masked) for the duration of the call and restored before returning.
#
# NOTE(review): inputs that do not fit in int32 (huge values, NaN) go
# through the float to int32 conversion before the saturating narrow;
# verify the result for such inputs against the instruction reference.
# ---------------------------------------------------------------------
mvr2s_avx512:
        sub     $8, %rsp                        # scratch slot for the saved MXCSR
        stmxcsr (%rsp)                          # MXCSR control bits are callee-saved
        ldmxcsr csr(%rip)                       # set "round to nearest"

        movzwl  %si, %esi                       # len is 16 bits wide; upper bits of rsi are undefined
        mov     %esi, %eax
        and     $-16, %eax                      # rax = len rounded down to a multiple of 16
        xor     %ecx, %ecx                      # rcx = index of the next sample

.Lmvr2s_block:
        cmp     %rax, %rcx
        jae     .Lmvr2s_tail                    # unsigned: rcx and rax are sample counts

        vmovups (%rdi,%rcx,4), %zmm0            # load 16 floats (4 bytes each)

        # v16_float = {-2, -2.20000005, -1.70000005, -1.5, 0, 0, 2, 2.20000005, 1.70000005, 1.5, -19187.207, 15405.2158, -4437.91748, -18747.3066, -3701.35034, -19959.6738},
        # ->
        # v16_int32 = {-2, -2, -2, -2, 0, 0, 2, 2, 2, 2, -19187, 15405, -4438, -18747, -3701, -19960},
        vcvtps2dq %zmm0, %zmm1

        # v16_int32 = {-2, -2, -2, -2, 0, 0, 2, 2, 2, 2, -19187, 15405, -4438, -18747, -3701, -19960},
        # ->
        # v16_int16 = {-2, -2, -2, -2, 0, 0, 2, 2, 2, 2, -19187, 15405, -4438, -18747, -3701, -19960},
        vpmovsdw %zmm1, %ymm0

        vmovdqu %ymm0, (%rdx,%rcx,2)            # store 16 int16 (2 bytes each)

        add     $16, %rcx                       # advance by one block of 16 samples
        jmp     .Lmvr2s_block

.Lmvr2s_tail:
        cmp     %rsi, %rcx
        jae     .Lmvr2s_done                    # rcx == len: every sample converted

        vmovss  (%rdi,%rcx,4), %xmm0            # one float in lane 0, remaining lanes zeroed
        vcvtps2dq %zmm0, %zmm1                  # same 512-bit forms as the main loop,
        vpmovsdw %zmm1, %ymm0                   # so no AVX512VL encodings are needed
        vpextrw $0, %xmm0, (%rdx,%rcx,2)        # store lane 0 only

        inc     %rcx
        jmp     .Lmvr2s_tail

.Lmvr2s_done:
        vzeroupper                              # leave clean upper vector state for the caller
        ldmxcsr (%rsp)                          # restore the MXCSR saved on entry
        add     $8, %rsp
        ret
|
|
|
|
.section .rodata

# MXCSR image loaded by mvr2s_avx512 with ldmxcsr.
# Value 0x1f80: all six exception mask bits set, rounding control left
# at zero ("round to nearest"). The image is never written, so it lives
# in read-only data; same four bytes as before (0x80, 0x1f, 0x00, 0x00).
        .balign 4
csr:
        .long   0x00001f80                      # [ IM DM ZM OM UM PM ]
|
|
|
|
#endif
|