- Sort: Score
- Results per page: 10
- Languages: All
Results 1 - 6 of 6 for VMOVDQU (0.07 sec)
-
src/runtime/memclr_amd64.s
CMPQ BX, $0x2000000 JAE loop_preheader_avx2_huge loop_avx2: VMOVDQU Y0, 0(DI) VMOVDQU Y0, 32(DI) VMOVDQU Y0, 64(DI) VMOVDQU Y0, 96(DI) SUBQ $128, BX ADDQ $128, DI CMPQ BX, $128 JAE loop_avx2 VMOVDQU Y0, -32(DI)(BX*1) VMOVDQU Y0, -64(DI)(BX*1) VMOVDQU Y0, -96(DI)(BX*1) VMOVDQU Y0, -128(DI)(BX*1) VZEROUPPER RET loop_preheader_erms: #ifndef hasAVX2
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Tue May 10 20:52:34 UTC 2022 - 4.9K bytes - Viewed (0) -
src/internal/bytealg/index_amd64.s
_32_or_more: CMPQ AX, $32 JA _33_to_63 VMOVDQU (R8), Y1 LEAQ -31(DI)(DX*1), DX loop32: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 VPMOVMSKB Y3, SI CMPL SI, $0xffffffff JE success_avx2 ADDQ $1,DI CMPQ DI,DX JB loop32 JMP fail_avx2 _33_to_63: LEAQ 1(DI)(DX*1), DX SUBQ AX, DX VMOVDQU -32(R8)(AX*1), Y0 VMOVDQU (R8), Y1 loop33to63: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Mon Aug 07 00:20:48 UTC 2023 - 5.1K bytes - Viewed (0) -
src/internal/bytealg/equal_amd64.s
JEQ hugeloop XORQ AX, AX // return 0 RET #endif // 64 bytes at a time using ymm registers PCALIGN $16 hugeloop_avx2: CMPQ BX, $64 JB bigloop_avx2 VMOVDQU (SI), Y0 VMOVDQU (DI), Y1 VMOVDQU 32(SI), Y2 VMOVDQU 32(DI), Y3 VPCMPEQB Y1, Y0, Y4 VPCMPEQB Y2, Y3, Y5 VPAND Y4, Y5, Y6 VPMOVMSKB Y6, DX ADDQ $64, SI ADDQ $64, DI SUBQ $64, BX CMPL DX, $0xffffffff JEQ hugeloop_avx2
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Fri Nov 17 16:34:40 UTC 2023 - 2.8K bytes - Viewed (0) -
src/internal/bytealg/count_amd64.s
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JNE sse #endif MOVD AX, X0 LEAQ -64(SI)(BX*1), R11 LEAQ (SI)(BX*1), R13 VPBROADCASTB X0, Y1 PCALIGN $32 avx2_loop: VMOVDQU (DI), Y2 VMOVDQU 32(DI), Y4 VPCMPEQB Y1, Y2, Y3 VPCMPEQB Y1, Y4, Y5 VPMOVMSKB Y3, DX VPMOVMSKB Y5, CX POPCNTL DX, DX POPCNTL CX, CX ADDQ DX, R12 ADDQ CX, R12 ADDQ $64, DI CMPQ DI, R11
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Fri Oct 06 20:54:43 UTC 2023 - 4.7K bytes - Viewed (0) -
src/internal/bytealg/compare_amd64.s
SUBQ $64, R8 CMPQ R8, $64 JBE loop JMP big_loop #endif // Compare 64-bytes per loop iteration. // Loop is unrolled and uses AVX2. big_loop_avx2: VMOVDQU (SI), Y2 VMOVDQU (DI), Y3 VMOVDQU 32(SI), Y4 VMOVDQU 32(DI), Y5 VPCMPEQB Y2, Y3, Y0 VPMOVMSKB Y0, AX XORL $0xffffffff, AX JNE diff32_avx2 VPCMPEQB Y4, Y5, Y6 VPMOVMSKB Y6, AX XORL $0xffffffff, AX JNE diff64_avx2
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Thu Aug 18 17:17:01 UTC 2022 - 4.3K bytes - Viewed (0) -
src/internal/bytealg/indexbyte_amd64.s
JNE sse #endif MOVD AX, X0 LEAQ -32(SI)(BX*1), R11 VPBROADCASTB X0, Y1 PCALIGN $32 avx2_loop: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 VPTEST Y3, Y3 JNZ avx2success ADDQ $32, DI CMPQ DI, R11 JLT avx2_loop MOVQ R11, DI VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 VPTEST Y3, Y3 JNZ avx2success VZEROUPPER MOVQ $-1, (R8) RET avx2success:
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Wed Nov 01 19:06:01 UTC 2023 - 3.1K bytes - Viewed (0)