- Sort: Score
- Results: 10
- Languages: All
Results 1 - 10 of 16 for VMOVDQU (0.09 sec)
-
src/runtime/memmove_amd64.s
// The tail will be put on its place after main body copying. // It's time for the unaligned heading part. VMOVDQU (SI), Y4 // Adjust source address to point past head. ADDQ R11, SI SUBQ AX, BX // Aligned memory copying there gobble_128_loop: VMOVDQU (SI), Y0 VMOVDQU 0x20(SI), Y1 VMOVDQU 0x40(SI), Y2 VMOVDQU 0x60(SI), Y3 ADDQ AX, SI VMOVDQA Y0, (DI) VMOVDQA Y1, 0x20(DI) VMOVDQA Y2, 0x40(DI)
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Sun Apr 10 15:52:08 UTC 2022 - 12.5K bytes - Viewed (0) -
src/runtime/memclr_amd64.s
CMPQ BX, $0x2000000 JAE loop_preheader_avx2_huge loop_avx2: VMOVDQU Y0, 0(DI) VMOVDQU Y0, 32(DI) VMOVDQU Y0, 64(DI) VMOVDQU Y0, 96(DI) SUBQ $128, BX ADDQ $128, DI CMPQ BX, $128 JAE loop_avx2 VMOVDQU Y0, -32(DI)(BX*1) VMOVDQU Y0, -64(DI)(BX*1) VMOVDQU Y0, -96(DI)(BX*1) VMOVDQU Y0, -128(DI)(BX*1) VZEROUPPER RET loop_preheader_erms: #ifndef hasAVX2
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Tue May 10 20:52:34 UTC 2022 - 4.9K bytes - Viewed (0) -
src/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s
VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Wed Nov 29 21:28:33 UTC 2023 - 105.6K bytes - Viewed (0) -
src/internal/bytealg/index_amd64.s
_32_or_more: CMPQ AX, $32 JA _33_to_63 VMOVDQU (R8), Y1 LEAQ -31(DI)(DX*1), DX loop32: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 VPMOVMSKB Y3, SI CMPL SI, $0xffffffff JE success_avx2 ADDQ $1,DI CMPQ DI,DX JB loop32 JMP fail_avx2 _33_to_63: LEAQ 1(DI)(DX*1), DX SUBQ AX, DX VMOVDQU -32(R8)(AX*1), Y0 VMOVDQU (R8), Y1 loop33to63: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Mon Aug 07 00:20:48 UTC 2023 - 5.1K bytes - Viewed (0) -
src/internal/bytealg/equal_amd64.s
JEQ hugeloop XORQ AX, AX // return 0 RET #endif // 64 bytes at a time using ymm registers PCALIGN $16 hugeloop_avx2: CMPQ BX, $64 JB bigloop_avx2 VMOVDQU (SI), Y0 VMOVDQU (DI), Y1 VMOVDQU 32(SI), Y2 VMOVDQU 32(DI), Y3 VPCMPEQB Y1, Y0, Y4 VPCMPEQB Y2, Y3, Y5 VPAND Y4, Y5, Y6 VPMOVMSKB Y6, DX ADDQ $64, SI ADDQ $64, DI SUBQ $64, BX CMPL DX, $0xffffffff JEQ hugeloop_avx2
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Fri Nov 17 16:34:40 UTC 2023 - 2.8K bytes - Viewed (0) -
src/internal/bytealg/count_amd64.s
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1 JNE sse #endif MOVD AX, X0 LEAQ -64(SI)(BX*1), R11 LEAQ (SI)(BX*1), R13 VPBROADCASTB X0, Y1 PCALIGN $32 avx2_loop: VMOVDQU (DI), Y2 VMOVDQU 32(DI), Y4 VPCMPEQB Y1, Y2, Y3 VPCMPEQB Y1, Y4, Y5 VPMOVMSKB Y3, DX VPMOVMSKB Y5, CX POPCNTL DX, DX POPCNTL CX, CX ADDQ DX, R12 ADDQ CX, R12 ADDQ $64, DI CMPQ DI, R11
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Fri Oct 06 20:54:43 UTC 2023 - 4.7K bytes - Viewed (0) -
src/internal/bytealg/compare_amd64.s
SUBQ $64, R8 CMPQ R8, $64 JBE loop JMP big_loop #endif // Compare 64-bytes per loop iteration. // Loop is unrolled and uses AVX2. big_loop_avx2: VMOVDQU (SI), Y2 VMOVDQU (DI), Y3 VMOVDQU 32(SI), Y4 VMOVDQU 32(DI), Y5 VPCMPEQB Y2, Y3, Y0 VPMOVMSKB Y0, AX XORL $0xffffffff, AX JNE diff32_avx2 VPCMPEQB Y4, Y5, Y6 VPMOVMSKB Y6, AX XORL $0xffffffff, AX JNE diff64_avx2
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Thu Aug 18 17:17:01 UTC 2022 - 4.3K bytes - Viewed (0) -
src/crypto/sha256/sha256block_amd64.s
MOVL 24(CTX), g // g = H6 MOVL 28(CTX), h // h = H7 avx2_loop0: // at each iteration works with one block (512 bit) VMOVDQU (0*32)(INP), XTMP0 VMOVDQU (1*32)(INP), XTMP1 VMOVDQU (2*32)(INP), XTMP2 VMOVDQU (3*32)(INP), XTMP3 VMOVDQU flip_mask<>(SB), BYTE_FLIP_MASK // Apply Byte Flip Mask: LE -> BE VPSHUFB BYTE_FLIP_MASK, XTMP0, XTMP0 VPSHUFB BYTE_FLIP_MASK, XTMP1, XTMP1
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Mon Mar 04 17:29:44 UTC 2024 - 47.3K bytes - Viewed (0) -
src/internal/bytealg/indexbyte_amd64.s
JNE sse #endif MOVD AX, X0 LEAQ -32(SI)(BX*1), R11 VPBROADCASTB X0, Y1 PCALIGN $32 avx2_loop: VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 VPTEST Y3, Y3 JNZ avx2success ADDQ $32, DI CMPQ DI, R11 JLT avx2_loop MOVQ R11, DI VMOVDQU (DI), Y2 VPCMPEQB Y1, Y2, Y3 VPTEST Y3, Y3 JNZ avx2success VZEROUPPER MOVQ $-1, (R8) RET avx2success:
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Wed Nov 01 19:06:01 UTC 2023 - 3.1K bytes - Viewed (0) -
src/crypto/sha512/sha512block_amd64.s
Registered: Wed Jun 12 16:32:35 UTC 2024 - Last Modified: Mon Mar 04 17:29:44 UTC 2024 - 27K bytes - Viewed (0)