Search Options

Results per page
Sort
Preferred Languages
Advanced

Results 1 - 6 of 6 for VMOVDQU (0.07 sec)

  1. src/runtime/memclr_amd64.s

    	CMPQ    BX, $0x2000000
    	JAE	loop_preheader_avx2_huge
    
    loop_avx2:
    	VMOVDQU	Y0, 0(DI)
    	VMOVDQU	Y0, 32(DI)
    	VMOVDQU	Y0, 64(DI)
    	VMOVDQU	Y0, 96(DI)
    	SUBQ	$128, BX
    	ADDQ	$128, DI
    	CMPQ	BX, $128
    	JAE	loop_avx2
    	VMOVDQU  Y0, -32(DI)(BX*1)
    	VMOVDQU  Y0, -64(DI)(BX*1)
    	VMOVDQU  Y0, -96(DI)(BX*1)
    	VMOVDQU  Y0, -128(DI)(BX*1)
    	VZEROUPPER
    	RET
    
    loop_preheader_erms:
    #ifndef hasAVX2
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Tue May 10 20:52:34 UTC 2022
    - 4.9K bytes
    - Viewed (0)
  2. src/internal/bytealg/index_amd64.s

    _32_or_more:
    	CMPQ AX, $32
    	JA   _33_to_63
    	VMOVDQU (R8), Y1
    	LEAQ -31(DI)(DX*1), DX
    loop32:
    	VMOVDQU (DI), Y2
    	VPCMPEQB Y1, Y2, Y3
    	VPMOVMSKB Y3, SI
    	CMPL  SI, $0xffffffff
    	JE   success_avx2
    	ADDQ $1,DI
    	CMPQ DI,DX
    	JB loop32
    	JMP fail_avx2
    _33_to_63:
    	LEAQ 1(DI)(DX*1), DX
    	SUBQ AX, DX
    	VMOVDQU -32(R8)(AX*1), Y0
    	VMOVDQU (R8), Y1
    loop33to63:
    	VMOVDQU (DI), Y2
    	VPCMPEQB Y1, Y2, Y3
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Mon Aug 07 00:20:48 UTC 2023
    - 5.1K bytes
    - Viewed (0)
  3. src/internal/bytealg/equal_amd64.s

    	JEQ	hugeloop
    	XORQ	AX, AX	// return 0
    	RET
    #endif
    
    	// 64 bytes at a time using ymm registers
    	PCALIGN $16
    hugeloop_avx2:
    	CMPQ	BX, $64
    	JB	bigloop_avx2
    	VMOVDQU	(SI), Y0
    	VMOVDQU	(DI), Y1
    	VMOVDQU	32(SI), Y2
    	VMOVDQU	32(DI), Y3
    	VPCMPEQB	Y1, Y0, Y4
    	VPCMPEQB	Y2, Y3, Y5
    	VPAND	Y4, Y5, Y6
    	VPMOVMSKB Y6, DX
    	ADDQ	$64, SI
    	ADDQ	$64, DI
    	SUBQ	$64, BX
    	CMPL	DX, $0xffffffff
    	JEQ	hugeloop_avx2
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Fri Nov 17 16:34:40 UTC 2023
    - 2.8K bytes
    - Viewed (0)
  4. src/internal/bytealg/count_amd64.s

    	CMPB   internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
    	JNE sse
    #endif
    	MOVD AX, X0
    	LEAQ -64(SI)(BX*1), R11
    	LEAQ (SI)(BX*1), R13
    	VPBROADCASTB  X0, Y1
    	PCALIGN $32
    avx2_loop:
    	VMOVDQU (DI), Y2
    	VMOVDQU 32(DI), Y4
    	VPCMPEQB Y1, Y2, Y3
    	VPCMPEQB Y1, Y4, Y5
    	VPMOVMSKB Y3, DX
    	VPMOVMSKB Y5, CX
    	POPCNTL DX, DX
    	POPCNTL CX, CX
    	ADDQ DX, R12
    	ADDQ CX, R12
    	ADDQ $64, DI
    	CMPQ DI, R11
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Fri Oct 06 20:54:43 UTC 2023
    - 4.7K bytes
    - Viewed (0)
  5. src/internal/bytealg/compare_amd64.s

    	SUBQ	$64, R8
    	CMPQ	R8, $64
    	JBE	loop
    	JMP	big_loop
    #endif
    
    	// Compare 64-bytes per loop iteration.
    	// Loop is unrolled and uses AVX2.
    big_loop_avx2:
    	VMOVDQU	(SI), Y2
    	VMOVDQU	(DI), Y3
    	VMOVDQU	32(SI), Y4
    	VMOVDQU	32(DI), Y5
    	VPCMPEQB Y2, Y3, Y0
    	VPMOVMSKB Y0, AX
    	XORL	$0xffffffff, AX
    	JNE	diff32_avx2
    	VPCMPEQB Y4, Y5, Y6
    	VPMOVMSKB Y6, AX
    	XORL	$0xffffffff, AX
    	JNE	diff64_avx2
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Thu Aug 18 17:17:01 UTC 2022
    - 4.3K bytes
    - Viewed (0)
  6. src/internal/bytealg/indexbyte_amd64.s

    	JNE sse
    #endif
    	MOVD AX, X0
    	LEAQ -32(SI)(BX*1), R11
    	VPBROADCASTB  X0, Y1
    
    	PCALIGN $32
    avx2_loop:
    	VMOVDQU (DI), Y2
    	VPCMPEQB Y1, Y2, Y3
    	VPTEST Y3, Y3
    	JNZ avx2success
    	ADDQ $32, DI
    	CMPQ DI, R11
    	JLT avx2_loop
    	MOVQ R11, DI
    	VMOVDQU (DI), Y2
    	VPCMPEQB Y1, Y2, Y3
    	VPTEST Y3, Y3
    	JNZ avx2success
    	VZEROUPPER
    	MOVQ $-1, (R8)
    	RET
    
    avx2success:
    Registered: Wed Jun 12 16:32:35 UTC 2024
    - Last Modified: Wed Nov 01 19:06:01 UTC 2023
    - 3.1K bytes
    - Viewed (0)
Back to top