Mercurial > pmdwin
view memcpy.S @ 0:c55ea9478c80
Hello Gensokyo!
author | Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> |
---|---|
date | Tue, 21 May 2013 10:29:21 +0200 |
parents | |
children |
line wrap: on
line source
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * Syntax:  GNU as, AT&T operand order.
 * ABI:     System V AMD64.
 * In:      %rdi = dst, %rsi = src, %rdx = n
 * Out:     %rax = dst (the ORIGINAL destination pointer, as ISO C requires)
 * Clobbers: %rcx, %rsi, %rdi, %r8, %r10, %xmm0-%xmm3, flags
 *
 * Strategy:
 *   - n <= 64: go straight to the tail copy.
 *   - n >  64: byte-copy until dst is 16-byte aligned, then move 64-byte
 *     chunks with unaligned SSE loads and aligned SSE stores, then finish
 *     the remaining (< 64) bytes in the tail copy.
 *   - tail: copy dwords with "rep movsl", then the last 0-3 bytes with
 *     "rep movsb".
 */
        .text
        .global memcpy
        .type   memcpy,%function
memcpy:
        movq    %rdi, %r8               /* Remember dst: it is the return value. */

        /* Cutoff for the big loop is a size of 64 bytes since otherwise
           the loop will never be entered.  */
        cmpq    $64, %rdx
        movq    %rdx, %rcx              /* rcx = n (mov leaves the flags alone). */
        cld                             /* Make sure string ops move forward. */
        jbe     .Ltail                  /* n <= 64 (unsigned): tail copy only. */

        /* Align destination to 16 bytes so that movaps stores are legal. */
        movq    %rdi, %rax
        negq    %rax
        andq    $15, %rax               /* rax = bytes up to the next 16-byte boundary. */
        subq    %rax, %rcx              /* rcx = bytes left after the alignment prefix. */
        xchgq   %rax, %rcx              /* rcx = prefix length, rax = bytes left. */
        rep movsb
        movq    %rax, %rcx              /* rcx = bytes left. */

        /* Bias the counter by -64 so that the sign flag tells us whether
           another full 64-byte chunk is available.  */
        subq    $64, %rcx
        js      .Lfix_count             /* Fewer than 64 bytes left: skip the loop. */

        /* Align the loop head to 16 bytes, but only when that costs at
           most 11 bytes of padding.  */
        .p2align 4,,11
.Lloop64:
        /* Pre-decrement for the NEXT iteration.  None of the following
           instructions (movups, movaps, lea) modifies the flags, so the
           jns at the bottom still tests the result of this subq.  */
        subq    $64, %rcx
        movups  (%rsi), %xmm0           /* src may be unaligned. */
        movups  16(%rsi), %xmm1
        movups  32(%rsi), %xmm2
        movups  48(%rsi), %xmm3
        movaps  %xmm0, (%rdi)           /* dst is 16-byte aligned here. */
        movaps  %xmm1, 16(%rdi)
        movaps  %xmm2, 32(%rdi)
        movaps  %xmm3, 48(%rdi)
        leaq    64(%rsi), %rsi          /* lea, not add: the flags must survive. */
        leaq    64(%rdi), %rdi
        jns     .Lloop64

        /* The loop exits with rcx one chunk too low; fall through. */

.Lfix_count:
        /* Undo the -64 bias: rcx = number of bytes still to copy (< 64). */
        addq    $64, %rcx

.Ltail:
        /* rcx = remaining bytes (at most 64). */
        movq    %rcx, %r10
        shrq    $2, %rcx                /* Whole dwords first ... */
        rep movsl
        movq    %r10, %rcx
        andq    $3, %rcx                /* ... then the last 0-3 bytes. */
        rep movsb

        movq    %r8, %rax               /* Return the original dst, not dst + n. */
        ret
        .size   memcpy,.-memcpy