Mercurial > pmdwin
changeset 8:a7e3a45d3002
Remove the embedded memcpy implementation.
author | Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> |
---|---|
date | Mon, 08 Sep 2014 17:24:29 +0200 |
parents | ceda140f42fb |
children | 4e34698c666b |
files | memcpy.S |
diffstat | 1 files changed, 0 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
/*
 * void *memcpy(void *dest, const void *src, size_t n)
 *
 * Source: memcpy.S (mode 100644), the file deleted by this changeset.
 * Target: x86-64, System V AMD64 ABI, GNU as (AT&T syntax), ELF.
 *
 * In:    %rdi = dest, %rsi = src, %rdx = n
 * Out:   %rax = dest (the pointer passed in, as ISO C requires)
 * Clobb: %rcx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3, flags
 *
 * Strategy:
 *   n <= 64 : dword copy followed by a byte copy of the last 0-3 bytes.
 *   n >  64 : byte-copy until dest is 16-byte aligned, move 64-byte blocks
 *             (unaligned loads, aligned stores), then finish with the same
 *             dword/byte tail.
 */
        .text
        .globl  memcpy
        .type   memcpy, %function
memcpy:
        /* %rdi is advanced by every copy step below, so keep the original
           destination for the return value. */
        movq    %rdi, %r11

        /* 64 bytes is the cutoff: anything smaller could never enter the
           block loop. */
        cmpq    $64, %rdx
        movq    %rdx, %rcx              /* rcx = n (mov leaves flags intact) */

        /* Forward direction for the string instructions (flags from the
           cmpq above are unaffected). */
        cld
        jbe     .Lmemcpy_tail

        /* Align destination to 16 bytes. */
        movq    %rdi, %rax
        negq    %rax
        andq    $15, %rax               /* rax = bytes up to next boundary */
        subq    %rax, %rcx              /* rcx = n - head */
        xchgq   %rax, %rcx              /* rcx = head, rax = bytes left */

        rep movsb

        movq    %rax, %rcx
        subq    $64, %rcx               /* bias the counter by one block */
        js      .Lmemcpy_unbias         /* fewer than 64 bytes left */

        /* The first three instructions of the loop are 11 bytes in total;
           make sure they are decoded in one go. */
        .p2align 4,,11
.Lmemcpy_block:
        /* Update the loop counter first. Nothing between here and the jns
           below modifies the flags (SSE moves and lea leave them alone). */
        subq    $64, %rcx

        movups    (%rsi), %xmm0
        movups  16(%rsi), %xmm1
        movups  32(%rsi), %xmm2
        movups  48(%rsi), %xmm3
        movaps  %xmm0,   (%rdi)         /* dest is 16-byte aligned here */
        movaps  %xmm1, 16(%rdi)
        movaps  %xmm2, 32(%rdi)
        movaps  %xmm3, 48(%rdi)

        leaq    64(%rsi), %rsi
        leaq    64(%rdi), %rdi

        jns     .Lmemcpy_block

        /* Undo the extra bias so rcx is the true number of bytes left. */
.Lmemcpy_unbias:
        addq    $64, %rcx

.Lmemcpy_tail:
        movq    %rcx, %r10              /* r10 = bytes left */
        shrq    $2, %rcx                /* rcx = whole dwords left */
        rep movsl

        movq    %r10, %rcx
        andq    $3, %rcx                /* rcx = trailing 0-3 bytes */
        rep movsb

        movq    %r11, %rax              /* return the original dest */
        ret
        .size   memcpy, .-memcpy