comparison memcpy.S @ 0:c55ea9478c80

Hello Gensokyo!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Tue, 21 May 2013 10:29:21 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c55ea9478c80
/*-----------------------------------------------------------------------
 * void *memcpy(void *dest, const void *src, size_t n)
 *
 * Syntax: GNU as, AT&T operand order (op src, dst).
 * ABI:    System V AMD64.
 * In:     rdi = dest, rsi = src, rdx = n
 * Out:    rax = dest (the ORIGINAL destination pointer)
 * Clobb:  rcx, rdx, rsi, rdi, xmm0-xmm3, flags
 *
 * Strategy:
 *   n <= 64 : copy n/4 dwords with rep movsl, then n%4 bytes with
 *             rep movsb.
 *   n >  64 : copy 0..15 bytes so that the destination becomes 16-byte
 *             aligned, then move 64 bytes per iteration (unaligned SSE
 *             loads, aligned SSE stores), then finish with the small
 *             copy path above.
 *
 * The copy always runs forward; overlapping regions get no special
 * treatment.  After the 64-byte cutoff the remaining count is tested
 * with sign-based jumps (js/jns), so n is expected to be below 2^63.
 *-----------------------------------------------------------------------*/
        .text
        .global memcpy
        .type   memcpy,%function
memcpy:
        /* Capture the return value first: every copy step below
           advances rdi, so it cannot be read back at the end. */
        movq    %rdi, %rax
        movq    %rdx, %rcx              /* rcx = n (count for the tail path) */

        /* The string instructions below must run forward.  The ABI
           already requires DF to be clear on entry; this is kept as a
           cheap safety net. */
        cld

        /* Cutoff for the big loop is a size of 64 bytes since otherwise
           the loop would never be entered. */
        cmpq    $64, %rdx
        jbe     .Ltail

        /* Align the destination to 16 bytes so the loop can use movaps
           stores.  rdx still holds n here. */
        movq    %rdi, %rcx
        negq    %rcx
        andq    $15, %rcx               /* rcx = (-dest) & 15 = pad bytes, 0..15 */
        subq    %rcx, %rdx              /* rdx = n - pad = bytes left afterwards */
        rep movsb                       /* copy the pad bytes; rcx ends at 0 */

        /* Bias the counter by -64: from here on rcx = remaining - 64,
           so "rcx >= 0" means at least one full 64-byte chunk is left. */
        movq    %rdx, %rcx
        subq    $64, %rcx
        js      .Lunbias                /* fewer than 64 bytes left: skip the loop */

        /* Align the loop head to 16 bytes, but only when that costs at
           most 11 bytes of padding. */
        .p2align 4,,11
.Lloop64:
        /* Account for the chunk copied in this iteration.  The sign flag
           set here is consumed by the jns at the bottom: movups, movaps
           and lea leave the flags untouched. */
        subq    $64, %rcx

        movups    (%rsi), %xmm0         /* source alignment is unknown */
        movups  16(%rsi), %xmm1
        movups  32(%rsi), %xmm2
        movups  48(%rsi), %xmm3
        movaps  %xmm0,   (%rdi)         /* destination is 16-byte aligned */
        movaps  %xmm1, 16(%rdi)
        movaps  %xmm2, 32(%rdi)
        movaps  %xmm3, 48(%rdi)

        leaq    64(%rsi), %rsi          /* lea, not add: must preserve flags */
        leaq    64(%rdi), %rdi

        jns     .Lloop64                /* another full chunk remains */

.Lunbias:
        /* Undo the -64 bias: rcx = bytes still to copy (0..63). */
        addq    $64, %rcx

.Ltail:
        /* Copy the remaining rcx bytes: whole dwords first, then the
           0..3 leftover bytes. */
        movq    %rcx, %rdx              /* keep the byte count for the remainder */
        shrq    $2, %rcx
        rep movsl                       /* rcx / 4 dwords */

        movq    %rdx, %rcx
        andq    $3, %rcx
        rep movsb                       /* rcx % 4 bytes */

        ret                             /* rax = original dest, set on entry */
        .size   memcpy,.-memcpy
62