0
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
1 .text
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
2 .global memcpy
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
3 .type memcpy,%function
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
4 memcpy:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
5 /* Cutoff for the big loop is a size of 64 bytes since otherwise
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
6 the loop will never be entered. */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
7 cmpq $64, %rdx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
8 movq %rdx, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
9
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
10 /* We need this in any case. */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
11 cld
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
12 jbe 1f
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
13
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
14 /* Align destination. */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
15 movq %rdi, %rax
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
16 negq %rax
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
17 andq $15, %rax
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
18 subq %rax, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
19 xchgq %rax, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
20
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
21 rep; movsb
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
22
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
23 movq %rax, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
24 subq $64, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
25 js 2f
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
26
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
27 /* Next 3 insns are 11 bytes total, make sure we decode them in one go */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
28 .p2align 4,,11
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
29 3:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
30 /* Now correct the loop counter. Please note that in the following
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
31 code the flags are not changed anymore. */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
32 subq $64, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
33
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
34 movups (%rsi), %xmm0
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
35 movups 16(%rsi), %xmm1
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
36 movups 32(%rsi), %xmm2
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
37 movups 48(%rsi), %xmm3
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
38 movaps %xmm0, (%rdi)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
39 movaps %xmm1, 16(%rdi)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
40 movaps %xmm2, 32(%rdi)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
41 movaps %xmm3, 48(%rdi)
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
42
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
43 leaq 64(%rsi), %rsi
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
44 leaq 64(%rdi), %rdi
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
45
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
46 jns 3b
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
47
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
48 /* Correct extra loop counter modification. */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
49 2: addq $64, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
50 1:
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
51 movq %rcx, %r10
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
52 shr $2, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
53 rep; movsd
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
54
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
55 movq %r10, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
56 andq $3, %rcx
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
57 rep; movsb
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
58
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
59 movq %rdi, %rax /* Set return value. */
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
60 ret
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
61 .size memcpy,.-memcpy
|
Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
parents:
diff
changeset
|
62
|