mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
139 lines
3.8 KiB
ArmAsm
139 lines
3.8 KiB
ArmAsm
|
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
|||
|
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
|||
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|||
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|||
|
│ │
|
|||
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|||
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|||
|
│ above copyright notice and this permission notice appear in all copies. │
|
|||
|
│ │
|
|||
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|||
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|||
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|||
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|||
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|||
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|||
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|||
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|||
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|||
|
|
|||
|
//	Copies RDX bytes from RSI to RDI and hands back the destination.
//
//	This entry point only records the return value. It has no RET of
//	its own: after the alignment padding below, control falls through
//	(slides) into the routine assembled next, which is MemCpy in this
//	file.
//
//	DEST and SRC must not overlap, unless DEST≤SRC.
//
//	@param	rdi is dest
//	@param	rsi is src
//	@param	rdx is number of bytes
//	@return	original rdi copied to rax
//	@mode	long
//	@asyncsignalsafe
	.globl	memcpy_jart
	.type	memcpy_jart,@function
memcpy_jart:
	movq	%rdi,%rax		# rax = dest, the return value
//	𝑠𝑙𝑖𝑑𝑒
	.p2align 4			# 16-byte boundary for the next entry point
	.size	memcpy_jart,.-memcpy_jart
|
|||
|
|
|||
|
//	Copies memory w/ minimal impact ABI.
//
//	Dispatches through memcpytab on min(rdx, .Lmemcpytab.size) to a
//	routine specialized for the size:
//
//	    0 bytes		.L0	nothing to copy
//	    1, 2, 3 bytes	.L1 .L2 .L3
//	    4-7 bytes		.L4	head dword + tail dword (may overlap)
//	    8-15 bytes		.L8	head qword + tail qword (may overlap)
//	    16-1023 bytes	.L16	16-byte SSE loop + unaligned tail block
//	    1024 bytes-1 MiB	.Lerms	rep movsb
//	    above 1 MiB		.Lnts	non-temporal stores, dest 16-aligned
//
//	Every path returns with rdi, rsi, rdx and rbx holding the values
//	the caller passed in. rax is never written here. xmm3 and xmm4
//	are zeroed by the paths that use them.
//
//	NOTE(review): the dispatch uses the absolute address of memcpytab
//	(no base register), so it needs a non-PIC link as written.
//
//	@param	rdi is dest
//	@param	rsi is src
//	@param	rdx is number of bytes
//	@clob	flags,rcx,xmm3,xmm4
//	@mode	long
	.globl	MemCpy
	.type	MemCpy,@function
MemCpy:	mov	$.Lmemcpytab.size,%ecx
	cmp	%rcx,%rdx
	cmovb	%rdx,%rcx		# rcx = min(rdx, table size)
	jmp	*memcpytab(,%rcx,8)
.Lanchorpoint:

//	Sizes of .Lmemcpytab.size and up land here to pick a bulk strategy.
.L16r:	cmp	$1024,%rdx
	jae	.Lerms

//	16-1023 bytes. The final 16 bytes are loaded before the loop and
//	stored after it, so the loop only has to cover whole 16-byte steps
//	from the front and the tail store fills in whatever remains.
.L16:	movdqu	-16(%rsi,%rdx),%xmm4
	mov	$16,%ecx		# rcx runs 32 bytes ahead of the block copied
0:	add	$16,%rcx
	movdqu	-32(%rsi,%rcx),%xmm3
	movdqu	%xmm3,-32(%rdi,%rcx)
	cmp	%rcx,%rdx
	ja	0b
	movdqu	%xmm4,-16(%rdi,%rdx)
	pxor	%xmm4,%xmm4
	pxor	%xmm3,%xmm3
	ret

//	8-15 bytes: both qwords are loaded before either is stored.
//	rbx is borrowed as a second scratch register and restored.
.L8:	push	%rbx
	mov	(%rsi),%rcx
	mov	-8(%rsi,%rdx),%rbx
	mov	%rcx,(%rdi)
	mov	%rbx,-8(%rdi,%rdx)
	pop	%rbx
.L0:	ret

//	4-7 bytes: same scheme with dwords.
.L4:	push	%rbx
	mov	(%rsi),%ecx
	mov	-4(%rsi,%rdx),%ebx
	mov	%ecx,(%rdi)
	mov	%ebx,-4(%rdi,%rdx)
	pop	%rbx
	ret

//	3 bytes: two overlapping words.
.L3:	push	%rbx
	mov	(%rsi),%cx
	mov	-2(%rsi,%rdx),%bx
	mov	%cx,(%rdi)
	mov	%bx,-2(%rdi,%rdx)
	pop	%rbx
	ret

.L2:	mov	(%rsi),%cx
	mov	%cx,(%rdi)
	ret

.L1:	mov	(%rsi),%cl
	mov	%cl,(%rdi)
	ret

//	1024 bytes up to and including 1 MiB: string move. rep movsb
//	advances rdi and rsi, so they are saved and restored around it.
//	Assumes the direction flag is clear on entry.
.Lerms:	cmp	$1024*1024,%rdx
	ja	.Lnts
	push	%rdi
	push	%rsi
	mov	%rdx,%rcx
	rep movsb
	pop	%rsi
	pop	%rdi
	ret

//	More than 1 MiB: non-temporal stores. The first 16 bytes are
//	copied unaligned, then rdi/rsi are advanced (and rdx reduced) by
//	1-16 bytes so that rdi is 16-byte aligned for movntdq. Those three
//	registers are saved here and restored on exit so this path honors
//	the same clobber list as the others.
.Lnts:	push	%rdi
	push	%rsi
	push	%rdx
	movdqu	(%rsi),%xmm3
	movdqu	%xmm3,(%rdi)
	lea	16(%rdi),%rcx
	and	$-16,%rcx
	sub	%rdi,%rcx		# rcx = distance to next 16-byte boundary
	add	%rcx,%rdi
	add	%rcx,%rsi
	sub	%rcx,%rdx
	mov	$16,%ecx
0:	add	$16,%rcx
	movdqu	-32(%rsi,%rcx),%xmm3
	movntdq	%xmm3,-32(%rdi,%rcx)
	cmp	%rcx,%rdx
	ja	0b
	sfence				# order the non-temporal stores
	movdqu	-16(%rsi,%rdx),%xmm3	# unaligned tail block
	movdqu	%xmm3,-16(%rdi,%rdx)
	pxor	%xmm3,%xmm3
	pop	%rdx
	pop	%rsi
	pop	%rdi
	ret
	.size	MemCpy,.-MemCpy
|
|||
|
|
|||
|
//	Jump table used by MemCpy, indexed by min(byte count, 32).
//
//	Slots 0-31 select the routine for that exact byte count. The
//	.Lmemcpytab.size constant counts only those slots, so the single
//	entry emitted after it is the catch-all for every larger count.
	.section .rodata
	.p2align 3
	.type	memcpytab,@object
memcpytab:
	.quad	.L0					# 0
	.quad	.L1					# 1
	.quad	.L2					# 2
	.quad	.L3					# 3
	.quad	.L4,.L4,.L4,.L4				# 4-7
	.quad	.L8,.L8,.L8,.L8,.L8,.L8,.L8,.L8		# 8-15
	.quad	.L16,.L16,.L16,.L16,.L16,.L16,.L16,.L16	# 16-23
	.quad	.L16,.L16,.L16,.L16,.L16,.L16,.L16,.L16	# 24-31
	.equ	.Lmemcpytab.size,(.-memcpytab)/8
	.quad	.L16r					# SSE + ERMS + NTS
	.previous
|