diff options
Diffstat (limited to 'rgo/src')
-rw-r--r-- | rgo/src/fndbyte.S | 28 | ||||
-rw-r--r-- | rgo/src/fndchr.S | 30 | ||||
-rw-r--r-- | rgo/src/memcpy.S | 38 | ||||
-rw-r--r-- | rgo/src/memfill.S | 14 | ||||
-rw-r--r-- | rgo/src/strlen.S | 17 |
5 files changed, 110 insertions, 17 deletions
diff --git a/rgo/src/fndbyte.S b/rgo/src/fndbyte.S index c0e4382..4d1e482 100644 --- a/rgo/src/fndbyte.S +++ b/rgo/src/fndbyte.S @@ -20,7 +20,33 @@ rgo_fndbyte: size_t num uint8_t byte */ -#if defined(__x86_64__) +#if defined(__i386__) + /* eax: Address of the current element. */ + movl 0x4(%esp),%eax + /* ecx: Address of the element after the last element. */ + movl 0x8(%esp),%ecx + addl %eax,%ecx + /* edx: Byte value. */ + movb 0xC(%esp),%dl + /* ebx: Current element. */ + pushl %ebx +.loop: + cmpl %eax,%ecx + je .nfnd /* We have gone through the entire array without finding the byte. */ + movb (%eax),%bl + cmpb %bl,%dl + je .fnd /* We have found the byte. */ + incl %eax + jmp .loop +.fnd: + popl %ebx + subl 0x4(%esp),%eax /* eax: Offset of the found byte from the first element (0x4(%esp) is the first argument again now that ebx has been popped). */ + ret +.nfnd: + popl %ebx + movl $0xFFFFFFFF,%eax /* Not found: return all bits set. */ + ret +#elif defined(__x86_64__) /* rax: Address of the current element. */ movq %rdi,%rax /* rsi: Address of the element after the last element. */ diff --git a/rgo/src/fndchr.S b/rgo/src/fndchr.S index 1008e52..f12f4c5 100644 --- a/rgo/src/fndchr.S +++ b/rgo/src/fndchr.S @@ -19,22 +19,42 @@ rgo_fndchr: char const * str char chr */ -#if defined(__x86_64__) +#if defined(__i386__) + /* eax: Address of the current character. */ + movl 0x4(%esp),%eax + /* ecx: Character. */ + movb 0x8(%esp),%cl + /* edx: Current character. */ +.loop: + movb (%eax),%dl + cmpb %dl,%cl + je .fnd /* Exit loop if we have found the character. */ + testb %dl,%dl + je .nfnd /* We encountered the null-terminator but not the specified character. */ + incl %eax + jmp .loop +.fnd: + subl 0x4(%esp),%eax /* eax: Offset of the found character from the start of the string. */ + ret +.nfnd: + movl $0xFFFFFFFF,%eax /* Not found: return all bits set. */ + ret +#elif defined(__x86_64__) /* rax: Address of the current character. */ movq %rdi,%rax /* rdx: Current character. */ .loop: movb (%rax),%dl cmpb %dl,%sil - je .done /* Exit loop if we have found the character. */ + je .fnd /* Exit loop if we have found the character. */ testb %dl,%dl - je .err /* We encountered the null-terminator but not the specified character. 
*/ + je .nfnd /* We encountered the null-terminator but not the specified character. */ incq %rax jmp .loop -.done: +.fnd: subq %rdi,%rax ret -.err: +.nfnd: movq $0xFFFFFFFFFFFFFFFF,%rax ret #endif diff --git a/rgo/src/memcpy.S b/rgo/src/memcpy.S index 475da57..820781d 100644 --- a/rgo/src/memcpy.S +++ b/rgo/src/memcpy.S @@ -30,12 +30,33 @@ rgo_memcpy: /* ebx: Current element. */ pushl %ebx /* ebx must be restored. */ /* xmm0: Current element. */ + /* ymm0: Current element. */ +#if defined(__AVX__) +.big256cpy: + cmpl $0x20,%ecx +#if defined(__SSE__) + jl .big128cpy +#else + jl .wrdcpy +#endif + vmovdqu (%eax),%ymm0 + vmovdqu %ymm0,(%edx) + addl $0x20,%eax + addl $0x20,%edx + subl $0x20,%ecx + jmp .big256cpy +#endif #if defined(__SSE__) .big128cpy: cmpl $0x10,%ecx jl .wrdcpy +#if defined(__SSE2__) + movdqu (%eax),%xmm0 + movdqu %xmm0,(%edx) +#else movups (%eax),%xmm0 movups %xmm0,(%edx) +#endif addl $0x10,%eax addl $0x10,%edx subl $0x10,%ecx @@ -68,11 +89,24 @@ rgo_memcpy: /* rdx: Address of the current output element. */ /* rcx: Current element. */ /* xmm0: Current element. */ + /* ymm0: Current element. */ +#if defined(__AVX__) +.big256cpy: + cmpq $0x20,%rsi + jl .big128cpy /* NOTE(review): jl is a signed comparison; if the remaining count in rsi is an unsigned size, jb would be the matching condition - confirm. */ + vmovups (%rdi),%ymm0 + vmovups %ymm0,(%rdx) + addq $0x20,%rdi + addq $0x20,%rdx + subq $0x20,%rsi + jmp .big256cpy +#endif .big128cpy: + ret /* NOTE(review): this ret executes as soon as .big128cpy is reached, before the 128-bit copy below; it looks like a leftover and would make the function return without copying the remaining bytes - confirm and remove. */ cmpq $0x10,%rsi jl .wrdcpy - movups (%rdi),%xmm0 - movups %xmm0,(%rdx) + movdqu (%rdi),%xmm0 + movdqu %xmm0,(%rdx) addq $0x10,%rdi addq $0x10,%rdx subq $0x10,%rsi diff --git a/rgo/src/memfill.S b/rgo/src/memfill.S index c22547e..7dc00c3 100644 --- a/rgo/src/memfill.S +++ b/rgo/src/memfill.S @@ -37,16 +37,14 @@ rgo_memfill: .done: ret #elif defined(__x86_64__) - /* rax: Address of the current element. */ - movq %rdi,%rax - /* rax: Address of the element after the last element. */ - movq %rdi,%rcx - addq %rsi,%rcx + /* rdi: Address of the current element. */ + /* rsi: Address of the element after the last element. 
*/ + addq %rdi,%rsi .loop: - cmpq %rcx,%rax + cmpq %rsi,%rdi je .done /* Exit loop if we have reached the final element. */ - movb %dl,(%rax) - incq %rax + movb %dl,(%rdi) + incq %rdi jmp .loop /* Continue to next element. */ .done: ret diff --git a/rgo/src/strlen.S b/rgo/src/strlen.S index d7ad03e..37b5780 100644 --- a/rgo/src/strlen.S +++ b/rgo/src/strlen.S @@ -18,7 +18,22 @@ rgo_strlen: /* char const * str */ -#if defined(__x86_64__) +#if defined(__i386__) + /* eax: Address of the current character. */ + movl 0x4(%esp),%eax + /* ecx: Address of the first character. */ + movl %eax,%ecx + /* edx: Current character. */ +.loop: + movb (%eax),%dl + testb %dl,%dl + jz .done /* Exit loop if we have reached the null-terminator. */ + incl %eax /* Continue to the next character. */ + jmp .loop +.done: + subl %ecx,%eax /* eax: Length, i.e. the address of the null-terminator minus the address of the first character. */ + ret +#elif defined(__x86_64__) /* rax: Address of the current character. */ movq %rdi,%rax /* rdx: Current character. */ |