diff options
Diffstat (limited to 'zap/source/amd64/mem/utf8enc.S')
-rw-r--r-- | zap/source/amd64/mem/utf8enc.S | 139 |
1 files changed, 139 insertions, 0 deletions
# Copyright 2022 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

# ----------------------------------------------------------------------------
# zap_utf8enc
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# Reads 32-bit codepoints from the buffer at rdi and writes their UTF-8
# encoding to the buffer at rsi. A zero codepoint ends the input; it is
# written out as a single zero octet before returning.
#
# In:    rdi = address of the first input codepoint (32 bits each)
#        rsi = address of the first output octet
# Out:   nothing is deliberately returned (eax holds the zero terminator)
# Clobb: rax, rdx, rdi, rsi, flags
# Stack: unused
#
# NOTE(review): rdi and rsi are the first two integer argument registers of
# the System V AMD64 ABI, so the C prototype is presumably
#     void zap_utf8enc(const uint32_t *in, uint8_t *out);
# Confirm against the header that declares this routine.
#
# Codepoints are compared as unsigned values. Nothing is validated:
# surrogates and values above 0x10FFFF go through the same bit slicing as
# valid input. Every value above 0xFFFF, including one with bit 31 set,
# takes the four-octet path, so the output buffer needs four octets for it.
# NOTE(review): check that whatever sizes the output buffer counts such
# values the same way.
#
# The .type, .size and .note.GNU-stack directives below assume an ELF target.
# ----------------------------------------------------------------------------

        .text
        .globl  zap_utf8enc
        .type   zap_utf8enc, @function

zap_utf8enc:
.Lnext:
        movl    (%rdi), %eax            # eax = current codepoint
        addq    $4, %rdi                # step to the following codepoint

        cmpl    $0x7F, %eax
        ja      .Ltwo                   # unsigned, so bit 31 never looks negative

        # One octet: the codepoint is its own encoding.
        movb    %al, (%rsi)
        incq    %rsi
        testl   %eax, %eax              # zero means the terminator was just stored
        jnz     .Lnext
        ret

        # Two octets, 110xxxxx 10xxxxxx (codepoints up to 0x7FF).
.Ltwo:
        cmpl    $0x7FF, %eax
        ja      .Lthree

        movl    %eax, %edx
        shrl    $6, %edx                # bits 10..6
        orb     $0xC0, %dl              # lead marker 110x xxxx
        movb    %dl, (%rsi)

        movl    %eax, %edx
        andb    $0x3F, %dl              # bits 5..0
        orb     $0x80, %dl              # continuation marker 10xx xxxx
        movb    %dl, 1(%rsi)

        addq    $2, %rsi
        jmp     .Lnext

        # Three octets, 1110xxxx 10xxxxxx 10xxxxxx (codepoints up to 0xFFFF).
.Lthree:
        cmpl    $0xFFFF, %eax
        ja      .Lfour

        movl    %eax, %edx
        shrl    $12, %edx               # bits 15..12
        orb     $0xE0, %dl              # lead marker 1110 xxxx
        movb    %dl, (%rsi)

        movl    %eax, %edx
        shrl    $6, %edx
        andb    $0x3F, %dl              # bits 11..6
        orb     $0x80, %dl
        movb    %dl, 1(%rsi)

        movl    %eax, %edx
        andb    $0x3F, %dl              # bits 5..0
        orb     $0x80, %dl
        movb    %dl, 2(%rsi)

        addq    $3, %rsi
        jmp     .Lnext

        # Four octets, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (everything above 0xFFFF).
.Lfour:
        movl    %eax, %edx
        shrl    $18, %edx               # bits 20..18 for a valid codepoint
        orb     $0xF0, %dl              # lead marker 1111 0xxx
        movb    %dl, (%rsi)

        movl    %eax, %edx
        shrl    $12, %edx
        andb    $0x3F, %dl              # bits 17..12
        orb     $0x80, %dl
        movb    %dl, 1(%rsi)

        movl    %eax, %edx
        shrl    $6, %edx
        andb    $0x3F, %dl              # bits 11..6
        orb     $0x80, %dl
        movb    %dl, 2(%rsi)

        movl    %eax, %edx
        andb    $0x3F, %dl              # bits 5..0
        orb     $0x80, %dl
        movb    %dl, 3(%rsi)

        addq    $4, %rsi
        jmp     .Lnext

        .size   zap_utf8enc, . - zap_utf8enc

        # Mark the object as not needing an executable stack.
        .section .note.GNU-stack,"",@progbits
\ No newline at end of file |