diff options
Diffstat (limited to 'zap/source/amd64/mem/utf8enclen.S')
-rw-r--r-- | zap/source/amd64/mem/utf8enclen.S | 67 |
1 files changed, 67 insertions, 0 deletions
# Copyright 2022 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

# Syntax: GNU as, AT&T operand order (source, destination).
# ABI:    System V AMD64 (first argument in rdi, result in rax).

# Largest code point that fits in one, two and three UTF-8 octets.
# Anything above UTF8_MAX_3OCT is counted as four octets.
.equ	UTF8_MAX_1OCT, 0x7F
.equ	UTF8_MAX_2OCT, 0x7FF
.equ	UTF8_MAX_3OCT, 0xFFFF

	.text

	.globl	zap_utf8enclen

#-----------------------------------------------------------------------
# zap_utf8enclen
#
# Computes how many octets the UTF-8 encoding of a zero-terminated
# array of 32-bit code points occupies. The terminator itself is not
# counted.
#
# NOTE(review): the C prototype is not visible from this file;
# presumably size_t zap_utf8enclen(const uint32_t *) - confirm against
# the public header.
#
# Input:    rdi = address of the first 32-bit code point
# Output:   rax = number of UTF-8 octets
# Clobbers: rsi, rdi, flags
# Stack:    unused (leaf function)
#
# Code points are unsigned, so every range check below uses the
# unsigned condition (ja). With a signed condition any unit that has
# bit 31 set would compare as negative and be counted as one octet.
# No validation is done: values above 0x10FFFF count as four octets.
#-----------------------------------------------------------------------
zap_utf8enclen:
	xorl	%eax, %eax			# rax = 0 (running octet count)

	# Invariant: rdi points at the next unread code point and
	# rax holds the octet count of everything before it.
.Lloop:
	movl	(%rdi), %esi			# esi = current code point

	# Stop at the zero terminator:
	testl	%esi, %esi
	jz	.Ldone

	# Classify from the widest encoding down:
	cmpl	$UTF8_MAX_3OCT, %esi
	ja	.Loct4

	cmpl	$UTF8_MAX_2OCT, %esi
	ja	.Loct3

	cmpl	$UTF8_MAX_1OCT, %esi
	ja	.Loct2

	# One octet (U+0001 to U+007F):
	incq	%rax
	jmp	.Lnext

	# Two octets (U+0080 to U+07FF):
.Loct2:
	addq	$2, %rax
	jmp	.Lnext

	# Three octets (U+0800 to U+FFFF):
.Loct3:
	addq	$3, %rax
	jmp	.Lnext

	# Four octets (everything above U+FFFF):
.Loct4:
	addq	$4, %rax

	# Step to the next 32-bit code point:
.Lnext:
	addq	$4, %rdi
	jmp	.Lloop

.Ldone:
	ret