# Copyright 2022 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

# Syntax: GNU as, AT&T operand order (source first, destination last), x86-64.

#-----------------------------------------------------------------------
# zap_utf8enc
#
# Converts a zero-terminated array of 32-bit codepoints into UTF-8
# octets. The terminating zero codepoint is written to the output as a
# single zero octet before the routine returns.
#
# In:    rdi = address of the first input codepoint (4 bytes each)
#        rsi = address of the first output octet
#        (these are the first two integer argument registers of the
#        System V AMD64 ABI; presumably called from C as
#        zap_utf8enc(in, out) - confirm against the declaring header)
# Out:   no value is deliberately returned. rax holds the last codepoint
#        loaded, which is the zero terminator.
# Clobb: rax, rdx, rdi, rsi, flags
# Stack: unused; the routine makes no calls.
#
# Register roles inside the loop:
#        rdi: Current input codepoint.
#        rsi: Current output octet.
#        rax: Current codepoint.
#        rdx: Temporary.
#
# Notes:
#  - The output buffer is not bounds-checked; the caller must provide
#    up to four octets per input codepoint (terminator included).
#  - Input is not validated. Surrogates are encoded like any other
#    three-octet value, and values above 0x1FFFFF still produce four
#    octets whose lead octet is not a valid UTF-8 lead.
#  - Codepoints are compared as UNSIGNED values (ja, not jg). A signed
#    compare would send every value with bit 31 set down the one-octet
#    path, where a zero low byte would be mistaken for the terminator
#    and cut the conversion short.
#-----------------------------------------------------------------------
.globl zap_utf8enc
zap_utf8enc:

# Iterate over the input:
.Lutf8enc_loop:
	movl	(%rdi),%eax			# eax = *in (upper half of rax is zeroed)
	cmpl	$0xFFFF,%eax
	ja	.Lutf8enc_oct4			# above U+FFFF: four octets
	cmpl	$0x7FF,%eax
	ja	.Lutf8enc_oct3			# above U+07FF: three octets
	cmpl	$0x7F,%eax
	ja	.Lutf8enc_oct2			# above U+007F: two octets
	# Otherwise, only one octet is needed.

# One octet (0xxxxxxx):
	movb	%al,(%rsi)			# no conversion needed
	incq	%rsi
	# Here eax <= 0x7F, so al == 0 means the whole codepoint is zero,
	# i.e. we have just written the null-terminator.
	testb	%al,%al
	jz	.Lutf8enc_done
	jmp	.Lutf8enc_next

# Two octets (110xxxxx 10xxxxxx):
.Lutf8enc_oct2:
	# Octet #0: bits 10..6
	movl	%eax,%edx
	shrl	$0x6,%edx
	orb	$0xC0,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Octet #1: bits 5..0
	movl	%eax,%edx
	andb	$0x3F,%dl
	orb	$0x80,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	jmp	.Lutf8enc_next

# Three octets (1110xxxx 10xxxxxx 10xxxxxx):
.Lutf8enc_oct3:
	# Octet #0: bits 15..12
	movl	%eax,%edx
	shrl	$0xC,%edx
	orb	$0xE0,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Octet #1: bits 11..6
	movl	%eax,%edx
	shrl	$0x6,%edx
	andb	$0x3F,%dl
	orb	$0x80,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Octet #2: bits 5..0
	movl	%eax,%edx
	andb	$0x3F,%dl
	orb	$0x80,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	jmp	.Lutf8enc_next

# Four octets (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx):
.Lutf8enc_oct4:
	# Octet #0: bits 20..18 (higher bits are not masked off)
	movl	%eax,%edx
	shrl	$0x12,%edx
	orb	$0xF0,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Octet #1: bits 17..12
	movl	%eax,%edx
	shrl	$0xC,%edx
	andb	$0x3F,%dl
	orb	$0x80,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Octet #2: bits 11..6
	movl	%eax,%edx
	shrl	$0x6,%edx
	andb	$0x3F,%dl
	orb	$0x80,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Octet #3: bits 5..0
	movl	%eax,%edx
	andb	$0x3F,%dl
	orb	$0x80,%dl
	movb	%dl,(%rsi)
	incq	%rsi
	# Fall through to the next codepoint.

# Continue to the next codepoint:
.Lutf8enc_next:
	addq	$0x4,%rdi			# each input codepoint is four bytes wide
	jmp	.Lutf8enc_loop

# Done:
.Lutf8enc_done:
	ret