summaryrefslogblamecommitdiff
path: root/zap/source/amd64/mem/utf8enc.S
blob: 357bdaa6be1165d87143a43a239d9520663048b3 (plain) (tree)
1
2
3
4
5
6
7


                                                                                                              



                  



















































































                                                       
                      













































                                         
       
# Copyright 2022 Gabriel Jensen.
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.

.globl zap_utf8enc

# zap_utf8enc: encode a zero-terminated array of 32-bit codepoints as UTF-8.
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
# NOTE(review): rdi/rsi as the two inputs matches the System V AMD64 argument
# order (src, dest) - confirm against the C prototype.
#
# In:
#   rdi: Pointer to the first input codepoint (32 bits each). The array must end
#        with a zero codepoint.
#   rsi: Pointer to the first output octet. The buffer must be large enough for
#        the whole encoding, including the terminating zero octet; no bounds
#        checking is done here.
# Out:
#   The encoded octets, followed by a single zero octet, are stored at rsi.
#   rax is zero on return (it holds the terminator that was just read).
# Clobbers:
#   rax, rdx, rdi, rsi, flags. The stack is not used.
#
# Codepoints are not validated: surrogates and values above 0x10FFFF are encoded
# by the same bit-slicing as everything else (values above 0xFFFF always take
# the four-octet path).
# NOTE(review): any routine that computes the required buffer size must classify
# codepoints with the same unsigned thresholds as used here.
zap_utf8enc:
	# rdi: Address of the current input codepoint.
	# rsi: Address of the next output octet.
	# eax: Current codepoint.
	# edx: Temporary used to assemble each octet.

	# Iterate over the input:
.Lloop:

	movl (%rdi),%eax # The 32-bit load also zeroes the upper half of rax.

	# Codepoints are unsigned, so the range checks use the unsigned condition
	# (ja). A signed check would send values with bit 31 set down the one-octet
	# path, where a zero low byte would be mistaken for the terminator.
	cmpl $0xFFFF,%eax
	ja .Loct4

	cmpl $0x7FF,%eax
	ja .Loct3

	cmpl $0x7F,%eax
	ja .Loct2 # Otherwise, only one octet is needed.

	# One octet (0x00-0x7F):

	# Octet #0:
	movb %al,(%rsi) # No conversion needed. This also stores the terminator.

	incq %rsi

	# Test if we have reached the null-terminator. At this point eax is in the
	# range 0x00-0x7F, so al is zero only for the zero codepoint itself:
	testb %al,%al
	jz .Ldone

	jmp .Lnext

	# Two octets (0x80-0x7FF): 110xxxxx 10xxxxxx
.Loct2:

	# Octet #0: bits 10-6 behind the 110 prefix.
	movl %eax,%edx
	shrl $0x6,%edx
	orb $0xC0,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Octet #1: bits 5-0 behind the 10 prefix.
	movl %eax,%edx
	andb $0x3F,%dl
	orb $0x80,%dl
	movb %dl,(%rsi)

	incq %rsi

	jmp .Lnext

	# Three octets (0x800-0xFFFF): 1110xxxx 10xxxxxx 10xxxxxx
.Loct3:

	# Octet #0: bits 15-12 behind the 1110 prefix.
	movl %eax,%edx
	shrl $0xC,%edx
	orb $0xE0,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Octet #1: bits 11-6 behind the 10 prefix.
	movl %eax,%edx
	shrl $0x6,%edx
	andb $0x3F,%dl
	orb $0x80,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Octet #2: bits 5-0 behind the 10 prefix.
	movl %eax,%edx
	andb $0x3F,%dl
	orb $0x80,%dl
	movb %dl,(%rsi)

	incq %rsi

	jmp .Lnext

	# Four octets (above 0xFFFF): 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
.Loct4:

	# Octet #0: bits from 18 upwards behind the 11110 prefix. Only the low byte
	# of the shifted value is stored, and it is not masked.
	movl %eax,%edx
	shrl $0x12,%edx
	orb $0xF0,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Octet #1: bits 17-12 behind the 10 prefix.
	movl %eax,%edx
	shrl $0xC,%edx
	andb $0x3F,%dl
	orb $0x80,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Octet #2: bits 11-6 behind the 10 prefix.
	movl %eax,%edx
	shrl $0x6,%edx
	andb $0x3F,%dl
	orb $0x80,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Octet #3: bits 5-0 behind the 10 prefix.
	movl %eax,%edx
	andb $0x3F,%dl
	orb $0x80,%dl
	movb %dl,(%rsi)

	incq %rsi

	# Continue to the next codepoint (each input element is four bytes wide):
.Lnext:

	addq $0x4,%rdi
	jmp .Lloop

	# Done:
.Ldone:

	ret