summaryrefslogblamecommitdiff
path: root/zap/src/fma.c
blob: b2f45ad41780afef031a7fa6a0ca1c491c497c8a (plain) (tree)










































































































































































                                                                                                                                                                    
/*
	Copyright 2022 Gabriel Jensen.
	This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
	If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

#include <zap/priv.h>

#include <zap/math.h>

#include <stdint.h>

#if zap_priv_fastimpl
/*
	Hand-written implementations of the integer fused multiply-add family.
	Every routine evaluates a + b * c at the width of its operand type. Only the low half of the double-width product is kept, so the result wraps on overflow.

	All ten symbols are exported with .globl and defined inside this single top-level assembly statement.
	Instructions are only emitted when sus_arch_amd64 is defined.
	The register usage matches the System V AMD64 calling convention: a is read from rdi, b from rsi, and c from rdx (or their sub-registers), and the result is left in rax (or its sub-register).

	NOTE(review): when sus_arch_amd64 is not defined, every label below is emitted without any instructions. Presumably zap_priv_fastimpl is only enabled on supported architectures - confirm against zap/priv.h.
*/
__asm__ (
	/* Export every entry point. */
	".globl zap_fma_c\n"
	".globl zap_fma_i\n"
	".globl zap_fma_l\n"
	".globl zap_fma_ll\n"
	".globl zap_fma_s\n"
	".globl zap_fma_uc\n"
	".globl zap_fma_ui\n"
	".globl zap_fma_ul\n"
	".globl zap_fma_ull\n"
	".globl zap_fma_us\n"

	"zap_fma_c:\n"
		/*
			signed char a (dil)
			signed char b (sil)
			signed char c (dl)

			The one-operand imulb multiplies al by its operand and writes the 16-bit product to ax. dl is therefore left intact, and only al is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movb %sil,%al\n"
		"imulb %dl\n"
		"addb %dil,%al\n"
		"ret\n"
#endif

	"zap_fma_i:\n"
		/*
			int a (edi)
			int b (esi)
			int c (edx)

			c is copied into eax before the multiplication, as the one-operand imull writes its product to edx:eax. Only eax (the low half) is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movl %edx,%eax\n"
		"imull %esi\n"
		"addl %edi,%eax\n"
		"ret\n"
#endif

	"zap_fma_l:\n"
		/*
			long a (rdi)
			long b (rsi)
			long c (rdx)

			long is handled as a 64-bit quantity here. The product is written to rdx:rax, and only rax (the low half) is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movq %rdx,%rax\n"
		"imulq %rsi\n"
		"addq %rdi,%rax\n"
		"ret\n"
#endif

	"zap_fma_ll:\n"
		/*
			long long a (rdi)
			long long b (rsi)
			long long c (rdx)

			Same instruction sequence as zap_fma_l.
		*/
#if defined(sus_arch_amd64)
		"movq %rdx,%rax\n"
		"imulq %rsi\n"
		"addq %rdi,%rax\n"
		"ret\n"
#endif

	"zap_fma_s:\n"
		/*
			short a (di)
			short b (si)
			short c (dx)

			The product is written to dx:ax, and only ax (the low half) is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movw %dx,%ax\n"
		"imulw %si\n"
		"addw %di,%ax\n"
		"ret\n"
#endif

	"zap_fma_uc:\n"
		/*
			unsigned char a (dil)
			unsigned char b (sil)
			unsigned char c (dl)
		*/
#if defined(sus_arch_amd64)
		"movb %sil,%al\n" /* mulb writes its product to ax rather than to a dx-based register pair (as the wider variants do), so dl is not overwritten and can serve as the multiplier. */
		"mulb %dl\n"
		"addb %dil,%al\n"
		"ret\n"
#endif

	"zap_fma_ui:\n"
		/*
			unsigned int a (edi)
			unsigned int b (esi)
			unsigned int c (edx)

			The product is written to edx:eax, and only eax (the low half) is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movl %edx,%eax\n"
		"mull %esi\n"
		"addl %edi,%eax\n"
		"ret\n"
#endif

	"zap_fma_ul:\n"
		/*
			unsigned long a (rdi)
			unsigned long b (rsi)
			unsigned long c (rdx)

			unsigned long is handled as a 64-bit quantity here. The product is written to rdx:rax, and only rax (the low half) is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movq %rdx,%rax\n"
		"mulq %rsi\n"
		"addq %rdi,%rax\n"
		"ret\n"
#endif

	"zap_fma_ull:\n"
		/*
			unsigned long long a (rdi)
			unsigned long long b (rsi)
			unsigned long long c (rdx)

			Same instruction sequence as zap_fma_ul.
		*/
#if defined(sus_arch_amd64)
		"movq %rdx,%rax\n" /* rdx gets overwritten by mulq, so c might as well be the factor that is loaded into rax (the order of the factors does not matter in multiplication). */
		"mulq %rsi\n"
		"addq %rdi,%rax\n"
		"ret\n"
#endif

	"zap_fma_us:\n"
		/*
			unsigned short a (di)
			unsigned short b (si)
			unsigned short c (dx)

			The product is written to dx:ax, and only ax (the low half) is used for the result.
		*/
#if defined(sus_arch_amd64)
		"movw %dx,%ax\n"
		"mulw %si\n"
		"addw %di,%ax\n"
		"ret\n"
#endif
);
#else
/*
	Portable definitions of the integer fused multiply-add family.

	zap_local_fma(_typ,_sufx) defines zap_fma_<_sufx>, which takes three values of type _typ and returns _a + _b * _c reduced to the width of _typ.

	The arithmetic is carried out in uintmax_t instead of in _typ itself:
	* unsigned short operands would otherwise be promoted to int, and where int is 32 bits wide their product can overflow int, which is undefined behaviour;
	* overflow in the int, long, and long long variants would be undefined behaviour as well.
	Unsigned arithmetic is defined to wrap, and the low bits of the wide result are those of the exact result, so the conversion back to _typ yields the wrapped value (as the assembly implementations do).
	For the signed types this final conversion is implementation-defined when the value is out of range; GCC and Clang reduce it modulo 2^N.
*/
#define zap_local_fma(_typ,_sufx) \
	_typ zap_fma_ ## _sufx (_typ const _a,_typ const _b,_typ const _c) {return (_typ)((uintmax_t)_a + (uintmax_t)_b * (uintmax_t)_c);}

zap_local_fma(signed char,c)
zap_local_fma(int,i)
zap_local_fma(long,l)
zap_local_fma(long long,ll)
zap_local_fma(short,s)
zap_local_fma(unsigned char,uc)
zap_local_fma(unsigned int,ui)
zap_local_fma(unsigned long,ul)
zap_local_fma(unsigned long long,ull)
zap_local_fma(unsigned short,us)

#endif