/*
	Copyright 2022 Gabriel Jensen.

	This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

/* NOTE(review): the header names of the three directives below are missing from the source as reviewed (apparently lost in extraction) - restore them from the original file. */
#include
#include
#include

/*
	zap_fma_<sufx>(a,b,c) returns a + b * c for each supported integer type.

	Two implementations are provided:
	- hand-written amd64 assembly, when zap_priv_fastimpl is non-zero and sus_arch_amd64 is defined;
	- a portable C definition otherwise.

	The architecture test is part of the outer condition so that a target without an assembly implementation gets the C definitions instead of ten global labels with no instructions behind them. To add another architecture, extend this condition and add the matching instruction sequences.
*/
#if zap_priv_fastimpl && defined(sus_arch_amd64)

/*
	AT&T syntax. The register use matches the System V AMD64 calling convention: a in %rdi, b in %rsi, c in %rdx, result in %rax (or the sub-registers of the matching width).
	NOTE(review): the Microsoft x64 convention passes arguments in other registers - confirm that sus_arch_amd64 builds never target it.

	The one-operand forms of mul/imul multiply the accumulator by the operand:
	- the 16-, 32- and 64-bit forms write the high half of the product to %dx/%edx/%rdx, which destroys c. Those variants therefore load c into the accumulator first and multiply by b (the order of the factors is meaningless);
	- the 8-bit form writes the whole product to %ax and leaves %dl alone, so the char variants load b and multiply by c.
	Only the low half of the product is kept.
*/
__asm__ (
	".globl zap_fma_c\n"
	".globl zap_fma_i\n"
	".globl zap_fma_l\n"
	".globl zap_fma_ll\n"
	".globl zap_fma_s\n"
	".globl zap_fma_uc\n"
	".globl zap_fma_ui\n"
	".globl zap_fma_ul\n"
	".globl zap_fma_ull\n"
	".globl zap_fma_us\n"

	/* signed char zap_fma_c(signed char a,signed char b,signed char c) */
	"zap_fma_c:\n"
	"movb %sil,%al\n"  /* al  = b      */
	"imulb %dl\n"      /* ax  = al * c */
	"addb %dil,%al\n"  /* al += a      */
	"ret\n"

	/* int zap_fma_i(int a,int b,int c) */
	"zap_fma_i:\n"
	"movl %edx,%eax\n" /* eax      = c       */
	"imull %esi\n"     /* edx:eax  = eax * b */
	"addl %edi,%eax\n" /* eax     += a       */
	"ret\n"

	/* long zap_fma_l(long a,long b,long c) */
	"zap_fma_l:\n"
	"movq %rdx,%rax\n" /* rax      = c       */
	"imulq %rsi\n"     /* rdx:rax  = rax * b */
	"addq %rdi,%rax\n" /* rax     += a       */
	"ret\n"

	/* long long zap_fma_ll(long long a,long long b,long long c) */
	"zap_fma_ll:\n"
	"movq %rdx,%rax\n" /* rax      = c       */
	"imulq %rsi\n"     /* rdx:rax  = rax * b */
	"addq %rdi,%rax\n" /* rax     += a       */
	"ret\n"

	/* short zap_fma_s(short a,short b,short c) */
	"zap_fma_s:\n"
	"movw %dx,%ax\n"   /* ax     = c      */
	"imulw %si\n"      /* dx:ax  = ax * b */
	"addw %di,%ax\n"   /* ax    += a      */
	"ret\n"

	/* unsigned char zap_fma_uc(unsigned char a,unsigned char b,unsigned char c) */
	"zap_fma_uc:\n"
	"movb %sil,%al\n"  /* al  = b */
	/* mulb puts its product in ax rather than in a dx:ax-style pair (unlike the wider variants), so we don't need to worry about it overwriting dl. */
	"mulb %dl\n"       /* ax  = al * c */
	"addb %dil,%al\n"  /* al += a      */
	"ret\n"

	/* unsigned int zap_fma_ui(unsigned int a,unsigned int b,unsigned int c) */
	"zap_fma_ui:\n"
	"movl %edx,%eax\n" /* eax      = c       */
	"mull %esi\n"      /* edx:eax  = eax * b */
	"addl %edi,%eax\n" /* eax     += a       */
	"ret\n"

	/* unsigned long zap_fma_ul(unsigned long a,unsigned long b,unsigned long c) */
	"zap_fma_ul:\n"
	"movq %rdx,%rax\n" /* rax      = c       */
	"mulq %rsi\n"      /* rdx:rax  = rax * b */
	"addq %rdi,%rax\n" /* rax     += a       */
	"ret\n"

	/* unsigned long long zap_fma_ull(unsigned long long a,unsigned long long b,unsigned long long c) */
	"zap_fma_ull:\n"
	/* rdx gets overwritten by mulq, so we might as well make it the first operand (in multiplication, the order is meaningless). */
	"movq %rdx,%rax\n" /* rax      = c       */
	"mulq %rsi\n"      /* rdx:rax  = rax * b */
	"addq %rdi,%rax\n" /* rax     += a       */
	"ret\n"

	/* unsigned short zap_fma_us(unsigned short a,unsigned short b,unsigned short c) */
	"zap_fma_us:\n"
	"movw %dx,%ax\n"   /* ax     = c      */
	"mulw %si\n"       /* dx:ax  = ax * b */
	"addw %di,%ax\n"   /* ax    += a      */
	"ret\n"
);

#else

/* Defines zap_fma_<_sufx>, taking and returning _typ, as the plain C expression _a + _b * _c. */
#define zap_local_fma(_typ,_sufx) \
_typ zap_fma_ ## _sufx (_typ const _a,_typ const _b,_typ const _c) {return _a + _b * _c;}

zap_local_fma(signed char,c)
zap_local_fma(int,i)
zap_local_fma(long,l)
zap_local_fma(long long,ll)
zap_local_fma(short,s)
zap_local_fma(unsigned char,uc)
zap_local_fma(unsigned int,ui)
zap_local_fma(unsigned long,ul)
zap_local_fma(unsigned long long,ull)
zap_local_fma(unsigned short,us)

#endif