From 4d1a0ca8c06ee558a92c9981a572bb962f73a6f6 Mon Sep 17 00:00:00 2001 From: dam Date: Sun, 2 Jul 2023 20:02:26 +0100 Subject: Tweaked signed add. --- Math_Ext.jai | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) (limited to 'Math_Ext.jai') diff --git a/Math_Ext.jai b/Math_Ext.jai index 500962f..368779f 100644 --- a/Math_Ext.jai +++ b/Math_Ext.jai @@ -74,35 +74,53 @@ add :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, saturated: b result: Tr = ---; saturated: bool = ---; + S_ADD_ASM :: #string DONE #asm { - // Calculate limit based on x's sign. - mov limit: gpr, MAX; - mov sign: gpr, x; - shr.SIZE sign, BITS; - add.SIZE limit, sign; // If sign is 1, then limit will overflow from MAX to MIN. - mov result, x; - add.SIZE result, y; - seto saturated; - cmovo result, limit; + // Performance + // s8 | s16 | s32 | s64 + // 1.243 | 1.242 | 1.215 | 1.210 + // + mov result, -1; // Pre-set result with signed maximum: set all ones. + shr.SIZE result, 1; // Pre-set result with signed maximum: insert zero on MSB. + bt x, SHIFT; // Test sign bit (affect CF). + adc result, 0; // Overflow signed maximum to signed minimum if CF is set. + + add.SIZE x, y; // Add values (affect OF). + seto saturated; // Set saturated flag if OF. + cmovno result, x; // Move add-result to result if NOT OF. + + // Performance - a bit of improvement... with some more code. + // s8 | s16 | s32 | s64 + // 1.336 | 1.305 | 1.217 | 1.210 + // + // mov sign: gpr, x; // Copy x value to sign variable. + // mov limit: gpr, MAX; // Pre-set limit with signed maximum. + // shr.SIZE sign, SHIFT; // Get sign of x value. + // add.SIZE limit, sign; // If sign is 1, overflow from signed maximum to signed minimum. + // + // mov result, x; // Copy x value to result. + // add.SIZE result, y; // Add values (affect OF). + // seto saturated; // Set saturated flag if OF. + // cmovo result, limit; // Move limit to result if OF. 
} DONE #if Tr == s8 - #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7"); + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".b"), "SHIFT", "7"); // , "MAX", "127") #if Tr == s16 - #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15"); + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".w"), "SHIFT", "15"); // , "MAX", "32767") #if Tr == s32 - #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31"); + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".d"), "SHIFT", "31"); // , "MAX", "2147483647") #if Tr == s64 - #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63"); + #insert #run replace(replace(S_ADD_ASM, ".SIZE", ".q"), "SHIFT", "63"); // , "MAX", "9223372036854775807") U_ADD_ASM :: #string DONE #asm { - add.SIZE x, y; // Add values. mov result, -1; // Pre-set result with unsigned maximum. + add.SIZE x, y; // Add values (affect CF). setc saturated; // Set saturated flag if CF. cmovnc result, x; // Move add-result to result if NOT CF. } @@ -152,6 +170,7 @@ sub :: (x: $Tx, y: $Ty, $USE_GENERIC: bool = false) -> result: $Tr, overflow: bo result: Tr = ---; saturated: bool = ---; + S_SUB_ASM :: #string DONE #asm { // Calculate limit based on x's sign. -- cgit v1.2.3