From e988d2b7eb253fabcd6e57c37f1c8c385032f03c Mon Sep 17 00:00:00 2001 From: dam Date: Sat, 20 May 2023 03:12:37 +0100 Subject: Implemented branchless saturating integer add. --- Math_Ext.jai | 307 +++++++++++++++++++---------------------------------------- 1 file changed, 96 insertions(+), 211 deletions(-) diff --git a/Math_Ext.jai b/Math_Ext.jai index 44c4d44..b4c8879 100644 --- a/Math_Ext.jai +++ b/Math_Ext.jai @@ -2,12 +2,12 @@ #import "Compiler"; #import "Math"; -// #run test_math_ext(); +// TODO Comparing implementaitons using dump - // TODO Comparing implementaitons using dump +#run test_math_ext(); -// test_math_ext :: () { set_build_options_dc(.{do_output=false}); -main :: () { +test_math_ext :: () { set_build_options_dc(.{do_output=false}); +// main :: () { write_strings("=====================\n", "--- Test Math_Ext ---\n"); @@ -23,7 +23,7 @@ main :: () { */ #import "Random"; - add_test :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) { + test_add :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) { tr, to := add(cast(Tx)x, cast(Ty)y); print("add(%): % + % = % : %\n", t, x, y, r, o); if r != tr print(" > incorrect result value: got % expected %\n", tr, r); @@ -31,30 +31,34 @@ main :: () { if o != to print(" > incorrect overflow flag: got % expected %\n", to, o); } - // add_test(cast(u8)1, cast(u8)2, 3, u8, false); - // add_test(cast(u8)255, cast(u8)1, 255, u8, true); + // test_add(cast(u8)1, cast(u8)2, 3, u8, false); + // test_add(cast(u8)255, cast(u8)1, 255, u8, true); - add_test(cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true); - add_test(cast(s16)S16_MAX, cast(s16)1, S16_MAX, s16, true); - add_test(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true); - add_test(cast(s64)S64_MAX, cast(s64)1, S64_MAX, s64, true); - // add_test(cast(s32)66, cast(s64)-2, 64, s64, false); - // add_test(cast(u32)66, cast(s64)4, 70, s64, false); - // add_test(cast(s32)S32_MAX, cast(s64)1, 2147483648, s64, false); - // add_test(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true); - // add_test(cast(s64)S64_MAX, cast(s64)0, S64_MAX, s64, false); - // add_test(cast(s64)9223372036854775806, cast(s64)1, S64_MAX, s64, false); - // add_test(cast(s64)9223372036854775806, cast(s64)2, S64_MAX, s64, true); + test_add(cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true); + test_add(cast(s16)S16_MAX, cast( u8)1, S16_MAX, s16, true); + test_add(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true); + test_add(cast(s64)S64_MAX, cast(u32)1, S64_MAX, s64, true); - // add_test(cast(u8)7, cast(u8)1, 8, u8, false); - // add_test(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true); - - // add_test(cast(u16)10, cast(u8)3, 13, u16, false); - // add_test(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true); - - return; + test_add(cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, true); + test_add(cast(u16)U16_MAX, cast(u16)1, U16_MAX, u16, true); + test_add(cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, true); + test_add(cast(u64)U64_MAX, cast(u64)1, U64_MAX, u64, true); + + // test_add(cast(s32)66, cast(s64)-2, 64, s64, false); + // test_add(cast(u32)66, cast(s64)4, 70, s64, false); + // test_add(cast(s32)S32_MAX, cast(s64)1, 2147483648, s64, false); + // test_add(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true); + // test_add(cast(s64)S64_MAX, cast(s64)0, S64_MAX, s64, false); + // test_add(cast(s64)9223372036854775806, cast(s64)1, S64_MAX, s64, false); + // test_add(cast(s64)9223372036854775806, cast(s64)2, S64_MAX, s64, true); + + // test_add(cast(u8)7, cast(u8)1, 8, u8, false); + // test_add(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true); + + // test_add(cast(u16)10, cast(u8)3, 13, u16, false); + // test_add(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true); -/* +/* PERFORMANCE TEST best_generic: float; best_asm: float; for 0..100 { @@ -65,7 +69,7 @@ main :: () { best_asm = max(best_asm, perf_asm); } - print("generic : %\nasm : %\n", best_generic, best_asm); + print("method : ops/usec\ngeneric : %\nasm : %\n", best_generic, best_asm); performance_test :: () -> sum_size: s64, time_generic: Apollo_Time, time_asm: Apollo_Time { @@ -79,7 +83,7 @@ main :: () { sum := 0; start := current_time_monotonic(); - for numbers sum = old_add(sum, it); + for numbers sum = add_bad(sum, it); time := current_time_monotonic() - start; sum_asm := 0; @@ -91,11 +95,10 @@ main :: () { return SUM_SIZE, time, time_asm; } - */ - +*/ } -old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump +add :: (x: $Tx, y: $Ty) -> result: $Tr, saturated: bool #dump #modify { type_info_x := cast(*Type_Info)Tx; type_info_y := cast(*Type_Info)Ty; @@ -125,205 +128,87 @@ old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump } else return false, "Number signedness mismatch."; - print("old>tx:ty:%:%\n", Tx, Ty); + print(">tx:ty:%:%\n", Tx, Ty); return true; } { - #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; } - #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; } - #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; } - #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; } - #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; } - #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; } - #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; } + // TODO Are these constants really needed? #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; } - - if (y > 0 && x > MAX - y) then return MAX, true; - if (y < 0 && x < MIN - y) then return MIN, true; - return x + y, false; -} - -add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #dump - #modify { - type_info_x := cast(*Type_Info)Tx; - type_info_y := cast(*Type_Info)Ty; - if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed."; - tx := cast(*Type_Info_Integer)type_info_x; - ty := cast(*Type_Info_Integer)type_info_y; - - largest_type := - ifx tx.runtime_size > ty.runtime_size then Tx else - ifx ty.runtime_size > tx.runtime_size then Ty else - ifx tx.signed == ty.signed then Tx else - void; - - // Only allow to add different signedness values if largest type is the signed one (as in JAI). - if tx.signed == ty.signed { - Tx = largest_type; - Ty = largest_type; - Tr = largest_type; - } - else if tx.signed && Tx == largest_type { - Ty = largest_type; - Tr = largest_type; - } - else if ty.signed && Ty == largest_type { - Tx = largest_type; - Tr = largest_type; - } - else return false, "Number signedness mismatch."; - - print(">tx:ty:%:%\n", Tx, Ty); - return true; - } -{ + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; } + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; } - #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; } - #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; } - #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; } #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; } - - #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; } - #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; } - #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; } - #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; } + #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; } + #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; } + #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; } #if CPU != .X64 { - if (y > 0 && x > MAX - y) then return MAX, true; - if (y < 0 && x < MIN - y) then return MIN, true; - return x + y, false; + + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { + if (y > 0 && x > MAX - y) then return MAX, true; + if (y < 0 && x < MIN - y) then return MIN, true; + return x + y, false; + } else { + if (x > MAX - y) then return MAX, true; + return x + y, false; + } + } else { + + #import "String"; result: Tr = ---; - overflow: bool = ---; + saturated: bool = ---; -#import "String"; - ADD_ASM :: #string DONE - #asm { // s8 - x === a; - y === b; - mov max: gpr === d, MAX; - mov.SIZE c: gpr === c, x; // TODO Not using .b was erroing. - shr c, BITS; - add c, max; - add.SIZE x, y; // add.b for 8bits - cmovo x, c; - seto overflow; + S_ADD_ASM :: #string DONE + #asm { + // Calculate limit based on x's sign. + mov limit: gpr, MAX; + mov sign: gpr, x; + shr sign, BITS; + add.SIZE limit, sign; + mov result, x; + add.SIZE result, y; + seto saturated; + cmovo result, limit; } - DONE - - #if Tr == s8 - #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7"); - #if Tr == s16 - #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15"); - #if Tr == s32 - #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31"); - #if Tr == s64 - #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63"); - - - // TODO - // WIP - doing a similar process for unsigned saturation add - else #if Tr == u8 { - #asm { // u8 - x === a; - y === b; - d: gpr; - add.b x, y; // add.b for 8bits - setc overflow; - sbb d, d; - or d, x; - mov result, d; - } - } - else #if Tr == u16 { - #asm { // u16 - x === a; - y === b; - d: gpr; - add.w x, y; // add.w for 16bits - setc overflow; - sbb d, d; - or d, x; - mov result, d; - } - } + DONE + #if Tr == s8 + #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7"); + #if Tr == s16 + #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15"); + #if Tr == s32 + #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31"); + #if Tr == s64 + #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63"); - // #asm { // s8 - // x === a; - // y === b; - // mov d: gpr === d, MAX; - // mov.b c: gpr === c, x; // TODO Not using .b was erroing. - // shr c, BITS; - // add c, d; - // add.b x, y; // add.b for 8bits - // cmovo x, c; - // seto overflow; - // mov result, x; - // } - // } - /* - else #if Tr == s32 { - #asm { // s32 - x === a; - y === b; - mov d: gpr === d, MAX; - mov.d c: gpr === c, x; - shr c, BITS; - add c, d; - add.d x, y; // add.d for 32bits - cmovo x, c; - seto overflow; - mov result, x; - } - } - else #if Tr == s64 { - #asm { // s64 - x === a; - y === b; - mov d: gpr === d, MAX; - mov c: gpr === c, x; - shr c, BITS; - add c, d; - // add.b x, y; // 8bits - // add.w x, y; // 16bits - // add.d x, y; // 32bits - // add.q x, y; // 64bits - add x, y; - cmovo x, c; - seto overflow; + + U_ADD_ASM :: #string DONE + #asm { // s8 + mov limit: gpr, MAX; mov result, x; + add.SIZE result, y; + setc saturated; + cmovc result, limit; } - } - else #if Tr == u8 { - #asm { // u8 - x === a; - y === b; - d: gpr; - add.b x, y; // add.b for 8bits - setc overflow; - sbb d, d; - or d, x; - mov result, d; - } - } - else #if Tr == u16 { - #asm { // u16 - x === a; - y === b; - d: gpr; - add.w x, y; // add.w for 16bits - setc overflow; - sbb d, d; - or d, x; - mov result, d; - } - } - */ - return result, overflow; + DONE + + #if Tr == u8 + #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".b"), "MAX", "255"); + #if Tr == u16 + #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".w"), "MAX", "65535"); + #if Tr == u32 + #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".d"), "MAX", "4294967295"); + #if Tr == u64 + #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".q"), "MAX", "18446744073709551615"); + + + return result, saturated; + } } -- cgit v1.2.3