diff options
| -rw-r--r-- | Math_Ext.jai | 413 |
1 files changed, 256 insertions, 157 deletions
diff --git a/Math_Ext.jai b/Math_Ext.jai index e2bba35..bf1e8d6 100644 --- a/Math_Ext.jai +++ b/Math_Ext.jai @@ -2,182 +2,281 @@ #import "Compiler"; #import "Math"; -#run test_math_ext(); +// #run test_math_ext(); -test_math_ext :: () { + // TODO Comparing implementaitons using dump - set_build_options_dc(.{do_output=false}); +// test_math_ext :: () { set_build_options_dc(.{do_output=false}); +main :: () { write_strings("=====================\n", "--- Test Math_Ext ---\n"); - Test_Inputs :: struct(ia: $T, ib: T, ir: T, is: bool) { - // t: Type; - a := ia; - b := ib; - r := ir; - s := is; - }; - -/* - tests := Test_Inputs.[ - // .{1, 2, 3, false}, - ]; - - for * tests { - result, saturated := add(cast(it.t)it.a, cast(it.t)it.b); - assert(result == it.r && saturated == it.s, "Failed: % %\n", result, saturated); + // Different signals: only works if signaled variable is higher. + /* + #run cena(); + cena :: () { + a: s64 = -232; + b: u32 = 4; + c := a+b; + print("\n\n--- --- ---\ntttt : % : % + % = %\n--- --- ---\n\n", type_of(c), a, b, c); } -*/ + */ + + #import "Random"; + add_test :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) { + tr, to := add(cast(Tx)x, cast(Ty)y); + print("add(%): % + % = % : %\n", t, x, y, r, o); + if r != tr print(" > incorrect result value: got % expected %\n", tr, r); + if t != type_of(tr) print(" > incorrect result type: got % expected %\n", type_of(tr), t); + if o != to print(" > incorrect overflow flag: got % expected %\n", to, o); + } + + // add_test(cast(u8)1, cast(u8)2, 3, u8, false); + // add_test(cast(u8)255, cast(u8)1, 255, u8, true); + + add_test(cast(s8)S8_MAX, cast(s8)1, S8_MAX, s8, true); + // add_test(cast(s32)66, cast(s64)-2, 64, s64, false); + // add_test(cast(u32)66, cast(s64)4, 70, s64, false); + // add_test(cast(s32)S32_MAX, cast(s64)1, 2147483648, s64, false); + // add_test(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true); + // add_test(cast(s64)S64_MAX, cast(s64)0, S64_MAX, s64, false); + // add_test(cast(s64)9223372036854775806, cast(s64)1, S64_MAX, s64, false); + // add_test(cast(s64)9223372036854775806, cast(s64)2, S64_MAX, s64, true); + + // add_test(cast(u8)7, cast(u8)1, 8, u8, false); + // add_test(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true); + + // add_test(cast(u16)10, cast(u8)3, 13, u16, false); + // add_test(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true); + + return; - t1a := S64_MAX; - t1b := 0; - t1v, t1r := add(t1a, t1b); - assert(t1v == S64_MAX && t1r == false, "Failed: % %\n", t1v, t1r); - write_string("t1: OK\n"); + best_generic: float; + best_asm: float; + for 0..100 { + size, time_generic, time_asm := performance_test(); + perf_generic := cast(float)size/cast(float)to_microseconds(time_generic); + perf_asm := cast(float)size/cast(float)to_microseconds(time_asm); + best_generic = max(best_generic, perf_generic); + best_asm = max(best_asm, perf_asm); + } - // t2a := S64_MAX; - // t2b := 1; - // t2v, t2r := add(t2a, t2b); - // assert(t2v == S64_MAX && t2r == true, "Failed: % %\n", t2v, t2r); -} + print("generic : %\nasm : %\n", best_generic, best_asm); + + performance_test :: () -> sum_size: s64, time_generic: Apollo_Time, time_asm: Apollo_Time { + + SUM_SIZE := 2000000; + numbers: [..] s64; + array_reserve(*numbers, SUM_SIZE); + + for 0..SUM_SIZE-1 { + array_add(*numbers, cast(s64)random_get()); + } + sum := 0; + start := current_time_monotonic(); + for numbers sum = old_add(sum, it); + time := current_time_monotonic() - start; -add_int64 :: (x :s64, y: s64) -> s64 #dump { // TODO Comparing implementations. - return - ifx (y > 0 && x > S64_MAX - y) then S64_MAX else - ifx (y < 0 && x < S64_MIN - y) then S64_MIN else - x + y; -} + sum_asm := 0; + start_asm := current_time_monotonic(); + for numbers sum_asm = add(sum_asm, it); + time_asm := current_time_monotonic() - start_asm; + + assert(sum == sum_asm); + + return SUM_SIZE, time, time_asm; + } -sub_int64 :: (x :s64, y :s64) -> s64 { - return - ifx (y < 0 && x > S64_MAX + y) then S64_MAX else - ifx (y > 0 && x < S64_MIN + y) then S64_MIN else - x - y; } -add :: (x: s64, y: s64) -> result: s64, saturated: bool #dump { // TODO Comparing implementaitons using dump - #if CPU == .X64 { - saturated := false; - ...test this case... - if (y > 0 && x > S64_MAX - y) then return S64_MAX, true; - if (y < 0 && x < S64_MIN - y) then return S64_MIN, true; - return x + y, false; - // if (y > 0 && x > S64_MAX - y) then return S64_MAX, true; - // if (y < 0 && x < S64_MIN - y) then return S64_MIN, true; - // return x + y, false; - } else { - result: s64 = ---; - flag: bool = ---; - #asm { - mov d: gpr === d, 9223372036854775807; - mov a: gpr === a, x; - mov b: gpr === b, y; - add a, b; - seto flag; // Flag overflow. - mov result, a; - mov a, x; - shr a, 63; - add a, d; - mov c: gpr, x; - xor c, b; - xor b, result; - not b; - or c, b; - test c, c; - cmovns result, a; - } - return result, flag; +old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump + #modify { + type_info_x := cast(*Type_Info)Tx; + type_info_y := cast(*Type_Info)Ty; + if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed."; + tx := cast(*Type_Info_Integer)type_info_x; + ty := cast(*Type_Info_Integer)type_info_y; + + largest_type := + ifx tx.runtime_size > ty.runtime_size then Tx else + ifx ty.runtime_size > tx.runtime_size then Ty else + ifx tx.signed == ty.signed then Tx else + void; + + // Only allow to add different signedness values if largest type is the signed one (as in JAI). + if tx.signed == ty.signed { + Tx = largest_type; + Ty = largest_type; + Tr = largest_type; + } + else if tx.signed && Tx == largest_type { + Ty = largest_type; + Tr = largest_type; + } + else if ty.signed && Ty == largest_type { + Tx = largest_type; + Tr = largest_type; + } + else return false, "Number signedness mismatch."; + + print("old>tx:ty:%:%\n", Tx, Ty); + return true; } +{ + #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; } + #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; } + #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; } + #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; } + + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; } + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; } + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; } + + if (y > 0 && x > MAX - y) then return MAX, true; + if (y < 0 && x < MIN - y) then return MIN, true; + return x + y, false; } -/* -// value_a: s64 = 2; -// value_b: s64 = S64_MAX-1; -value_a: s64 = -2; -value_b: s64 = S64_MIN+1; -print(">%\n", S64_MAX); -argx := get_command_line_arguments(); -if argx.count > 1 - value_a = parse_int(*argx[1]); -if argx.count > 2 - value_b = parse_int(*argx[2]); +add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump + #modify { + type_info_x := cast(*Type_Info)Tx; + type_info_y := cast(*Type_Info)Ty; + if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed."; + tx := cast(*Type_Info_Integer)type_info_x; + ty := cast(*Type_Info_Integer)type_info_y; -result: s64 = ---; -flags: s64;// = ---; -#asm LAHF_SAHF { // TODO Remove LAHF_SAHF it not required. + largest_type := + ifx tx.runtime_size > ty.runtime_size then Tx else + ifx ty.runtime_size > tx.runtime_size then Ty else + ifx tx.signed == ty.signed then Tx else + void; - // Code from https://locklessinc.com/articles/sat_arithmetic/ + // Only allow to add different signedness values if largest type is the signed one (as in JAI). + if tx.signed == ty.signed { + Tx = largest_type; + Ty = largest_type; + Tr = largest_type; + } + else if tx.signed && Tx == largest_type { + Ty = largest_type; + Tr = largest_type; + } + else if ty.signed && Ty == largest_type { + Tx = largest_type; + Tr = largest_type; + } + else return false, "Number signedness mismatch."; + + print(">tx:ty:%:%\n", Tx, Ty); + return true; + } +{ + + #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; } + #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; } + #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; } + #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; } + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; } + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; } + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; } - // value_a === b; - // value_b === c; - // add value_a, value_b; - // mov result, value_a; - - // Version 1 - // mov b: gpr === b, value_a; - // mov c: gpr === c, value_b; - // add b, c; - // mov result, b; - // seto flags; - // cmovns b, c; - -// s64b sat_adds64b(s64b x, s64b y) -// { -// u64b ux = x; -// u64b uy = y; -// u64b res = ux + uy; -// -// ux = (ux >> 63) + LONG_MAX; -// -// /* Force compiler to use cmovns instruction */ -// if ((s64b) ((ux ^ uy) | ~(uy ^ res)) >= 0) -// { -// res = ux; -// } -// -// return res; -// } - // Version 2 - WORKS - mov d: gpr === d, 9223372036854775807; - mov a: gpr === a, value_a; - mov b: gpr === b, value_b; - add a, b; - seto flags; // Flag overflow. - mov result, a; - mov a, value_a; - shr a, 63; - add a, d; - mov c: gpr, value_a; - xor c, b; - xor b, result; - not b; - or c, b; - test c, c; - cmovns result, a; - -// s64b sat_subs64b(s64b x, s64b y) -// { -// u64b ux = x; -// u64b uy = y; -// u64b res = ux - uy; -// -// ux = (ux >> 63) + LONG_MAX; -// -// // Force compiler to use cmovns instruction -// if ((s64b)((ux ^ uy) & (ux ^ res)) < 0) -// { -// res = ux; -// } -// -// return res; -// } - // TODO Use https://godbolt.org/ to help - + #if CPU != .X64 { + if (y > 0 && x > MAX - y) then return MAX, true; + if (y < 0 && x < MIN - y) then return MIN, true; + return x + y, false; + } else { + result: Tr = ---; + overflow: bool = ---; + + #if Tr == s8 { + #asm { // s8 + x === a; + y === b; + mov d: gpr === d, MAX; + mov.b c: gpr === c, x; // TODO Not using .b was erroing. + shr c, BITS; + add c, d; + add.b x, y; // add.b for 8bits + cmovo x, c; + seto overflow; + mov result, x; + } + } + else #if Tr == s32 { + #asm { // s32 + x === a; + y === b; + mov d: gpr === d, MAX; + mov.d c: gpr === c, x; + shr c, BITS; + add c, d; + add.d x, y; // add.d for 32bits + cmovo x, c; + seto overflow; + mov result, x; + } + } + else #if Tr == s64 { + #asm { // s64 + x === a; + y === b; + mov d: gpr === d, MAX; + mov c: gpr === c, x; + shr c, BITS; + add c, d; + // add.b x, y; // 8bits + // add.w x, y; // 16bits + // add.d x, y; // 32bits + // add.q x, y; // 64bits + add x, y; + cmovo x, c; + seto overflow; + mov result, x; + } + } + else #if Tr == u8 { + #asm { // u8 + x === a; + y === b; + d: gpr; + add.b x, y; // add.b for 8bits + setc overflow; + sbb d, d; + or d, x; + mov result, d; + } + } + else #if Tr == u16 { + #asm { // u16 + x === a; + y === b; + d: gpr; + add.w x, y; // add.w for 16bits + setc overflow; + sbb d, d; + or d, x; + mov result, d; + } + } + return result, overflow; + } +} + +sub :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool //#dump + #modify { + return true; + } +{ + #if CPU != .X64 { + if (y < 0 && x > S64_MAX + y) then return S64_MAX, true; + if (y > 0 && x < S64_MIN + y) then return S64_MIN, true; + return x - y, false; + } else { + return x + y, false; // TODO Implement me please. + } } -print("% + % = %\n", value_a, value_b, result); -print("flag: %\n", flags); -return; -*/
\ No newline at end of file |
