aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordam <dam@gudinoff>2023-05-04 23:41:24 +0100
committerdam <dam@gudinoff>2023-05-04 23:41:24 +0100
commit73a5595f19f7190e65d6338f828d80343dedd378 (patch)
tree3128baf43fda60151d17b634d49d33cb7ce0ea55
parent63a4eca8209598f6dea850c9a9a29516923c76ca (diff)
downloadtask-time-tracker-73a5595f19f7190e65d6338f828d80343dedd378.tar.zst
task-time-tracker-73a5595f19f7190e65d6338f828d80343dedd378.zip
Prototyping branch-free saturating arithmetic.
-rw-r--r--Math_Ext.jai413
1 files changed, 256 insertions, 157 deletions
diff --git a/Math_Ext.jai b/Math_Ext.jai
index e2bba35..bf1e8d6 100644
--- a/Math_Ext.jai
+++ b/Math_Ext.jai
@@ -2,182 +2,281 @@
#import "Compiler";
#import "Math";
-#run test_math_ext();
+// #run test_math_ext();
-test_math_ext :: () {
+ // TODO Compare implementations using #dump.
- set_build_options_dc(.{do_output=false});
+// test_math_ext :: () { set_build_options_dc(.{do_output=false});
+main :: () {
write_strings("=====================\n", "--- Test Math_Ext ---\n");
- Test_Inputs :: struct(ia: $T, ib: T, ir: T, is: bool) {
- // t: Type;
- a := ia;
- b := ib;
- r := ir;
- s := is;
- };
-
-/*
- tests := Test_Inputs.[
- // .{1, 2, 3, false},
- ];
-
- for * tests {
- result, saturated := add(cast(it.t)it.a, cast(it.t)it.b);
- assert(result == it.r && saturated == it.s, "Failed: % %\n", result, saturated);
+ // Different signedness: only works if the signed variable is the larger type.
+ /*
+ #run cena();
+ cena :: () {
+ a: s64 = -232;
+ b: u32 = 4;
+ c := a+b;
+ print("\n\n--- --- ---\ntttt : % : % + % = %\n--- --- ---\n\n", type_of(c), a, b, c);
}
-*/
+ */
+
+ #import "Random";
+ add_test :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) {
+ tr, to := add(cast(Tx)x, cast(Ty)y);
+ print("add(%): % + % = % : %\n", t, x, y, r, o);
+ if r != tr print(" > incorrect result value: got % expected %\n", tr, r);
+ if t != type_of(tr) print(" > incorrect result type: got % expected %\n", type_of(tr), t);
+ if o != to print(" > incorrect overflow flag: got % expected %\n", to, o);
+ }
+
+ // add_test(cast(u8)1, cast(u8)2, 3, u8, false);
+ // add_test(cast(u8)255, cast(u8)1, 255, u8, true);
+
+ add_test(cast(s8)S8_MAX, cast(s8)1, S8_MAX, s8, true);
+ // add_test(cast(s32)66, cast(s64)-2, 64, s64, false);
+ // add_test(cast(u32)66, cast(s64)4, 70, s64, false);
+ // add_test(cast(s32)S32_MAX, cast(s64)1, 2147483648, s64, false);
+ // add_test(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
+ // add_test(cast(s64)S64_MAX, cast(s64)0, S64_MAX, s64, false);
+ // add_test(cast(s64)9223372036854775806, cast(s64)1, S64_MAX, s64, false);
+ // add_test(cast(s64)9223372036854775806, cast(s64)2, S64_MAX, s64, true);
+
+ // add_test(cast(u8)7, cast(u8)1, 8, u8, false);
+ // add_test(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true);
+
+ // add_test(cast(u16)10, cast(u8)3, 13, u16, false);
+ // add_test(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true);
+
+ return;
- t1a := S64_MAX;
- t1b := 0;
- t1v, t1r := add(t1a, t1b);
- assert(t1v == S64_MAX && t1r == false, "Failed: % %\n", t1v, t1r);
- write_string("t1: OK\n");
+ best_generic: float;
+ best_asm: float;
+ for 0..100 {
+ size, time_generic, time_asm := performance_test();
+ perf_generic := cast(float)size/cast(float)to_microseconds(time_generic);
+ perf_asm := cast(float)size/cast(float)to_microseconds(time_asm);
+ best_generic = max(best_generic, perf_generic);
+ best_asm = max(best_asm, perf_asm);
+ }
- // t2a := S64_MAX;
- // t2b := 1;
- // t2v, t2r := add(t2a, t2b);
- // assert(t2v == S64_MAX && t2r == true, "Failed: % %\n", t2v, t2r);
-}
+ print("generic : %\nasm : %\n", best_generic, best_asm);
+
+ performance_test :: () -> sum_size: s64, time_generic: Apollo_Time, time_asm: Apollo_Time {
+
+ SUM_SIZE := 2000000;
+ numbers: [..] s64;
+ array_reserve(*numbers, SUM_SIZE);
+
+ for 0..SUM_SIZE-1 {
+ array_add(*numbers, cast(s64)random_get());
+ }
+ sum := 0;
+ start := current_time_monotonic();
+ for numbers sum = old_add(sum, it);
+ time := current_time_monotonic() - start;
-add_int64 :: (x :s64, y: s64) -> s64 #dump { // TODO Comparing implementations.
- return
- ifx (y > 0 && x > S64_MAX - y) then S64_MAX else
- ifx (y < 0 && x < S64_MIN - y) then S64_MIN else
- x + y;
-}
+ sum_asm := 0;
+ start_asm := current_time_monotonic();
+ for numbers sum_asm = add(sum_asm, it);
+ time_asm := current_time_monotonic() - start_asm;
+
+ assert(sum == sum_asm);
+
+ return SUM_SIZE, time, time_asm;
+ }
-sub_int64 :: (x :s64, y :s64) -> s64 {
- return
- ifx (y < 0 && x > S64_MAX + y) then S64_MAX else
- ifx (y > 0 && x < S64_MIN + y) then S64_MIN else
- x - y;
}
-add :: (x: s64, y: s64) -> result: s64, saturated: bool #dump { // TODO Comparing implementaitons using dump
- #if CPU == .X64 {
- saturated := false;
- ...test this case...
- if (y > 0 && x > S64_MAX - y) then return S64_MAX, true;
- if (y < 0 && x < S64_MIN - y) then return S64_MIN, true;
- return x + y, false;
- // if (y > 0 && x > S64_MAX - y) then return S64_MAX, true;
- // if (y < 0 && x < S64_MIN - y) then return S64_MIN, true;
- // return x + y, false;
- } else {
- result: s64 = ---;
- flag: bool = ---;
- #asm {
- mov d: gpr === d, 9223372036854775807;
- mov a: gpr === a, x;
- mov b: gpr === b, y;
- add a, b;
- seto flag; // Flag overflow.
- mov result, a;
- mov a, x;
- shr a, 63;
- add a, d;
- mov c: gpr, x;
- xor c, b;
- xor b, result;
- not b;
- or c, b;
- test c, c;
- cmovns result, a;
- }
- return result, flag;
+old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump
+ #modify {
+ type_info_x := cast(*Type_Info)Tx;
+ type_info_y := cast(*Type_Info)Ty;
+ if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed.";
+ tx := cast(*Type_Info_Integer)type_info_x;
+ ty := cast(*Type_Info_Integer)type_info_y;
+
+ largest_type :=
+ ifx tx.runtime_size > ty.runtime_size then Tx else
+ ifx ty.runtime_size > tx.runtime_size then Ty else
+ ifx tx.signed == ty.signed then Tx else
+ void;
+
+ // Only allow adding values of different signedness if the largest type is the signed one (as in Jai).
+ if tx.signed == ty.signed {
+ Tx = largest_type;
+ Ty = largest_type;
+ Tr = largest_type;
+ }
+ else if tx.signed && Tx == largest_type {
+ Ty = largest_type;
+ Tr = largest_type;
+ }
+ else if ty.signed && Ty == largest_type {
+ Tx = largest_type;
+ Tr = largest_type;
+ }
+ else return false, "Number signedness mismatch.";
+
+ print("old>tx:ty:%:%\n", Tx, Ty);
+ return true;
}
+{
+ #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
+ #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
+ #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
+ #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; }
+
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
+ #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; }
+
+ if (y > 0 && x > MAX - y) then return MAX, true;
+ if (y < 0 && x < MIN - y) then return MIN, true;
+ return x + y, false;
}
-/*
-// value_a: s64 = 2;
-// value_b: s64 = S64_MAX-1;
-value_a: s64 = -2;
-value_b: s64 = S64_MIN+1;
-print(">%\n", S64_MAX);
-argx := get_command_line_arguments();
-if argx.count > 1
- value_a = parse_int(*argx[1]);
-if argx.count > 2
- value_b = parse_int(*argx[2]);
+add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump
+ #modify {
+ type_info_x := cast(*Type_Info)Tx;
+ type_info_y := cast(*Type_Info)Ty;
+ if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed.";
+ tx := cast(*Type_Info_Integer)type_info_x;
+ ty := cast(*Type_Info_Integer)type_info_y;
-result: s64 = ---;
-flags: s64;// = ---;
-#asm LAHF_SAHF { // TODO Remove LAHF_SAHF it not required.
+ largest_type :=
+ ifx tx.runtime_size > ty.runtime_size then Tx else
+ ifx ty.runtime_size > tx.runtime_size then Ty else
+ ifx tx.signed == ty.signed then Tx else
+ void;
- // Code from https://locklessinc.com/articles/sat_arithmetic/
+ // Only allow adding values of different signedness if the largest type is the signed one (as in Jai).
+ if tx.signed == ty.signed {
+ Tx = largest_type;
+ Ty = largest_type;
+ Tr = largest_type;
+ }
+ else if tx.signed && Tx == largest_type {
+ Ty = largest_type;
+ Tr = largest_type;
+ }
+ else if ty.signed && Ty == largest_type {
+ Tx = largest_type;
+ Tr = largest_type;
+ }
+ else return false, "Number signedness mismatch.";
+
+ print(">tx:ty:%:%\n", Tx, Ty);
+ return true;
+ }
+{
+
+ #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
+ #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
+ #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
+ #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; }
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
+ #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; }
- // value_a === b;
- // value_b === c;
- // add value_a, value_b;
- // mov result, value_a;
-
- // Version 1
- // mov b: gpr === b, value_a;
- // mov c: gpr === c, value_b;
- // add b, c;
- // mov result, b;
- // seto flags;
- // cmovns b, c;
-
-// s64b sat_adds64b(s64b x, s64b y)
-// {
-// u64b ux = x;
-// u64b uy = y;
-// u64b res = ux + uy;
-//
-// ux = (ux >> 63) + LONG_MAX;
-//
-// /* Force compiler to use cmovns instruction */
-// if ((s64b) ((ux ^ uy) | ~(uy ^ res)) >= 0)
-// {
-// res = ux;
-// }
-//
-// return res;
-// }
- // Version 2 - WORKS
- mov d: gpr === d, 9223372036854775807;
- mov a: gpr === a, value_a;
- mov b: gpr === b, value_b;
- add a, b;
- seto flags; // Flag overflow.
- mov result, a;
- mov a, value_a;
- shr a, 63;
- add a, d;
- mov c: gpr, value_a;
- xor c, b;
- xor b, result;
- not b;
- or c, b;
- test c, c;
- cmovns result, a;
-
-// s64b sat_subs64b(s64b x, s64b y)
-// {
-// u64b ux = x;
-// u64b uy = y;
-// u64b res = ux - uy;
-//
-// ux = (ux >> 63) + LONG_MAX;
-//
-// // Force compiler to use cmovns instruction
-// if ((s64b)((ux ^ uy) & (ux ^ res)) < 0)
-// {
-// res = ux;
-// }
-//
-// return res;
-// }
- // TODO Use https://godbolt.org/ to help
-
+ #if CPU != .X64 {
+ if (y > 0 && x > MAX - y) then return MAX, true;
+ if (y < 0 && x < MIN - y) then return MIN, true;
+ return x + y, false;
+ } else {
+ result: Tr = ---;
+ overflow: bool = ---;
+
+ #if Tr == s8 {
+ #asm { // s8
+ x === a;
+ y === b;
+ mov d: gpr === d, MAX;
+ mov.b c: gpr === c, x; // TODO Not using .b was erroring.
+ shr c, BITS;
+ add c, d;
+ add.b x, y; // add.b for 8bits
+ cmovo x, c;
+ seto overflow;
+ mov result, x;
+ }
+ }
+ else #if Tr == s32 {
+ #asm { // s32
+ x === a;
+ y === b;
+ mov d: gpr === d, MAX;
+ mov.d c: gpr === c, x;
+ shr c, BITS;
+ add c, d;
+ add.d x, y; // add.d for 32bits
+ cmovo x, c;
+ seto overflow;
+ mov result, x;
+ }
+ }
+ else #if Tr == s64 {
+ #asm { // s64
+ x === a;
+ y === b;
+ mov d: gpr === d, MAX;
+ mov c: gpr === c, x;
+ shr c, BITS;
+ add c, d;
+ // add.b x, y; // 8bits
+ // add.w x, y; // 16bits
+ // add.d x, y; // 32bits
+ // add.q x, y; // 64bits
+ add x, y;
+ cmovo x, c;
+ seto overflow;
+ mov result, x;
+ }
+ }
+ else #if Tr == u8 {
+ #asm { // u8
+ x === a;
+ y === b;
+ d: gpr;
+ add.b x, y; // add.b for 8bits
+ setc overflow;
+ sbb d, d;
+ or d, x;
+ mov result, d;
+ }
+ }
+ else #if Tr == u16 {
+ #asm { // u16
+ x === a;
+ y === b;
+ d: gpr;
+ add.w x, y; // add.w for 16bits
+ setc overflow;
+ sbb d, d;
+ or d, x;
+ mov result, d;
+ }
+ }
+ return result, overflow;
+ }
+}
+
+sub :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool //#dump
+ #modify {
+ return true;
+ }
+{
+ #if CPU != .X64 {
+ if (y < 0 && x > S64_MAX + y) then return S64_MAX, true;
+ if (y > 0 && x < S64_MIN + y) then return S64_MIN, true;
+ return x - y, false;
+ } else {
+ return x + y, false; // TODO Implement me please.
+ }
}
-print("% + % = %\n", value_a, value_b, result);
-print("flag: %\n", flags);
-return;
-*/ \ No newline at end of file