aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordam <dam@gudinoff>2023-05-20 03:12:37 +0100
committerdam <dam@gudinoff>2023-05-20 03:12:37 +0100
commite988d2b7eb253fabcd6e57c37f1c8c385032f03c (patch)
treecd91013ca40231d954818dbbdbd526c43fb26ba5
parenta219c3c1ef3d9cc5479426ef48035c44282b3cd2 (diff)
downloadtask-time-tracker-e988d2b7eb253fabcd6e57c37f1c8c385032f03c.tar.zst
task-time-tracker-e988d2b7eb253fabcd6e57c37f1c8c385032f03c.zip
Implemented branchless saturating integer add.
-rw-r--r--Math_Ext.jai307
1 files changed, 96 insertions, 211 deletions
diff --git a/Math_Ext.jai b/Math_Ext.jai
index 44c4d44..b4c8879 100644
--- a/Math_Ext.jai
+++ b/Math_Ext.jai
@@ -2,12 +2,12 @@
#import "Compiler";
#import "Math";
-// #run test_math_ext();
+// TODO Comparing implementaitons using dump
- // TODO Comparing implementaitons using dump
+#run test_math_ext();
-// test_math_ext :: () { set_build_options_dc(.{do_output=false});
-main :: () {
+test_math_ext :: () { set_build_options_dc(.{do_output=false});
+// main :: () {
write_strings("=====================\n", "--- Test Math_Ext ---\n");
@@ -23,7 +23,7 @@ main :: () {
*/
#import "Random";
- add_test :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) {
+ test_add :: (x: $Tx, y: $Ty, r: $Tr, t: Type, o: bool) {
tr, to := add(cast(Tx)x, cast(Ty)y);
print("add(%): % + % = % : %\n", t, x, y, r, o);
if r != tr print(" > incorrect result value: got % expected %\n", tr, r);
@@ -31,30 +31,34 @@ main :: () {
if o != to print(" > incorrect overflow flag: got % expected %\n", to, o);
}
- // add_test(cast(u8)1, cast(u8)2, 3, u8, false);
- // add_test(cast(u8)255, cast(u8)1, 255, u8, true);
+ // test_add(cast(u8)1, cast(u8)2, 3, u8, false);
+ // test_add(cast(u8)255, cast(u8)1, 255, u8, true);
- add_test(cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true);
- add_test(cast(s16)S16_MAX, cast(s16)1, S16_MAX, s16, true);
- add_test(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
- add_test(cast(s64)S64_MAX, cast(s64)1, S64_MAX, s64, true);
- // add_test(cast(s32)66, cast(s64)-2, 64, s64, false);
- // add_test(cast(u32)66, cast(s64)4, 70, s64, false);
- // add_test(cast(s32)S32_MAX, cast(s64)1, 2147483648, s64, false);
- // add_test(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
- // add_test(cast(s64)S64_MAX, cast(s64)0, S64_MAX, s64, false);
- // add_test(cast(s64)9223372036854775806, cast(s64)1, S64_MAX, s64, false);
- // add_test(cast(s64)9223372036854775806, cast(s64)2, S64_MAX, s64, true);
+ test_add(cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true);
+ test_add(cast(s16)S16_MAX, cast( u8)1, S16_MAX, s16, true);
+ test_add(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
+ test_add(cast(s64)S64_MAX, cast(u32)1, S64_MAX, s64, true);
- // add_test(cast(u8)7, cast(u8)1, 8, u8, false);
- // add_test(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true);
-
- // add_test(cast(u16)10, cast(u8)3, 13, u16, false);
- // add_test(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true);
-
- return;
+ test_add(cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, true);
+ test_add(cast(u16)U16_MAX, cast(u16)1, U16_MAX, u16, true);
+ test_add(cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, true);
+ test_add(cast(u64)U64_MAX, cast(u64)1, U64_MAX, u64, true);
+
+ // test_add(cast(s32)66, cast(s64)-2, 64, s64, false);
+ // test_add(cast(u32)66, cast(s64)4, 70, s64, false);
+ // test_add(cast(s32)S32_MAX, cast(s64)1, 2147483648, s64, false);
+ // test_add(cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
+ // test_add(cast(s64)S64_MAX, cast(s64)0, S64_MAX, s64, false);
+ // test_add(cast(s64)9223372036854775806, cast(s64)1, S64_MAX, s64, false);
+ // test_add(cast(s64)9223372036854775806, cast(s64)2, S64_MAX, s64, true);
+
+ // test_add(cast(u8)7, cast(u8)1, 8, u8, false);
+ // test_add(cast(u8)U8_MAX, cast(u8)1, U8_MAX, u8, true);
+
+ // test_add(cast(u16)10, cast(u8)3, 13, u16, false);
+ // test_add(cast(u8)1, cast(u16)U16_MAX, U16_MAX, u16, true);
-/*
+/* PERFORMANCE TEST
best_generic: float;
best_asm: float;
for 0..100 {
@@ -65,7 +69,7 @@ main :: () {
best_asm = max(best_asm, perf_asm);
}
- print("generic : %\nasm : %\n", best_generic, best_asm);
+ print("method : ops/usec\ngeneric : %\nasm : %\n", best_generic, best_asm);
performance_test :: () -> sum_size: s64, time_generic: Apollo_Time, time_asm: Apollo_Time {
@@ -79,7 +83,7 @@ main :: () {
sum := 0;
start := current_time_monotonic();
- for numbers sum = old_add(sum, it);
+ for numbers sum = add_bad(sum, it);
time := current_time_monotonic() - start;
sum_asm := 0;
@@ -91,11 +95,10 @@ main :: () {
return SUM_SIZE, time, time_asm;
}
- */
-
+*/
}
-old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump
+add :: (x: $Tx, y: $Ty) -> result: $Tr, saturated: bool #dump
#modify {
type_info_x := cast(*Type_Info)Tx;
type_info_y := cast(*Type_Info)Ty;
@@ -125,205 +128,87 @@ old_add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool // #dump
}
else return false, "Number signedness mismatch.";
- print("old>tx:ty:%:%\n", Tx, Ty);
+ print(">tx:ty:%:%\n", Tx, Ty);
return true;
}
{
- #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
- #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
- #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
- #if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; }
- #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; }
- #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
- #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
+ // TODO Are these constants really needed?
#if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; }
-
- if (y > 0 && x > MAX - y) then return MAX, true;
- if (y < 0 && x < MIN - y) then return MIN, true;
- return x + y, false;
-}
-
-add :: (x: $Tx, y: $Ty) -> result: $Tr, overflow: bool #dump
- #modify {
- type_info_x := cast(*Type_Info)Tx;
- type_info_y := cast(*Type_Info)Ty;
- if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed.";
- tx := cast(*Type_Info_Integer)type_info_x;
- ty := cast(*Type_Info_Integer)type_info_y;
-
- largest_type :=
- ifx tx.runtime_size > ty.runtime_size then Tx else
- ifx ty.runtime_size > tx.runtime_size then Ty else
- ifx tx.signed == ty.signed then Tx else
- void;
-
- // Only allow to add different signedness values if largest type is the signed one (as in JAI).
- if tx.signed == ty.signed {
- Tx = largest_type;
- Ty = largest_type;
- Tr = largest_type;
- }
- else if tx.signed && Tx == largest_type {
- Ty = largest_type;
- Tr = largest_type;
- }
- else if ty.signed && Ty == largest_type {
- Tx = largest_type;
- Tr = largest_type;
- }
- else return false, "Number signedness mismatch.";
-
- print(">tx:ty:%:%\n", Tx, Ty);
- return true;
- }
-{
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; }
- #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
- #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
- #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
#if Tr == u8 { MAX :: U8_MAX; MIN :: 0; BITS :: 8; }
-
- #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; BITS :: 63; }
- #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; BITS :: 31; }
- #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; BITS :: 15; }
- #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; BITS :: 7; }
+ #if Tr == u16 { MAX :: U16_MAX; MIN :: 0; BITS :: 16; }
+ #if Tr == u32 { MAX :: U32_MAX; MIN :: 0; BITS :: 32; }
+ #if Tr == u64 { MAX :: U64_MAX; MIN :: 0; BITS :: 64; }
#if CPU != .X64 {
- if (y > 0 && x > MAX - y) then return MAX, true;
- if (y < 0 && x < MIN - y) then return MIN, true;
- return x + y, false;
+
+ #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
+ if (y > 0 && x > MAX - y) then return MAX, true;
+ if (y < 0 && x < MIN - y) then return MIN, true;
+ return x + y, false;
+ } else {
+ if (x > MAX - y) then return MAX, true;
+ return x + y, false;
+ }
+
} else {
+
+ #import "String";
result: Tr = ---;
- overflow: bool = ---;
+ saturated: bool = ---;
-#import "String";
- ADD_ASM :: #string DONE
- #asm { // s8
- x === a;
- y === b;
- mov max: gpr === d, MAX;
- mov.SIZE c: gpr === c, x; // TODO Not using .b was erroing.
- shr c, BITS;
- add c, max;
- add.SIZE x, y; // add.b for 8bits
- cmovo x, c;
- seto overflow;
+ S_ADD_ASM :: #string DONE
+ #asm {
+ // Calculate limit based on x's sign.
+ mov limit: gpr, MAX;
+ mov sign: gpr, x;
+ shr sign, BITS;
+ add.SIZE limit, sign;
+
mov result, x;
+ add.SIZE result, y;
+ seto saturated;
+ cmovo result, limit;
}
- DONE
-
- #if Tr == s8
- #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7");
- #if Tr == s16
- #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15");
- #if Tr == s32
- #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31");
- #if Tr == s64
- #insert #run replace(replace(replace(ADD_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63");
-
-
- // TODO
- // WIP - doing a similar process for unsigned saturation add
- else #if Tr == u8 {
- #asm { // u8
- x === a;
- y === b;
- d: gpr;
- add.b x, y; // add.b for 8bits
- setc overflow;
- sbb d, d;
- or d, x;
- mov result, d;
- }
- }
- else #if Tr == u16 {
- #asm { // u16
- x === a;
- y === b;
- d: gpr;
- add.w x, y; // add.w for 16bits
- setc overflow;
- sbb d, d;
- or d, x;
- mov result, d;
- }
- }
+ DONE
+ #if Tr == s8
+ #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".b"), "MAX", "127"), "BITS", "7");
+ #if Tr == s16
+ #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".w"), "MAX", "32767"), "BITS", "15");
+ #if Tr == s32
+ #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".d"), "MAX", "2147483647"), "BITS", "31");
+ #if Tr == s64
+ #insert #run replace(replace(replace(S_ADD_ASM, ".SIZE", ".q"), "MAX", "9223372036854775807"), "BITS", "63");
- // #asm { // s8
- // x === a;
- // y === b;
- // mov d: gpr === d, MAX;
- // mov.b c: gpr === c, x; // TODO Not using .b was erroing.
- // shr c, BITS;
- // add c, d;
- // add.b x, y; // add.b for 8bits
- // cmovo x, c;
- // seto overflow;
- // mov result, x;
- // }
- // }
- /*
- else #if Tr == s32 {
- #asm { // s32
- x === a;
- y === b;
- mov d: gpr === d, MAX;
- mov.d c: gpr === c, x;
- shr c, BITS;
- add c, d;
- add.d x, y; // add.d for 32bits
- cmovo x, c;
- seto overflow;
- mov result, x;
- }
- }
- else #if Tr == s64 {
- #asm { // s64
- x === a;
- y === b;
- mov d: gpr === d, MAX;
- mov c: gpr === c, x;
- shr c, BITS;
- add c, d;
- // add.b x, y; // 8bits
- // add.w x, y; // 16bits
- // add.d x, y; // 32bits
- // add.q x, y; // 64bits
- add x, y;
- cmovo x, c;
- seto overflow;
+
+ U_ADD_ASM :: #string DONE
+ #asm { // s8
+ mov limit: gpr, MAX;
mov result, x;
+ add.SIZE result, y;
+ setc saturated;
+ cmovc result, limit;
}
- }
- else #if Tr == u8 {
- #asm { // u8
- x === a;
- y === b;
- d: gpr;
- add.b x, y; // add.b for 8bits
- setc overflow;
- sbb d, d;
- or d, x;
- mov result, d;
- }
- }
- else #if Tr == u16 {
- #asm { // u16
- x === a;
- y === b;
- d: gpr;
- add.w x, y; // add.w for 16bits
- setc overflow;
- sbb d, d;
- or d, x;
- mov result, d;
- }
- }
- */
- return result, overflow;
+ DONE
+
+ #if Tr == u8
+ #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".b"), "MAX", "255");
+ #if Tr == u16
+ #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".w"), "MAX", "65535");
+ #if Tr == u32
+ #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".d"), "MAX", "4294967295");
+ #if Tr == u64
+ #insert #run replace(replace(U_ADD_ASM, ".SIZE", ".q"), "MAX", "18446744073709551615");
+
+
+ return result, saturated;
+
}
}