aboutsummaryrefslogtreecommitdiff
path: root/Saturation
diff options
context:
space:
mode:
authordam <dam@gudinoff>2024-05-26 01:22:59 +0100
committerdam <dam@gudinoff>2024-08-29 10:03:35 +0100
commit7d358263c8b7dd154f2e7f049659f4c706ab5b75 (patch)
treed72ad3b62f25d8927b94ec16da4ebd095f5a8e85 /Saturation
downloadjai-modules-7d358263c8b7dd154f2e7f049659f4c706ab5b75.tar.zst
jai-modules-7d358263c8b7dd154f2e7f049659f4c706ab5b75.zip
Saturation, TUI, and UTF8 version 1.0.
Diffstat (limited to 'Saturation')
-rw-r--r--Saturation/module.jai418
-rw-r--r--Saturation/tests.jai647
2 files changed, 1065 insertions, 0 deletions
diff --git a/Saturation/module.jai b/Saturation/module.jai
new file mode 100644
index 0000000..50c9b3c
--- /dev/null
+++ b/Saturation/module.jai
@@ -0,0 +1,418 @@
+// Integer saturating arighmetic (with assembly branch-free procedures for x64 - expecting signed values in two's complement).
+
+#module_parameters(PREFER_BRANCH_FREE_CODE := false);
+
+#import "Basic";
+#import "Math";
+#import "String";
+
+
+INTEGER_ARITHMETIC_TYPES_CHECK :: #string DONE
+ type_info_x := cast(*Type_Info)Tx;
+ type_info_y := cast(*Type_Info)Ty;
+ if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed.";
+ tx := cast(*Type_Info_Integer)type_info_x;
+ ty := cast(*Type_Info_Integer)type_info_y;
+
+ largest_type :=
+ ifx tx.runtime_size > ty.runtime_size then Tx else
+ ifx ty.runtime_size > tx.runtime_size then Ty else
+ ifx tx.signed == ty.signed then Tx else
+ void;
+
+ // Only allow to add different signedness values if largest type is the signed one (as in JAI).
+ if tx.signed == ty.signed {
+ Tx = largest_type;
+ Ty = largest_type;
+ Tr = largest_type;
+ }
+ else if tx.signed && Tx == largest_type {
+ Ty = largest_type;
+ Tr = largest_type;
+ }
+ else if ty.signed && Ty == largest_type {
+ Tx = largest_type;
+ Tr = largest_type;
+ }
+ else return false, "Number signedness mismatch.";
+
+ return true;
+DONE
+
+add :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; }
+{
+
+ #if !(prefer_branch_free_code && CPU == .X64) {
+
+ #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
+
+ #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; }
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; }
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; }
+
+ if (y > 0 && x > MAX - y) then return MAX, true;
+ if (y < 0 && x < MIN - y) then return MIN, true;
+
+ } else {
+
+ #if Tr == u8 { MAX :: U8_MAX; }
+ #if Tr == u16 { MAX :: U16_MAX; }
+ #if Tr == u32 { MAX :: U32_MAX; }
+ #if Tr == u64 { MAX :: U64_MAX; }
+
+ if (x > MAX - y) then return MAX, true;
+
+ }
+
+ return x + y, false;
+
+ } else {
+
+ result: Tr = ---;
+ saturated: bool = ---;
+
+
+ ADD_SIGNED_ASM :: #string DONE
+ #asm {
+ mov result, -1; // Pre-set result with signed maximum (set all bits...
+ shr.SIZE result, 1; // ...then, clear MSB).
+ bt x, SIGN_BIT; // Test sign bit (affect CF).
+ adc result, 0; // Overflow signed maximum to signed minimum if CF is set.
+
+ add.SIZE x, y; // Add values (affect OF).
+ seto saturated; // Set saturated flag if OF.
+ cmovno result, x; // Move add-result to result if NOT OF.
+ }
+ DONE
+
+ #if Tr == s8
+ #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".b"), "SIGN_BIT", "7");
+ #if Tr == s16
+ #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".w"), "SIGN_BIT", "15");
+ #if Tr == s32
+ #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".d"), "SIGN_BIT", "31");
+ #if Tr == s64
+ #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".q"), "SIGN_BIT", "63");
+
+
+ ADD_UNSIGNED_ASM :: #string DONE
+ #asm {
+ mov result, -1; // Pre-set result with unsigned maximum.
+ add.SIZE x, y; // Add values (affect CF).
+ setc saturated; // Set saturated flag if CF.
+ cmovnc result, x; // Move add-result to result if NOT CF.
+ }
+ DONE
+
+ #if Tr == u8
+ #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".b");
+ #if Tr == u16
+ #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".w");
+ #if Tr == u32
+ #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".d");
+ #if Tr == u64
+ #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".q");
+
+
+ return result, saturated;
+
+ }
+}
+
+sub :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; }
+{
+
+ #if !(prefer_branch_free_code && CPU == .X64) {
+
+ #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
+
+ #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; }
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; }
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; }
+
+ if (y < 0 && x > MAX + y) then return MAX, true;
+ if (y > 0 && x < MIN + y) then return MIN, true;
+
+ } else {
+
+ if (y > x) then return 0, true;
+
+ }
+
+ return x - y, false;
+
+ } else {
+
+ result: Tr = ---;
+ saturated: bool = ---;
+
+
+ SUB_SIGNED_ASM :: #string DONE
+ #asm {
+ mov result, -1; // Pre-set result with signed maximum (set all bits...
+ shr.SIZE result, 1; // ...then, clear MSB).
+ bt x, SIGN_BIT; // Test signal bit (affect CF).
+ adc result, 0; // Overflow signed maximum to signed minimum if CF is set.
+
+ sub.SIZE x, y; // Subtract values (affect OF).
+ seto saturated; // Set saturated flag if OF.
+ cmovno result, x; // Move subtract-result to result if NOT OF.
+ }
+ DONE
+
+ #if Tr == s8
+ #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".b"), "SIGN_BIT", "7");
+ #if Tr == s16
+ #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".w"), "SIGN_BIT", "15");
+ #if Tr == s32
+ #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".d"), "SIGN_BIT", "31");
+ #if Tr == s64
+ #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".q"), "SIGN_BIT", "63");
+
+
+ SUB_UNSIGNED_ASM :: #string DONE
+ #asm {
+ xor result, result; // Pre-set result with usigned minimum (zero).
+ sub.SIZE x, y; // Subtract values (affect CF).
+ setc saturated; // Set saturated flag if CF.
+ cmovnc result, x; // Move subtract-result to result if NOT CF.
+ }
+ DONE
+
+ #if Tr == u8
+ #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".b");
+ #if Tr == u16
+ #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".w");
+ #if Tr == u32
+ #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".d");
+ #if Tr == u64
+ #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".q");
+
+
+ return result, saturated;
+
+ }
+
+}
+
+mul :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; }
+{
+
+ #if !(prefer_branch_free_code && CPU == .X64) {
+
+ #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
+
+ #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; }
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; }
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; }
+
+ if x == 0 || y == 0 then return 0, false;
+ if x > 0 && y > 0 && x > MAX / y then return MAX, true;
+ if x < 0 && y < 0 && x < MAX / y then return MAX, true;
+ if (y < 0 && x > 0 && y < MIN / x) || (x < 0 && y > 0 && x < MIN / y) then return MIN, true;
+
+ } else {
+
+ #if Tr == u8 { MAX :: U8_MAX; }
+ #if Tr == u16 { MAX :: U16_MAX; }
+ #if Tr == u32 { MAX :: U32_MAX; }
+ #if Tr == u64 { MAX :: U64_MAX; }
+
+ if x == 0 || y == 0 then return 0, false;
+ if x > MAX / y then return MAX, true;
+
+ }
+
+ return x * y, false;
+
+ } else {
+
+ result: Tr = ---;
+ saturated: bool = ---;
+
+ MUL_SIGNED_ASM :: #string DONE
+ #asm {
+ // Using two copies of the x value (x_, sign) seems to be a bit faster (not sure why).
+ mov x_: gpr === a, x; // Pin copy of x value to register A.
+
+ mov result, -1; // Pre-set result with signed maximum (set all bits...
+ shr.SIZE result, 1; // ...then, clear MSB).
+ mov sign:, x; // Use copy of x value.
+ xor sign, y; // Calculate result signal bit using xor.
+ bt sign, SIGN_BIT; // Test signal bit (affect CF).
+ adc result, 0; // Overflow signed maximum to signed minimum if CF is set.
+
+ imul.SIZE x_, y; // Multiply values (affect OF).
+ seto saturated; // Set saturated flag if OF.
+ cmovno result, x_; // Move multiply-result to result if NOT OF.
+ }
+ DONE
+
+ #if Tr == s8
+ #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".b"), "SIGN_BIT", "7");
+ #if Tr == s16
+ #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".w"), "SIGN_BIT", "15");
+ #if Tr == s32
+ #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".d"), "SIGN_BIT", "31");
+ #if Tr == s64
+ #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".q"), "SIGN_BIT", "63");
+
+
+ MUL_UNSIGNED_ASM :: #string DONE
+ #asm {
+ result === a; // Pin result to register A.
+
+ mov result, x; // Move value x to result.
+ mul.SIZE reg_d:, result, y; // Multiply values (affect CF).
+ setc saturated; // Set saturated flag if CF.
+ sbb mask:, mask; // If CF: mask = -1 (all bits set); else: mask = 0.
+ or result, mask; // If CF was set, then result will be set to unsigned maximum (all bits set).
+ }
+ DONE
+
+ #if Tr == u8
+ #insert #run replace(replace(MUL_UNSIGNED_ASM, ".SIZE", ".b"), "reg_d:,", ""); // For 8bits mul, we do not need D register.
+ #if Tr == u16
+ #insert #run replace(MUL_UNSIGNED_ASM, ".SIZE", ".w");
+ #if Tr == u32
+ #insert #run replace(MUL_UNSIGNED_ASM, ".SIZE", ".d");
+ #if Tr == u64
+ #insert #run replace(MUL_UNSIGNED_ASM, ".SIZE", ".q");
+
+
+ return result, saturated;
+
+ }
+}
+
+div :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, remainder: Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; }
+{
+
+ #if !(prefer_branch_free_code && CPU == .X64) {
+
+ #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 {
+
+ #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; }
+ #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; }
+ #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; }
+ #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; }
+
+ if x == MIN && y == -1 then return MAX, -1, true;
+
+ }
+
+ result := x / y;
+ remainder := x - (y * result);
+ return result, remainder, false;
+
+ } else {
+
+ result: Tr = ---;
+ remainder: Tr = ---;
+ saturated: bool = ---;
+
+ DIV_SIGNED_ASM :: #string DONE
+ #asm {
+ result === a; // Pin result to register A (to be used as dividend on idiv).
+ remainder === d; // Pin remainder to register D.
+
+ xor saturated, saturated; // Clear saturated.
+
+ // Detect div(MIN/-1) and flag it on ZF.
+ mov t_dividend:, -1; // Pre-set t_dividend with signed minimum (set all bits...
+ shr.SIZE t_dividend, 1; // ...then, clear MSB...
+ not t_dividend; // ...then, negate to obtain MSB set and all other bits cleared).
+ //
+ mov limit:, t_dividend; // Keep copy of signed minimum on limit.
+ add limit, 1; // Set limit as signed minimum + 1.
+ //
+ xor.SIZE t_dividend, x; // Clear dividend if x value is equal to signed minimum.
+ //
+ mov t_divisor:, -1; // Pre-set test_divisor with -1.
+ xor.SIZE t_divisor, y; // Clear test_divisor if y value is equal to -1.
+ //
+ or.SIZE t_dividend, t_divisor; // Or t_dividend with t_divisor (affect ZF).
+
+ setz saturated; // Set saturated flag if ZF.
+ mov result, x; // Copy x value to result (dividend).
+ cmovz result, limit; // If ZF: copy limit (signed minimum + 1) to result (dividend).
+
+ DIVIDE_PLACEHOLDER
+
+ sub.SIZE remainder, saturated; // If saturated: remainder = 0 - 1; otherwise: remainder = x - 0.
+ }
+ DONE
+
+ DIV_SIGNED_CALC_8BITS :: #string DONE
+ cbw result; // Prepare dividend high bits (sign-extend).
+ idiv.SIZE result, y; // Divide values.
+ mov remainder, result; // Extract remainder from result's high bits.
+ sar remainder, 8; // Shift remainder from high to low bits.
+ DONE
+
+ DIV_SIGNED_CALC_16BITS :: #string DONE
+ cwd remainder, result; // Prepare dividend high bits (sign-extend).
+ idiv.SIZE remainder, result, y; // Divide values.
+ DONE
+
+ DIV_SIGNED_CALC_32BITS :: #string DONE
+ cdq remainder, result; // Prepare dividend high bits (sign-extend).
+ idiv.SIZE remainder, result, y; // Divide values.
+ DONE
+
+ DIV_SIGNED_CALC_64BITS :: #string DONE
+ cqo remainder, result; // Prepare dividend high bits (sign-extend).
+ idiv.SIZE remainder, result, y; // Divide values.
+ DONE
+
+ #if Tr == s8
+ #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_8BITS), ".SIZE", ".b");
+ #if Tr == s16
+ #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_16BITS), ".SIZE", ".w");
+ #if Tr == s32
+ #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_32BITS), ".SIZE", ".d");
+ #if Tr == s64
+ #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_64BITS), ".SIZE", ".q");
+
+
+ DIV_UNSIGNED_ASM :: #string DONE
+ #asm {
+ result === a; // Pin result to register A.
+ remainder === d; // Pin remainder to register D.
+
+ xor result, result; // Clear result.
+ xor remainder, remainder; // Clear remainder (required when used as dividend's high bits).
+ xor saturated, saturated; // Clear saturated (unsigned division never saturates).
+ mov.SIZE result, x; // Copy x value to result.
+
+ DIVIDE_PLACEHOLDER
+ }
+ DONE
+
+ DIV_UNSIGNED_CALC_8BITS :: #string DONE
+ div.SIZE result, y; // Divide values.
+ mov remainder, result; // Extract remainder from result's high bits.
+ sar remainder, 8; // Shift remainder from high to low bits.
+ DONE
+
+ DIV_UNSIGNED_CALC :: #string DONE
+ div.SIZE remainder, result, y; // Divide values.
+ DONE
+
+ #if Tr == u8
+ #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC_8BITS), ".SIZE", ".b");
+ #if Tr == u16
+ #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC), ".SIZE", ".w");
+ #if Tr == u32
+ #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC), ".SIZE", ".d");
+ #if Tr == u64
+ #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC), ".SIZE", ".q");
+
+
+ return result, remainder, saturated;
+
+ }
+}
diff --git a/Saturation/tests.jai b/Saturation/tests.jai
new file mode 100644
index 0000000..2a82300
--- /dev/null
+++ b/Saturation/tests.jai
@@ -0,0 +1,647 @@
+// Tests for integer saturating arighmetic procedures.
+
+AVOID_INFINITE_FOR_LOOP :: true;
+
+#import "Basic";
+#import "Compiler";
+#import "Math";
+#import "String";
+#import "Saturation";
+
+main :: () {
+
+ write_strings(
+ "#=======================#\n",
+ "# Basic tests #\n"
+ );
+
+
+ test_op :: ($operation: string, x: $Tx, y: $Ty, result: $Tr, type: Type, saturated: bool, remainder: Tr = 0) -> errors_found: int #expand {
+
+ #insert #run () -> string {
+ // Build test call.
+ builder: String_Builder;
+ call := ifx operation == "div"
+ then "t_result, t_remainder, t_saturated := %(cast(Tx)x, cast(Ty)y);"
+ else "t_result, t_saturated := %(cast(Tx)x, cast(Ty)y);";
+
+ print(*builder, call, operation);
+
+ return builder_to_string(*builder);
+ }();
+
+ errors := 0;
+ log: String_Builder;
+ if result != t_result { errors += 1; print(*log, " > incorrect result value: got % expected %\n", t_result, result); };
+ if type != type_of(t_result) { errors += 1; print(*log, " > incorrect result type: got % expected %\n", type_of(t_result), type); };
+ if saturated != t_saturated { errors += 1; print(*log, " > incorrect saturated flag: got % expected %\n", t_saturated, saturated); };
+ #if operation == "div" {
+ if remainder != t_remainder { errors += 1; print(*log, " > incorrect remainder value: got % expected %\n", t_remainder, remainder); };
+ }
+
+ if errors > 0 {
+ #if operation == "div" {
+ print("%_%(%, %) = % + %0%0\n", operation, type, x, y, result, remainder, ifx saturated then " : saturated");
+ }
+ else {
+ print("%_%(%, %) = %0%0\n", operation, type, x, y, result, ifx saturated then " : saturated");
+ }
+ write_builder(*log);
+ }
+
+ return errors;
+ }
+
+ errors := 0;
+
+ // Test signed add.
+ errors += test_op("add", cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true);
+ errors += test_op("add", cast(s16)S16_MAX, cast( u8)1, S16_MAX, s16, true);
+ errors += test_op("add", cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
+ errors += test_op("add", cast(s64)S64_MAX, cast(u32)1, S64_MAX, s64, true);
+
+ errors += test_op("add", cast( s8) S8_MAX, cast( s8) S8_MIN, -1, s8, false);
+ errors += test_op("add", cast(s16)S16_MAX, cast(s16)S16_MIN, -1, s16, false);
+ errors += test_op("add", cast(s32)S32_MAX, cast(s32)S32_MIN, -1, s32, false);
+ errors += test_op("add", cast(s64)S64_MAX, cast(s64)S64_MIN, -1, s64, false);
+
+ // Test unsigned add.
+ errors += test_op("add", cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, true);
+ errors += test_op("add", cast(u16)U16_MAX, cast(u16)1, U16_MAX, u16, true);
+ errors += test_op("add", cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, true);
+ errors += test_op("add", cast(u64)U64_MAX, cast(u64)1, U64_MAX, u64, true);
+
+ errors += test_op("add", cast( u8) U8_MAX, cast( u8)0, U8_MAX, u8, false);
+ errors += test_op("add", cast(u16)U16_MAX, cast(u16)0, U16_MAX, u16, false);
+ errors += test_op("add", cast(u32)U32_MAX, cast(u32)0, U32_MAX, u32, false);
+ errors += test_op("add", cast(u64)U64_MAX, cast(u64)0, U64_MAX, u64, false);
+
+ // Test signed sub.
+ errors += test_op("sub", cast( s8) S8_MIN, cast( s8)1, S8_MIN, s8, true);
+ errors += test_op("sub", cast(s16)S16_MIN, cast( u8)1, S16_MIN, s16, true);
+ errors += test_op("sub", cast(s32)S32_MIN, cast(s32)1, S32_MIN, s32, true);
+ errors += test_op("sub", cast(s64)S64_MIN, cast(u32)1, S64_MIN, s64, true);
+
+ errors += test_op("sub", cast( s8)-1, cast( s8) S8_MAX, S8_MIN, s8, false);
+ errors += test_op("sub", cast(s16)-1, cast(s16)S16_MAX, S16_MIN, s16, false);
+ errors += test_op("sub", cast(s32)-1, cast(s32)S32_MAX, S32_MIN, s32, false);
+ errors += test_op("sub", cast(s64)-1, cast(s64)S64_MAX, S64_MIN, s64, false);
+
+ // Test unsigned sub.
+ errors += test_op("sub", cast( u8)1, cast( u8) U8_MAX, 0, u8, true);
+ errors += test_op("sub", cast( u8)1, cast(u16)U16_MAX, 0, u16, true);
+ errors += test_op("sub", cast(u32)1, cast(u32)U32_MAX, 0, u32, true);
+ errors += test_op("sub", cast(u32)1, cast(u64)U64_MAX, 0, u64, true);
+
+ errors += test_op("sub", cast( u8) U8_MAX, cast( u8)0, U8_MAX, u8, false);
+ errors += test_op("sub", cast(u16)U16_MAX, cast( u8)0, U16_MAX, u16, false);
+ errors += test_op("sub", cast(u32)U32_MAX, cast(u32)0, U32_MAX, u32, false);
+ errors += test_op("sub", cast(u64)U64_MAX, cast(u32)0, U64_MAX, u64, false);
+
+ // Test signed mul.
+ errors += test_op("mul", cast( s8) S8_MIN, cast( s8)-1, S8_MAX, s8, true);
+ errors += test_op("mul", cast(s16)S16_MIN, cast( s8)-1, S16_MAX, s16, true);
+ errors += test_op("mul", cast(s32)S32_MIN, cast(s32)-1, S32_MAX, s32, true);
+ errors += test_op("mul", cast(s64)S64_MIN, cast(s32)-1, S64_MAX, s64, true);
+
+ errors += test_op("mul", cast( s8) S8_MAX, cast( s8)-2, S8_MIN, s8, true);
+ errors += test_op("mul", cast(s16)S16_MAX, cast( s8)-2, S16_MIN, s16, true);
+ errors += test_op("mul", cast(s32)S32_MAX, cast(s32)-2, S32_MIN, s32, true);
+ errors += test_op("mul", cast(s64)S64_MAX, cast(s32)-2, S64_MIN, s64, true);
+
+ errors += test_op("mul", cast( s8)-2, cast( s8) S8_MAX, S8_MIN, s8, true);
+ errors += test_op("mul", cast( s8)-2, cast(s16)S16_MAX, S16_MIN, s16, true);
+ errors += test_op("mul", cast(s32)-2, cast(s32)S32_MAX, S32_MIN, s32, true);
+ errors += test_op("mul", cast(s32)-2, cast(s64)S64_MAX, S64_MIN, s64, true);
+
+ errors += test_op("mul", cast( s8) S8_MAX, cast( s8)2, S8_MAX, s8, true);
+ errors += test_op("mul", cast(s16)S16_MAX, cast( s8)2, S16_MAX, s16, true);
+ errors += test_op("mul", cast(s32)S32_MAX, cast(s32)2, S32_MAX, s32, true);
+ errors += test_op("mul", cast(s64)S64_MAX, cast(s32)2, S64_MAX, s64, true);
+
+ errors += test_op("mul", cast( s8) S8_MAX, cast( s8)-1, -S8_MAX, s8, false);
+ errors += test_op("mul", cast(s16)S16_MAX, cast( s8)-1, -S16_MAX, s16, false);
+ errors += test_op("mul", cast(s32)S32_MAX, cast(s32)-1, -S32_MAX, s32, false);
+ errors += test_op("mul", cast(s64)S64_MAX, cast(s32)-1, -S64_MAX, s64, false);
+
+ errors += test_op("mul", cast( s8) S8_MAX, cast( s8)0, 0, s8, false);
+ errors += test_op("mul", cast(s16)S16_MAX, cast( u8)0, 0, s16, false);
+ errors += test_op("mul", cast(s32)S32_MAX, cast(s32)0, 0, s32, false);
+ errors += test_op("mul", cast(s64)S64_MAX, cast(u32)0, 0, s64, false);
+
+ // Test unsigned mul.
+ errors += test_op("mul", cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, false);
+ errors += test_op("mul", cast(u16)U16_MAX, cast( u8)1, U16_MAX, u16, false);
+ errors += test_op("mul", cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, false);
+ errors += test_op("mul", cast(u64)U64_MAX, cast(u32)1, U64_MAX, u64, false);
+
+ errors += test_op("mul", cast( u8) U8_MAX, cast( u8)2, U8_MAX, u8, true);
+ errors += test_op("mul", cast(u16)U16_MAX, cast( u8)2, U16_MAX, u16, true);
+ errors += test_op("mul", cast(u32)U32_MAX, cast(u32)2, U32_MAX, u32, true);
+ errors += test_op("mul", cast(u64)U64_MAX, cast(u32)2, U64_MAX, u64, true);
+
+ // Test signed div.
+ errors += test_op("div", cast( s8) S8_MIN, cast( s8)-1, S8_MAX, s8, true, -1);
+ errors += test_op("div", cast(s16)S16_MIN, cast( s8)-1, S16_MAX, s16, true, -1);
+ errors += test_op("div", cast(s32)S32_MIN, cast(s32)-1, S32_MAX, s32, true, -1);
+ errors += test_op("div", cast(s64)S64_MIN, cast(s32)-1, S64_MAX, s64, true, -1);
+
+ errors += test_op("div", cast( s8) S8_MAX, cast( s8)-2, - S8_MAX/2, s8, false, 1);
+ errors += test_op("div", cast(s16)S16_MAX, cast( s8)-2, -S16_MAX/2, s16, false, 1);
+ errors += test_op("div", cast(s32)S32_MAX, cast(s32)-2, -S32_MAX/2, s32, false, 1);
+ errors += test_op("div", cast(s64)S64_MAX, cast(s32)-2, -S64_MAX/2, s64, false, 1);
+
+ errors += test_op("div", cast( s8)15, cast( s8)5, 3, s8, false, 0);
+ errors += test_op("div", cast( u8)15, cast(s16)7, 2, s16, false, 1);
+ errors += test_op("div", cast(s16)15, cast(s32)13, 1, s32, false, 2);
+ errors += test_op("div", cast(u16)100, cast(s64)3, 33, s64, false, 1);
+
+ // Test unsigned div.
+ errors += test_op("div", cast( u8) U8_MAX, cast( u8)2, U8_MAX/2, u8, false, 1);
+ errors += test_op("div", cast(u16)U16_MAX, cast( u8)2, U16_MAX/2, u16, false, 1);
+ errors += test_op("div", cast(u32)U32_MAX, cast(u32)2, U32_MAX/2, u32, false, 1);
+ errors += test_op("div", cast(u64)U64_MAX, cast(u32)2, U64_MAX/2, u64, false, 1);
+
+ if errors > 0 print("# Found % %!\n", errors, ifx errors == 1 then "error" else "errors"); else print(" No errors found.\n");
+
+
+ // Test generic agains branch-free alternative.
+ write_strings(
+ "#=======================#\n",
+ "# generic == x64 asm ? #\n"
+ );
+
+
+ full_test :: ($type: Type, test: (a: type, b: type)) {
+ #if type == {
+ case u8;
+ min :u8 = 0;
+ max :u8 = U8_MAX;
+
+ case u16;
+ min :u16 = 0;
+ max :u16 = U16_MAX;
+
+ case s8;
+ min :s8 = S8_MIN;
+ max :s8 = S8_MAX;
+
+ case s16;
+ min :s16 = S16_MIN;
+ max :s16 = S16_MAX;
+
+ case;
+ assert(false, "This will take way too long.");
+ }
+
+ #if !AVOID_INFINITE_FOR_LOOP {
+ for a : min..max {
+ for b : min..max {
+ test(a, b);
+ }
+ }
+ }
+ else {
+ a :type = min;
+ b :type = min;
+ while loop_a := true {
+ while loop_b := true {
+ test(a, b);
+ if b == max then break loop_b; else b += 1;
+ }
+ if a == max then break loop_a; else a += 1;
+ }
+ }
+ }
+
+ partial_test :: ($type: Type, test: (a: type, b: type)) {
+ min, max: type;
+ #if type == {
+ case u8;
+ min = 0;
+ max = U8_MAX;
+
+ case u16;
+ min = 0;
+ max = U16_MAX;
+
+ case u32;
+ min = 0;
+ max = U32_MAX;
+
+ case s8;
+ min = S8_MIN;
+ max = S8_MAX;
+
+ case s16;
+ min = S16_MIN;
+ max = S16_MAX;
+
+ case s32;
+ min = S32_MIN;
+ max = S32_MAX;
+
+ case;
+ assert(false, "This will take way too long.");
+ }
+
+ #if !AVOID_INFINITE_FOR_LOOP {
+ for a: min..max {
+ b := a;
+ c := max - a + min;
+ test(a, b);
+ test(a, c);
+ }
+ }
+ else {
+ a := min;
+ while loop := true {
+ b := a;
+ c := max - a + min;
+ test(a, b);
+ test(a, c);
+ if a == max then break loop; else a += 1;
+ }
+ }
+ }
+
+ minimal_test :: ($type: Type, test: (a: type, b: type)) {
+ #if type == {
+ case u32;
+ min :u32 = 0;
+ mid :u32 = U32_MAX / 2;
+ max :u32 = U32_MAX;
+ range :u32 = cast(u32)U16_MAX * 2048;
+
+ case u64;
+ min :u64 = 0;
+ mid :u64 = U64_MAX / 2;
+ max :u64 = U64_MAX;
+ range :u64 = cast(u64)U16_MAX * 2048;
+
+ case s32;
+ min :s32 = S32_MIN;
+ mid :s32 = (S32_MIN / 2) + (S32_MAX / 2);
+ max :s32 = S32_MAX;
+ range :s32 = cast(s32)S16_MAX * 2048;
+
+ case s64;
+ min :s64 = S64_MIN;
+ mid :s64 = (S64_MIN / 2) + (S64_MAX / 2);
+ max :s64 = S64_MAX;
+ range :s64 = cast(s64)S16_MAX * 2048;
+
+ case;
+ assert(false, "Invalid type % given.", type);
+ }
+
+ #if !AVOID_INFINITE_FOR_LOOP {
+ start, end : type;
+
+ start = min;
+ end = min+range;
+ for a: start..end {
+ b := a;
+ c := end - a + start;
+ test(a, b);
+ test(a, c);
+ }
+
+ start = mid-range;
+ end = mid+range;
+ for a: start..end {
+ b := a;
+ c := end - a + start;
+ test(a, b);
+ test(a, c);
+ }
+
+ start = max-range;
+ end = max;
+ for a: start..end {
+ b := a;
+ c := end - a + start;
+ test(a, b);
+ test(a, c);
+ }
+ }
+ else {
+ start, end, a : type;
+
+ start = min;
+ end = min + range;
+ a = start;
+ while loop := true {
+ b := a;
+ c := end - a + start;
+ test(a, b);
+ test(a, c);
+ if a == end then break loop; else a += 1;
+ }
+
+ start = mid - range;
+ end = mid + range;
+ a = start;
+ while loop := true {
+ b := a;
+ c := end - a + start;
+ test(a, b);
+ test(a, c);
+ if a == end then break loop; else a += 1;
+ }
+
+ start = max - range;
+ end = max;
+ a = start;
+ while loop := true {
+ b := a;
+ c := end - a + start;
+ test(a, b);
+ test(a, c);
+ if a == end then break loop; else a += 1;
+ }
+ }
+ }
+
+
+ // add
+
+ ADD_TEST_TEMPLATE :: (a: $T, b: T) {
+ rT, sT := add(a, b, true);
+ rF, sF := add(a, b, false);
+ assert(rT == rF && sT == sF, "> add(%1, %2, true) = %3,%4 != add(%1, %2, false) = %5,%6\n", a, b, rT, sT, rF, sF);
+ }
+
+ write_string("# testing add,u8 #\n");
+ full_test(u8, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,u16 #\n");
+ full_test(u16, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,u32 #\n");
+ partial_test(u32, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,u64 #\n");
+ minimal_test(u64, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,s8 #\n");
+ full_test(s8, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,s16 #\n");
+ full_test(s16, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,s32 #\n");
+ partial_test(s32, ADD_TEST_TEMPLATE);
+
+ write_string("# testing add,s64 #\n");
+ minimal_test(s64, ADD_TEST_TEMPLATE);
+
+
+ // sub
+
+ SUB_TEST_TEMPLATE :: (a: $T, b: T) {
+ rT, sT := sub(a, b, true);
+ rF, sF := sub(a, b, false);
+ assert(rT == rF && sT == sF, "> sub(%1, %2, true) = %3,%4 != sub(%1, %2, false) = %5,%6\n", a, b, rT, sT, rF, sF);
+ }
+
+ write_string("# testing sub,u8 #\n");
+ full_test(u8, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,u16 #\n");
+ full_test(u16, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,u32 #\n");
+ partial_test(u32, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,u64 #\n");
+ minimal_test(u64, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,s8 #\n");
+ full_test(s8, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,s16 #\n");
+ full_test(s16, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,s32 #\n");
+ partial_test(s32, SUB_TEST_TEMPLATE);
+
+ write_string("# testing sub,s64 #\n");
+ minimal_test(s64, SUB_TEST_TEMPLATE);
+
+
+ // mul
+
+ MUL_TEST_TEMPLATE :: (a: $T, b: T) {
+ rT, sT := mul(a, b, true);
+ rF, sF := mul(a, b, false);
+ assert(rT == rF && sT == sF, "> mul(%1, %2, true) = %3,%4 != mul(%1, %2, false) = %5,%6\n", a, b, rT, sT, rF, sF);
+ }
+
+ write_string("# testing mul,u8 #\n");
+ full_test(u8, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,u16 #\n");
+ full_test(u16, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,u32 #\n");
+ partial_test(u32, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,u64 #\n");
+ minimal_test(u64, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,s8 #\n");
+ full_test(s8, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,s16 #\n");
+ full_test(s16, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,s32 #\n");
+ partial_test(s32, MUL_TEST_TEMPLATE);
+
+ write_string("# testing mul,s64 #\n");
+ minimal_test(s64, MUL_TEST_TEMPLATE);
+
+
+ // div
+
+ DIV_TEST_TEMPLATE :: (a: $T, b: T) {
+ if b == 0 then return;
+ rT, remT, sT := div(a, b, true);
+ rF, remF, sF := div(a, b, false);
+ assert(rT == rF && sT == sF, "> mul(%1, %2, true) = %3,%4,%5 != mul(%1, %2, false) = %6,%7,%8\n", a, b, rT, remT, sT, rF, remF, sF);
+ }
+
+ write_string("# testing div,u8 #\n");
+ full_test(u8, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,u16 #\n");
+ full_test(u16, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,u32 #\n");
+ partial_test(u32, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,u64 #\n");
+ minimal_test(u64, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,s8 #\n");
+ full_test(s8, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,s16 #\n");
+ full_test(s16, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,s32 #\n");
+ partial_test(s32, DIV_TEST_TEMPLATE);
+
+ write_string("# testing div,s64 #\n");
+ minimal_test(s64, DIV_TEST_TEMPLATE);
+
+
+ write_string(" No errors found.\n");
+
+
+ write_strings(
+ "#=======================#\n",
+ "# Benchmarks #\n"
+ );
+
+ #import "Random";
+
+ performance_test :: ($operation: string, $type: Type, print_result: bool = true) -> ops_per_us_gen: float, ops_per_us_asm: float {
+
+ #if type == u8 { MIN :: 0; MAX :: U8_MAX; }
+ #if type == u16 { MIN :: 0; MAX :: U16_MAX; }
+ #if type == u32 { MIN :: 0; MAX :: U32_MAX; }
+ #if type == u64 { MIN :: 0; MAX :: U64_MAX; }
+ #if type == s8 { MIN :: S8_MIN; MAX :: S8_MAX; }
+ #if type == s16 { MIN :: S16_MIN; MAX :: S16_MAX; }
+ #if type == s32 { MIN :: S32_MIN; MAX :: S32_MAX; }
+ #if type == s64 { MIN :: S64_MIN; MAX :: S64_MAX; }
+
+ NUM_TESTS :: 50000;
+ DATA_SIZE_BITS :: 64*1024*8;
+ #if type == s8 || type == u8 then
+ DATA_SIZE :: DATA_SIZE_BITS/8;
+ else #if type == s16 || type == u16 then
+ DATA_SIZE :: DATA_SIZE_BITS/16;
+ else #if type == s32 || type == u32 then
+ DATA_SIZE :: DATA_SIZE_BITS/32;
+ else #if type == s64 || type == u64 then
+ DATA_SIZE :: DATA_SIZE_BITS/64;
+
+ best_gen := 0.0;
+ best_asm := 0.0;
+ numbers_x: [..] type;
+ numbers_y: [..] type;
+ array_reserve(*numbers_x, DATA_SIZE);
+ array_reserve(*numbers_y, DATA_SIZE);
+
+ // Comment the line bellow to use the same "random" values.
+ random_seed(cast(u64)to_nanoseconds(current_time_monotonic()));
+
+ for 0..DATA_SIZE-1 {
+ x := cast(type) random_get_within_range(xx MIN, xx MAX);
+ y := cast(type) random_get_within_range(xx MIN, xx MAX);
+ if y == 0 && operation == "div" {
+ y = 1;
+ }
+ array_add(*numbers_x, x);
+ array_add(*numbers_y, y);
+ }
+
+ for 0..NUM_TESTS-1 {
+
+ r_gen: type = 0;
+ r_asm: type = 0;
+
+ time_gen := current_time_monotonic();
+ for idx: 0..DATA_SIZE-1 #insert #run replace("r_gen ^= OP(numbers_x[idx], numbers_y[idx], false);", "OP", operation);
+ time_gen = current_time_monotonic() - time_gen;
+
+ time_asm := current_time_monotonic();
+ for idx: 0..DATA_SIZE-1 #insert #run replace("r_asm ^= OP(numbers_x[idx], numbers_y[idx], true);", "OP", operation);
+ time_asm = current_time_monotonic() - time_asm;
+
+ assert(r_gen == r_asm);
+
+ perf_gen := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_gen);
+ perf_asm := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_asm);
+ best_gen = max(best_gen, perf_gen);
+ best_asm = max(best_asm, perf_asm);
+ }
+
+ tmp_context := context;
+ push_context tmp_context {
+ ff := *context.print_style.default_format_float;
+ ff.zero_removal = .NO;
+ ff.width = 7;
+ ff.trailing_width = 2;
+
+ fi := *context.print_style.default_format_int;
+ fi.minimum_digits = 3;
+
+ if print_result {
+ if type == s8 || type == u8 write_string(" ");
+ print("% | % | % | %\n", type, best_gen, best_asm, cast(int)(100*best_asm/best_gen));
+ }
+ }
+ return best_gen, best_asm;
+ }
+
+ write_strings(
+ " | (ops / nsec) |\n",
+ " T | generic | x64 asm | %\n"
+ );
+
+ write_strings(
+ "--- | ----------------- | ---\n",
+ " | add |\n"
+ );
+ performance_test("add", u8);
+ performance_test("add", u16);
+ performance_test("add", u32);
+ performance_test("add", u64);
+ performance_test("add", s8);
+ performance_test("add", s16);
+ performance_test("add", s32);
+ performance_test("add", s64);
+
+ write_strings(
+ "--- | ----------------- | ---\n",
+ " | sub |\n"
+ );
+ performance_test("sub", u8);
+ performance_test("sub", u16);
+ performance_test("sub", u32);
+ performance_test("sub", u64);
+ performance_test("sub", s8);
+ performance_test("sub", s16);
+ performance_test("sub", s32);
+ performance_test("sub", s64);
+
+ write_strings(
+ "--- | ----------------- | ---\n",
+ " | mul |\n"
+ );
+ performance_test("mul", u8);
+ performance_test("mul", u16);
+ performance_test("mul", u32);
+ performance_test("mul", u64);
+ performance_test("mul", s8);
+ performance_test("mul", s16);
+ performance_test("mul", s32);
+ performance_test("mul", s64);
+
+ write_strings(
+ "--- | ----------------- | ---\n",
+ " | div |\n"
+ );
+ performance_test("div", u8);
+ performance_test("div", u16);
+ performance_test("div", u32);
+ performance_test("div", u64);
+ performance_test("div", s8);
+ performance_test("div", s16);
+ performance_test("div", s32);
+ performance_test("div", s64);
+}