diff options
Diffstat (limited to 'Saturation')
| -rw-r--r-- | Saturation/module.jai | 418 | ||||
| -rw-r--r-- | Saturation/tests.jai | 647 |
2 files changed, 1065 insertions, 0 deletions
diff --git a/Saturation/module.jai b/Saturation/module.jai new file mode 100644 index 0000000..50c9b3c --- /dev/null +++ b/Saturation/module.jai @@ -0,0 +1,418 @@ +// Integer saturating arighmetic (with assembly branch-free procedures for x64 - expecting signed values in two's complement). + +#module_parameters(PREFER_BRANCH_FREE_CODE := false); + +#import "Basic"; +#import "Math"; +#import "String"; + + +INTEGER_ARITHMETIC_TYPES_CHECK :: #string DONE + type_info_x := cast(*Type_Info)Tx; + type_info_y := cast(*Type_Info)Ty; + if type_info_x.type != .INTEGER || type_info_y.type != .INTEGER return false, "Non integers values passed."; + tx := cast(*Type_Info_Integer)type_info_x; + ty := cast(*Type_Info_Integer)type_info_y; + + largest_type := + ifx tx.runtime_size > ty.runtime_size then Tx else + ifx ty.runtime_size > tx.runtime_size then Ty else + ifx tx.signed == ty.signed then Tx else + void; + + // Only allow to add different signedness values if largest type is the signed one (as in JAI). + if tx.signed == ty.signed { + Tx = largest_type; + Ty = largest_type; + Tr = largest_type; + } + else if tx.signed && Tx == largest_type { + Ty = largest_type; + Tr = largest_type; + } + else if ty.signed && Ty == largest_type { + Tx = largest_type; + Tr = largest_type; + } + else return false, "Number signedness mismatch."; + + return true; +DONE + +add :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } +{ + + #if !(prefer_branch_free_code && CPU == .X64) { + + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { + + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } + + if (y > 0 && x > MAX - y) then return MAX, true; + if (y < 0 && x < MIN - y) then return MIN, true; + + } else { + + #if Tr == u8 { MAX :: U8_MAX; } + #if Tr == u16 { MAX :: U16_MAX; } + #if Tr == u32 { MAX :: U32_MAX; } + #if Tr == u64 { MAX :: U64_MAX; } + + if (x > MAX - y) then return MAX, true; + + } + + return x + y, false; + + } else { + + result: Tr = ---; + saturated: bool = ---; + + + ADD_SIGNED_ASM :: #string DONE + #asm { + mov result, -1; // Pre-set result with signed maximum (set all bits... + shr.SIZE result, 1; // ...then, clear MSB). + bt x, SIGN_BIT; // Test sign bit (affect CF). + adc result, 0; // Overflow signed maximum to signed minimum if CF is set. + + add.SIZE x, y; // Add values (affect OF). + seto saturated; // Set saturated flag if OF. + cmovno result, x; // Move add-result to result if NOT OF. + } + DONE + + #if Tr == s8 + #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".b"), "SIGN_BIT", "7"); + #if Tr == s16 + #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".w"), "SIGN_BIT", "15"); + #if Tr == s32 + #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".d"), "SIGN_BIT", "31"); + #if Tr == s64 + #insert #run replace(replace(ADD_SIGNED_ASM, ".SIZE", ".q"), "SIGN_BIT", "63"); + + + ADD_UNSIGNED_ASM :: #string DONE + #asm { + mov result, -1; // Pre-set result with unsigned maximum. + add.SIZE x, y; // Add values (affect CF). + setc saturated; // Set saturated flag if CF. + cmovnc result, x; // Move add-result to result if NOT CF. + } + DONE + + #if Tr == u8 + #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".b"); + #if Tr == u16 + #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".w"); + #if Tr == u32 + #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".d"); + #if Tr == u64 + #insert #run replace(ADD_UNSIGNED_ASM, ".SIZE", ".q"); + + + return result, saturated; + + } +} + +sub :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } +{ + + #if !(prefer_branch_free_code && CPU == .X64) { + + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { + + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } + + if (y < 0 && x > MAX + y) then return MAX, true; + if (y > 0 && x < MIN + y) then return MIN, true; + + } else { + + if (y > x) then return 0, true; + + } + + return x - y, false; + + } else { + + result: Tr = ---; + saturated: bool = ---; + + + SUB_SIGNED_ASM :: #string DONE + #asm { + mov result, -1; // Pre-set result with signed maximum (set all bits... + shr.SIZE result, 1; // ...then, clear MSB). + bt x, SIGN_BIT; // Test signal bit (affect CF). + adc result, 0; // Overflow signed maximum to signed minimum if CF is set. + + sub.SIZE x, y; // Subtract values (affect OF). + seto saturated; // Set saturated flag if OF. + cmovno result, x; // Move subtract-result to result if NOT OF. + } + DONE + + #if Tr == s8 + #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".b"), "SIGN_BIT", "7"); + #if Tr == s16 + #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".w"), "SIGN_BIT", "15"); + #if Tr == s32 + #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".d"), "SIGN_BIT", "31"); + #if Tr == s64 + #insert #run replace(replace(SUB_SIGNED_ASM, ".SIZE", ".q"), "SIGN_BIT", "63"); + + + SUB_UNSIGNED_ASM :: #string DONE + #asm { + xor result, result; // Pre-set result with usigned minimum (zero). + sub.SIZE x, y; // Subtract values (affect CF). + setc saturated; // Set saturated flag if CF. + cmovnc result, x; // Move subtract-result to result if NOT CF. + } + DONE + + #if Tr == u8 + #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".b"); + #if Tr == u16 + #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".w"); + #if Tr == u32 + #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".d"); + #if Tr == u64 + #insert #run replace(SUB_UNSIGNED_ASM, ".SIZE", ".q"); + + + return result, saturated; + + } + +} + +mul :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } +{ + + #if !(prefer_branch_free_code && CPU == .X64) { + + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { + + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } + + if x == 0 || y == 0 then return 0, false; + if x > 0 && y > 0 && x > MAX / y then return MAX, true; + if x < 0 && y < 0 && x < MAX / y then return MAX, true; + if (y < 0 && x > 0 && y < MIN / x) || (x < 0 && y > 0 && x < MIN / y) then return MIN, true; + + } else { + + #if Tr == u8 { MAX :: U8_MAX; } + #if Tr == u16 { MAX :: U16_MAX; } + #if Tr == u32 { MAX :: U32_MAX; } + #if Tr == u64 { MAX :: U64_MAX; } + + if x == 0 || y == 0 then return 0, false; + if x > MAX / y then return MAX, true; + + } + + return x * y, false; + + } else { + + result: Tr = ---; + saturated: bool = ---; + + MUL_SIGNED_ASM :: #string DONE + #asm { + // Using two copies of the x value (x_, sign) seems to be a bit faster (not sure why). + mov x_: gpr === a, x; // Pin copy of x value to register A. + + mov result, -1; // Pre-set result with signed maximum (set all bits... + shr.SIZE result, 1; // ...then, clear MSB). + mov sign:, x; // Use copy of x value. + xor sign, y; // Calculate result signal bit using xor. + bt sign, SIGN_BIT; // Test signal bit (affect CF). + adc result, 0; // Overflow signed maximum to signed minimum if CF is set. + + imul.SIZE x_, y; // Multiply values (affect OF). + seto saturated; // Set saturated flag if OF. + cmovno result, x_; // Move multiply-result to result if NOT OF. + } + DONE + + #if Tr == s8 + #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".b"), "SIGN_BIT", "7"); + #if Tr == s16 + #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".w"), "SIGN_BIT", "15"); + #if Tr == s32 + #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".d"), "SIGN_BIT", "31"); + #if Tr == s64 + #insert #run replace(replace(MUL_SIGNED_ASM, ".SIZE", ".q"), "SIGN_BIT", "63"); + + + MUL_UNSIGNED_ASM :: #string DONE + #asm { + result === a; // Pin result to register A. + + mov result, x; // Move value x to result. + mul.SIZE reg_d:, result, y; // Multiply values (affect CF). + setc saturated; // Set saturated flag if CF. + sbb mask:, mask; // If CF: mask = -1 (all bits set); else: mask = 0. + or result, mask; // If CF was set, then result will be set to unsigned maximum (all bits set). + } + DONE + + #if Tr == u8 + #insert #run replace(replace(MUL_UNSIGNED_ASM, ".SIZE", ".b"), "reg_d:,", ""); // For 8bits mul, we do not need D register. + #if Tr == u16 + #insert #run replace(MUL_UNSIGNED_ASM, ".SIZE", ".w"); + #if Tr == u32 + #insert #run replace(MUL_UNSIGNED_ASM, ".SIZE", ".d"); + #if Tr == u64 + #insert #run replace(MUL_UNSIGNED_ASM, ".SIZE", ".q"); + + + return result, saturated; + + } +} + +div :: (x: $Tx, y: $Ty, $prefer_branch_free_code := PREFER_BRANCH_FREE_CODE) -> result: $Tr, remainder: Tr, saturated: bool #modify { #insert INTEGER_ARITHMETIC_TYPES_CHECK; } +{ + + #if !(prefer_branch_free_code && CPU == .X64) { + + #if Tr == s8 || Tr == s16 || Tr == s32 || Tr == s64 { + + #if Tr == s8 { MAX :: S8_MAX; MIN :: S8_MIN; } + #if Tr == s16 { MAX :: S16_MAX; MIN :: S16_MIN; } + #if Tr == s32 { MAX :: S32_MAX; MIN :: S32_MIN; } + #if Tr == s64 { MAX :: S64_MAX; MIN :: S64_MIN; } + + if x == MIN && y == -1 then return MAX, -1, true; + + } + + result := x / y; + remainder := x - (y * result); + return result, remainder, false; + + } else { + + result: Tr = ---; + remainder: Tr = ---; + saturated: bool = ---; + + DIV_SIGNED_ASM :: #string DONE + #asm { + result === a; // Pin result to register A (to be used as dividend on idiv). + remainder === d; // Pin remainder to register D. + + xor saturated, saturated; // Clear saturated. + + // Detect div(MIN/-1) and flag it on ZF. + mov t_dividend:, -1; // Pre-set t_dividend with signed minimum (set all bits... + shr.SIZE t_dividend, 1; // ...then, clear MSB... + not t_dividend; // ...then, negate to obtain MSB set and all other bits cleared). + // + mov limit:, t_dividend; // Keep copy of signed minimum on limit. + add limit, 1; // Set limit as signed minimum + 1. + // + xor.SIZE t_dividend, x; // Clear dividend if x value is equal to signed minimum. + // + mov t_divisor:, -1; // Pre-set test_divisor with -1. + xor.SIZE t_divisor, y; // Clear test_divisor if y value is equal to -1. + // + or.SIZE t_dividend, t_divisor; // Or t_dividend with t_divisor (affect ZF). + + setz saturated; // Set saturated flag if ZF. + mov result, x; // Copy x value to result (dividend). + cmovz result, limit; // If ZF: copy limit (signed minimum + 1) to result (dividend). + + DIVIDE_PLACEHOLDER + + sub.SIZE remainder, saturated; // If saturated: remainder = 0 - 1; otherwise: remainder = x - 0. + } + DONE + + DIV_SIGNED_CALC_8BITS :: #string DONE + cbw result; // Prepare dividend high bits (sign-extend). + idiv.SIZE result, y; // Divide values. + mov remainder, result; // Extract remainder from result's high bits. + sar remainder, 8; // Shift remainder from high to low bits. + DONE + + DIV_SIGNED_CALC_16BITS :: #string DONE + cwd remainder, result; // Prepare dividend high bits (sign-extend). + idiv.SIZE remainder, result, y; // Divide values. + DONE + + DIV_SIGNED_CALC_32BITS :: #string DONE + cdq remainder, result; // Prepare dividend high bits (sign-extend). + idiv.SIZE remainder, result, y; // Divide values. + DONE + + DIV_SIGNED_CALC_64BITS :: #string DONE + cqo remainder, result; // Prepare dividend high bits (sign-extend). + idiv.SIZE remainder, result, y; // Divide values. + DONE + + #if Tr == s8 + #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_8BITS), ".SIZE", ".b"); + #if Tr == s16 + #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_16BITS), ".SIZE", ".w"); + #if Tr == s32 + #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_32BITS), ".SIZE", ".d"); + #if Tr == s64 + #insert #run replace(replace(DIV_SIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_SIGNED_CALC_64BITS), ".SIZE", ".q"); + + + DIV_UNSIGNED_ASM :: #string DONE + #asm { + result === a; // Pin result to register A. + remainder === d; // Pin remainder to register D. + + xor result, result; // Clear result. + xor remainder, remainder; // Clear remainder (required when used as dividend's high bits). + xor saturated, saturated; // Clear saturated (unsigned division never saturates). + mov.SIZE result, x; // Copy x value to result. + + DIVIDE_PLACEHOLDER + } + DONE + + DIV_UNSIGNED_CALC_8BITS :: #string DONE + div.SIZE result, y; // Divide values. + mov remainder, result; // Extract remainder from result's high bits. + sar remainder, 8; // Shift remainder from high to low bits. + DONE + + DIV_UNSIGNED_CALC :: #string DONE + div.SIZE remainder, result, y; // Divide values. + DONE + + #if Tr == u8 + #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC_8BITS), ".SIZE", ".b"); + #if Tr == u16 + #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC), ".SIZE", ".w"); + #if Tr == u32 + #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC), ".SIZE", ".d"); + #if Tr == u64 + #insert #run replace(replace(DIV_UNSIGNED_ASM, "DIVIDE_PLACEHOLDER", DIV_UNSIGNED_CALC), ".SIZE", ".q"); + + + return result, remainder, saturated; + + } +} diff --git a/Saturation/tests.jai b/Saturation/tests.jai new file mode 100644 index 0000000..2a82300 --- /dev/null +++ b/Saturation/tests.jai @@ -0,0 +1,647 @@ +// Tests for integer saturating arighmetic procedures. + +AVOID_INFINITE_FOR_LOOP :: true; + +#import "Basic"; +#import "Compiler"; +#import "Math"; +#import "String"; +#import "Saturation"; + +main :: () { + + write_strings( + "#=======================#\n", + "# Basic tests #\n" + ); + + + test_op :: ($operation: string, x: $Tx, y: $Ty, result: $Tr, type: Type, saturated: bool, remainder: Tr = 0) -> errors_found: int #expand { + + #insert #run () -> string { + // Build test call. + builder: String_Builder; + call := ifx operation == "div" + then "t_result, t_remainder, t_saturated := %(cast(Tx)x, cast(Ty)y);" + else "t_result, t_saturated := %(cast(Tx)x, cast(Ty)y);"; + + print(*builder, call, operation); + + return builder_to_string(*builder); + }(); + + errors := 0; + log: String_Builder; + if result != t_result { errors += 1; print(*log, " > incorrect result value: got % expected %\n", t_result, result); }; + if type != type_of(t_result) { errors += 1; print(*log, " > incorrect result type: got % expected %\n", type_of(t_result), type); }; + if saturated != t_saturated { errors += 1; print(*log, " > incorrect saturated flag: got % expected %\n", t_saturated, saturated); }; + #if operation == "div" { + if remainder != t_remainder { errors += 1; print(*log, " > incorrect remainder value: got % expected %\n", t_remainder, remainder); }; + } + + if errors > 0 { + #if operation == "div" { + print("%_%(%, %) = % + %0%0\n", operation, type, x, y, result, remainder, ifx saturated then " : saturated"); + } + else { + print("%_%(%, %) = %0%0\n", operation, type, x, y, result, ifx saturated then " : saturated"); + } + write_builder(*log); + } + + return errors; + } + + errors := 0; + + // Test signed add. + errors += test_op("add", cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true); + errors += test_op("add", cast(s16)S16_MAX, cast( u8)1, S16_MAX, s16, true); + errors += test_op("add", cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true); + errors += test_op("add", cast(s64)S64_MAX, cast(u32)1, S64_MAX, s64, true); + + errors += test_op("add", cast( s8) S8_MAX, cast( s8) S8_MIN, -1, s8, false); + errors += test_op("add", cast(s16)S16_MAX, cast(s16)S16_MIN, -1, s16, false); + errors += test_op("add", cast(s32)S32_MAX, cast(s32)S32_MIN, -1, s32, false); + errors += test_op("add", cast(s64)S64_MAX, cast(s64)S64_MIN, -1, s64, false); + + // Test unsigned add. + errors += test_op("add", cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, true); + errors += test_op("add", cast(u16)U16_MAX, cast(u16)1, U16_MAX, u16, true); + errors += test_op("add", cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, true); + errors += test_op("add", cast(u64)U64_MAX, cast(u64)1, U64_MAX, u64, true); + + errors += test_op("add", cast( u8) U8_MAX, cast( u8)0, U8_MAX, u8, false); + errors += test_op("add", cast(u16)U16_MAX, cast(u16)0, U16_MAX, u16, false); + errors += test_op("add", cast(u32)U32_MAX, cast(u32)0, U32_MAX, u32, false); + errors += test_op("add", cast(u64)U64_MAX, cast(u64)0, U64_MAX, u64, false); + + // Test signed sub. + errors += test_op("sub", cast( s8) S8_MIN, cast( s8)1, S8_MIN, s8, true); + errors += test_op("sub", cast(s16)S16_MIN, cast( u8)1, S16_MIN, s16, true); + errors += test_op("sub", cast(s32)S32_MIN, cast(s32)1, S32_MIN, s32, true); + errors += test_op("sub", cast(s64)S64_MIN, cast(u32)1, S64_MIN, s64, true); + + errors += test_op("sub", cast( s8)-1, cast( s8) S8_MAX, S8_MIN, s8, false); + errors += test_op("sub", cast(s16)-1, cast(s16)S16_MAX, S16_MIN, s16, false); + errors += test_op("sub", cast(s32)-1, cast(s32)S32_MAX, S32_MIN, s32, false); + errors += test_op("sub", cast(s64)-1, cast(s64)S64_MAX, S64_MIN, s64, false); + + // Test unsigned sub. + errors += test_op("sub", cast( u8)1, cast( u8) U8_MAX, 0, u8, true); + errors += test_op("sub", cast( u8)1, cast(u16)U16_MAX, 0, u16, true); + errors += test_op("sub", cast(u32)1, cast(u32)U32_MAX, 0, u32, true); + errors += test_op("sub", cast(u32)1, cast(u64)U64_MAX, 0, u64, true); + + errors += test_op("sub", cast( u8) U8_MAX, cast( u8)0, U8_MAX, u8, false); + errors += test_op("sub", cast(u16)U16_MAX, cast( u8)0, U16_MAX, u16, false); + errors += test_op("sub", cast(u32)U32_MAX, cast(u32)0, U32_MAX, u32, false); + errors += test_op("sub", cast(u64)U64_MAX, cast(u32)0, U64_MAX, u64, false); + + // Test signed mul. + errors += test_op("mul", cast( s8) S8_MIN, cast( s8)-1, S8_MAX, s8, true); + errors += test_op("mul", cast(s16)S16_MIN, cast( s8)-1, S16_MAX, s16, true); + errors += test_op("mul", cast(s32)S32_MIN, cast(s32)-1, S32_MAX, s32, true); + errors += test_op("mul", cast(s64)S64_MIN, cast(s32)-1, S64_MAX, s64, true); + + errors += test_op("mul", cast( s8) S8_MAX, cast( s8)-2, S8_MIN, s8, true); + errors += test_op("mul", cast(s16)S16_MAX, cast( s8)-2, S16_MIN, s16, true); + errors += test_op("mul", cast(s32)S32_MAX, cast(s32)-2, S32_MIN, s32, true); + errors += test_op("mul", cast(s64)S64_MAX, cast(s32)-2, S64_MIN, s64, true); + + errors += test_op("mul", cast( s8)-2, cast( s8) S8_MAX, S8_MIN, s8, true); + errors += test_op("mul", cast( s8)-2, cast(s16)S16_MAX, S16_MIN, s16, true); + errors += test_op("mul", cast(s32)-2, cast(s32)S32_MAX, S32_MIN, s32, true); + errors += test_op("mul", cast(s32)-2, cast(s64)S64_MAX, S64_MIN, s64, true); + + errors += test_op("mul", cast( s8) S8_MAX, cast( s8)2, S8_MAX, s8, true); + errors += test_op("mul", cast(s16)S16_MAX, cast( s8)2, S16_MAX, s16, true); + errors += test_op("mul", cast(s32)S32_MAX, cast(s32)2, S32_MAX, s32, true); + errors += test_op("mul", cast(s64)S64_MAX, cast(s32)2, S64_MAX, s64, true); + + errors += test_op("mul", cast( s8) S8_MAX, cast( s8)-1, -S8_MAX, s8, false); + errors += test_op("mul", cast(s16)S16_MAX, cast( s8)-1, -S16_MAX, s16, false); + errors += test_op("mul", cast(s32)S32_MAX, cast(s32)-1, -S32_MAX, s32, false); + errors += test_op("mul", cast(s64)S64_MAX, cast(s32)-1, -S64_MAX, s64, false); + + errors += test_op("mul", cast( s8) S8_MAX, cast( s8)0, 0, s8, false); + errors += test_op("mul", cast(s16)S16_MAX, cast( u8)0, 0, s16, false); + errors += test_op("mul", cast(s32)S32_MAX, cast(s32)0, 0, s32, false); + errors += test_op("mul", cast(s64)S64_MAX, cast(u32)0, 0, s64, false); + + // Test unsigned mul. + errors += test_op("mul", cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, false); + errors += test_op("mul", cast(u16)U16_MAX, cast( u8)1, U16_MAX, u16, false); + errors += test_op("mul", cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, false); + errors += test_op("mul", cast(u64)U64_MAX, cast(u32)1, U64_MAX, u64, false); + + errors += test_op("mul", cast( u8) U8_MAX, cast( u8)2, U8_MAX, u8, true); + errors += test_op("mul", cast(u16)U16_MAX, cast( u8)2, U16_MAX, u16, true); + errors += test_op("mul", cast(u32)U32_MAX, cast(u32)2, U32_MAX, u32, true); + errors += test_op("mul", cast(u64)U64_MAX, cast(u32)2, U64_MAX, u64, true); + + // Test signed div. + errors += test_op("div", cast( s8) S8_MIN, cast( s8)-1, S8_MAX, s8, true, -1); + errors += test_op("div", cast(s16)S16_MIN, cast( s8)-1, S16_MAX, s16, true, -1); + errors += test_op("div", cast(s32)S32_MIN, cast(s32)-1, S32_MAX, s32, true, -1); + errors += test_op("div", cast(s64)S64_MIN, cast(s32)-1, S64_MAX, s64, true, -1); + + errors += test_op("div", cast( s8) S8_MAX, cast( s8)-2, - S8_MAX/2, s8, false, 1); + errors += test_op("div", cast(s16)S16_MAX, cast( s8)-2, -S16_MAX/2, s16, false, 1); + errors += test_op("div", cast(s32)S32_MAX, cast(s32)-2, -S32_MAX/2, s32, false, 1); + errors += test_op("div", cast(s64)S64_MAX, cast(s32)-2, -S64_MAX/2, s64, false, 1); + + errors += test_op("div", cast( s8)15, cast( s8)5, 3, s8, false, 0); + errors += test_op("div", cast( u8)15, cast(s16)7, 2, s16, false, 1); + errors += test_op("div", cast(s16)15, cast(s32)13, 1, s32, false, 2); + errors += test_op("div", cast(u16)100, cast(s64)3, 33, s64, false, 1); + + // Test unsigned div. + errors += test_op("div", cast( u8) U8_MAX, cast( u8)2, U8_MAX/2, u8, false, 1); + errors += test_op("div", cast(u16)U16_MAX, cast( u8)2, U16_MAX/2, u16, false, 1); + errors += test_op("div", cast(u32)U32_MAX, cast(u32)2, U32_MAX/2, u32, false, 1); + errors += test_op("div", cast(u64)U64_MAX, cast(u32)2, U64_MAX/2, u64, false, 1); + + if errors > 0 print("# Found % %!\n", errors, ifx errors == 1 then "error" else "errors"); else print(" No errors found.\n"); + + + // Test generic agains branch-free alternative. + write_strings( + "#=======================#\n", + "# generic == x64 asm ? #\n" + ); + + + full_test :: ($type: Type, test: (a: type, b: type)) { + #if type == { + case u8; + min :u8 = 0; + max :u8 = U8_MAX; + + case u16; + min :u16 = 0; + max :u16 = U16_MAX; + + case s8; + min :s8 = S8_MIN; + max :s8 = S8_MAX; + + case s16; + min :s16 = S16_MIN; + max :s16 = S16_MAX; + + case; + assert(false, "This will take way too long."); + } + + #if !AVOID_INFINITE_FOR_LOOP { + for a : min..max { + for b : min..max { + test(a, b); + } + } + } + else { + a :type = min; + b :type = min; + while loop_a := true { + while loop_b := true { + test(a, b); + if b == max then break loop_b; else b += 1; + } + if a == max then break loop_a; else a += 1; + } + } + } + + partial_test :: ($type: Type, test: (a: type, b: type)) { + min, max: type; + #if type == { + case u8; + min = 0; + max = U8_MAX; + + case u16; + min = 0; + max = U16_MAX; + + case u32; + min = 0; + max = U32_MAX; + + case s8; + min = S8_MIN; + max = S8_MAX; + + case s16; + min = S16_MIN; + max = S16_MAX; + + case s32; + min = S32_MIN; + max = S32_MAX; + + case; + assert(false, "This will take way too long."); + } + + #if !AVOID_INFINITE_FOR_LOOP { + for a: min..max { + b := a; + c := max - a + min; + test(a, b); + test(a, c); + } + } + else { + a := min; + while loop := true { + b := a; + c := max - a + min; + test(a, b); + test(a, c); + if a == max then break loop; else a += 1; + } + } + } + + minimal_test :: ($type: Type, test: (a: type, b: type)) { + #if type == { + case u32; + min :u32 = 0; + mid :u32 = U32_MAX / 2; + max :u32 = U32_MAX; + range :u32 = cast(u32)U16_MAX * 2048; + + case u64; + min :u64 = 0; + mid :u64 = U64_MAX / 2; + max :u64 = U64_MAX; + range :u64 = cast(u64)U16_MAX * 2048; + + case s32; + min :s32 = S32_MIN; + mid :s32 = (S32_MIN / 2) + (S32_MAX / 2); + max :s32 = S32_MAX; + range :s32 = cast(s32)S16_MAX * 2048; + + case s64; + min :s64 = S64_MIN; + mid :s64 = (S64_MIN / 2) + (S64_MAX / 2); + max :s64 = S64_MAX; + range :s64 = cast(s64)S16_MAX * 2048; + + case; + assert(false, "Invalid type % given.", type); + } + + #if !AVOID_INFINITE_FOR_LOOP { + start, end : type; + + start = min; + end = min+range; + for a: start..end { + b := a; + c := end - a + start; + test(a, b); + test(a, c); + } + + start = mid-range; + end = mid+range; + for a: start..end { + b := a; + c := end - a + start; + test(a, b); + test(a, c); + } + + start = max-range; + end = max; + for a: start..end { + b := a; + c := end - a + start; + test(a, b); + test(a, c); + } + } + else { + start, end, a : type; + + start = min; + end = min + range; + a = start; + while loop := true { + b := a; + c := end - a + start; + test(a, b); + test(a, c); + if a == end then break loop; else a += 1; + } + + start = mid - range; + end = mid + range; + a = start; + while loop := true { + b := a; + c := end - a + start; + test(a, b); + test(a, c); + if a == end then break loop; else a += 1; + } + + start = max - range; + end = max; + a = start; + while loop := true { + b := a; + c := end - a + start; + test(a, b); + test(a, c); + if a == end then break loop; else a += 1; + } + } + } + + + // add + + ADD_TEST_TEMPLATE :: (a: $T, b: T) { + rT, sT := add(a, b, true); + rF, sF := add(a, b, false); + assert(rT == rF && sT == sF, "> add(%1, %2, true) = %3,%4 != add(%1, %2, false) = %5,%6\n", a, b, rT, sT, rF, sF); + } + + write_string("# testing add,u8 #\n"); + full_test(u8, ADD_TEST_TEMPLATE); + + write_string("# testing add,u16 #\n"); + full_test(u16, ADD_TEST_TEMPLATE); + + write_string("# testing add,u32 #\n"); + partial_test(u32, ADD_TEST_TEMPLATE); + + write_string("# testing add,u64 #\n"); + minimal_test(u64, ADD_TEST_TEMPLATE); + + write_string("# testing add,s8 #\n"); + full_test(s8, ADD_TEST_TEMPLATE); + + write_string("# testing add,s16 #\n"); + full_test(s16, ADD_TEST_TEMPLATE); + + write_string("# testing add,s32 #\n"); + partial_test(s32, ADD_TEST_TEMPLATE); + + write_string("# testing add,s64 #\n"); + minimal_test(s64, ADD_TEST_TEMPLATE); + + + // sub + + SUB_TEST_TEMPLATE :: (a: $T, b: T) { + rT, sT := sub(a, b, true); + rF, sF := sub(a, b, false); + assert(rT == rF && sT == sF, "> sub(%1, %2, true) = %3,%4 != sub(%1, %2, false) = %5,%6\n", a, b, rT, sT, rF, sF); + } + + write_string("# testing sub,u8 #\n"); + full_test(u8, SUB_TEST_TEMPLATE); + + write_string("# testing sub,u16 #\n"); + full_test(u16, SUB_TEST_TEMPLATE); + + write_string("# testing sub,u32 #\n"); + partial_test(u32, SUB_TEST_TEMPLATE); + + write_string("# testing sub,u64 #\n"); + minimal_test(u64, SUB_TEST_TEMPLATE); + + write_string("# testing sub,s8 #\n"); + full_test(s8, SUB_TEST_TEMPLATE); + + write_string("# testing sub,s16 #\n"); + full_test(s16, SUB_TEST_TEMPLATE); + + write_string("# testing sub,s32 #\n"); + partial_test(s32, SUB_TEST_TEMPLATE); + + write_string("# testing sub,s64 #\n"); + minimal_test(s64, SUB_TEST_TEMPLATE); + + + // mul + + MUL_TEST_TEMPLATE :: (a: $T, b: T) { + rT, sT := mul(a, b, true); + rF, sF := mul(a, b, false); + assert(rT == rF && sT == sF, "> mul(%1, %2, true) = %3,%4 != mul(%1, %2, false) = %5,%6\n", a, b, rT, sT, rF, sF); + } + + write_string("# testing mul,u8 #\n"); + full_test(u8, MUL_TEST_TEMPLATE); + + write_string("# testing mul,u16 #\n"); + full_test(u16, MUL_TEST_TEMPLATE); + + write_string("# testing mul,u32 #\n"); + partial_test(u32, MUL_TEST_TEMPLATE); + + write_string("# testing mul,u64 #\n"); + minimal_test(u64, MUL_TEST_TEMPLATE); + + write_string("# testing mul,s8 #\n"); + full_test(s8, MUL_TEST_TEMPLATE); + + write_string("# testing mul,s16 #\n"); + full_test(s16, MUL_TEST_TEMPLATE); + + write_string("# testing mul,s32 #\n"); + partial_test(s32, MUL_TEST_TEMPLATE); + + write_string("# testing mul,s64 #\n"); + minimal_test(s64, MUL_TEST_TEMPLATE); + + + // div + + DIV_TEST_TEMPLATE :: (a: $T, b: T) { + if b == 0 then return; + rT, remT, sT := div(a, b, true); + rF, remF, sF := div(a, b, false); + assert(rT == rF && sT == sF, "> mul(%1, %2, true) = %3,%4,%5 != mul(%1, %2, false) = %6,%7,%8\n", a, b, rT, remT, sT, rF, remF, sF); + } + + write_string("# testing div,u8 #\n"); + full_test(u8, DIV_TEST_TEMPLATE); + + write_string("# testing div,u16 #\n"); + full_test(u16, DIV_TEST_TEMPLATE); + + write_string("# testing div,u32 #\n"); + partial_test(u32, DIV_TEST_TEMPLATE); + + write_string("# testing div,u64 #\n"); + minimal_test(u64, DIV_TEST_TEMPLATE); + + write_string("# testing div,s8 #\n"); + full_test(s8, DIV_TEST_TEMPLATE); + + write_string("# testing div,s16 #\n"); + full_test(s16, DIV_TEST_TEMPLATE); + + write_string("# testing div,s32 #\n"); + partial_test(s32, DIV_TEST_TEMPLATE); + + write_string("# testing div,s64 #\n"); + minimal_test(s64, DIV_TEST_TEMPLATE); + + + write_string(" No errors found.\n"); + + + write_strings( + "#=======================#\n", + "# Benchmarks #\n" + ); + + #import "Random"; + + performance_test :: ($operation: string, $type: Type, print_result: bool = true) -> ops_per_us_gen: float, ops_per_us_asm: float { + + #if type == u8 { MIN :: 0; MAX :: U8_MAX; } + #if type == u16 { MIN :: 0; MAX :: U16_MAX; } + #if type == u32 { MIN :: 0; MAX :: U32_MAX; } + #if type == u64 { MIN :: 0; MAX :: U64_MAX; } + #if type == s8 { MIN :: S8_MIN; MAX :: S8_MAX; } + #if type == s16 { MIN :: S16_MIN; MAX :: S16_MAX; } + #if type == s32 { MIN :: S32_MIN; MAX :: S32_MAX; } + #if type == s64 { MIN :: S64_MIN; MAX :: S64_MAX; } + + NUM_TESTS :: 50000; + DATA_SIZE_BITS :: 64*1024*8; + #if type == s8 || type == u8 then + DATA_SIZE :: DATA_SIZE_BITS/8; + else #if type == s16 || type == u16 then + DATA_SIZE :: DATA_SIZE_BITS/16; + else #if type == s32 || type == u32 then + DATA_SIZE :: DATA_SIZE_BITS/32; + else #if type == s64 || type == u64 then + DATA_SIZE :: DATA_SIZE_BITS/64; + + best_gen := 0.0; + best_asm := 0.0; + numbers_x: [..] type; + numbers_y: [..] type; + array_reserve(*numbers_x, DATA_SIZE); + array_reserve(*numbers_y, DATA_SIZE); + + // Comment the line bellow to use the same "random" values. + random_seed(cast(u64)to_nanoseconds(current_time_monotonic())); + + for 0..DATA_SIZE-1 { + x := cast(type) random_get_within_range(xx MIN, xx MAX); + y := cast(type) random_get_within_range(xx MIN, xx MAX); + if y == 0 && operation == "div" { + y = 1; + } + array_add(*numbers_x, x); + array_add(*numbers_y, y); + } + + for 0..NUM_TESTS-1 { + + r_gen: type = 0; + r_asm: type = 0; + + time_gen := current_time_monotonic(); + for idx: 0..DATA_SIZE-1 #insert #run replace("r_gen ^= OP(numbers_x[idx], numbers_y[idx], false);", "OP", operation); + time_gen = current_time_monotonic() - time_gen; + + time_asm := current_time_monotonic(); + for idx: 0..DATA_SIZE-1 #insert #run replace("r_asm ^= OP(numbers_x[idx], numbers_y[idx], true);", "OP", operation); + time_asm = current_time_monotonic() - time_asm; + + assert(r_gen == r_asm); + + perf_gen := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_gen); + perf_asm := cast(float)DATA_SIZE/cast(float)to_nanoseconds(time_asm); + best_gen = max(best_gen, perf_gen); + best_asm = max(best_asm, perf_asm); + } + + tmp_context := context; + push_context tmp_context { + ff := *context.print_style.default_format_float; + ff.zero_removal = .NO; + ff.width = 7; + ff.trailing_width = 2; + + fi := *context.print_style.default_format_int; + fi.minimum_digits = 3; + + if print_result { + if type == s8 || type == u8 write_string(" "); + print("% | % | % | %\n", type, best_gen, best_asm, cast(int)(100*best_asm/best_gen)); + } + } + return best_gen, best_asm; + } + + write_strings( + " | (ops / nsec) |\n", + " T | generic | x64 asm | %\n" + ); + + write_strings( + "--- | ----------------- | ---\n", + " | add |\n" + ); + performance_test("add", u8); + performance_test("add", u16); + performance_test("add", u32); + performance_test("add", u64); + performance_test("add", s8); + performance_test("add", s16); + performance_test("add", s32); + performance_test("add", s64); + + write_strings( + "--- | ----------------- | ---\n", + " | sub |\n" + ); + performance_test("sub", u8); + performance_test("sub", u16); + performance_test("sub", u32); + performance_test("sub", u64); + performance_test("sub", s8); + performance_test("sub", s16); + performance_test("sub", s32); + performance_test("sub", s64); + + write_strings( + "--- | ----------------- | ---\n", + " | mul |\n" + ); + performance_test("mul", u8); + performance_test("mul", u16); + performance_test("mul", u32); + performance_test("mul", u64); + performance_test("mul", s8); + performance_test("mul", s16); + performance_test("mul", s32); + performance_test("mul", s64); + + write_strings( + "--- | ----------------- | ---\n", + " | div |\n" + ); + performance_test("div", u8); + performance_test("div", u16); + performance_test("div", u32); + performance_test("div", u64); + performance_test("div", s8); + performance_test("div", s16); + performance_test("div", s32); + performance_test("div", s64); +} |
