// Tests for integer saturating arithmetic (with branch-free procedures on x64).
// Expects signed values in two's complement.

#import "Basic";
#import "Compiler";
#import "Math";

#load "Math_Ext.jai";

// Program entry point.
// First runs the unit tests for the saturating add/sub/mul/div procedures
// loaded from Math_Ext.jai and prints a summary, then benchmarks each
// operation for every integer width and prints a small results table.
main :: () {
    write_strings(
        "#========================#\n",
        "# Unit tests #\n"
    );

    // Runs one saturating operation and compares its outputs with the expected ones.
    //
    //   operation  - name of the procedure to call ("add", "sub", "mul" or "div").
    //                Must be a compile-time constant because it is used to generate
    //                the call via `#insert #run` and to select code via `#if`.
    //   x, y       - operands passed to the procedure.
    //   result     - expected result value.
    //   type       - expected type of the returned result.
    //   saturated  - expected value of the returned saturation flag.
    //   remainder  - expected remainder; only checked when operation is "div".
    //
    // Returns the number of mismatches found (each failed check counts as one).
    test_op :: ($operation: string, x: $Tx, y: $Ty, result: $Tr, type: Type, saturated: bool, remainder: Tr = 0) -> errors_found: int #expand {
        // Builds the source text that performs the call under test. The generated
        // code declares t_result / t_saturated (and t_remainder for "div") and
        // prints a header line describing the test when the result value differs.
        print_test_call :: (op_name: string) -> string {
            #import "String";

            str: string = ---;
            if op_name != "div" {
                TEST_CALL :: #string DONE
t_result, t_saturated := OP(cast(Tx)x, cast(Ty)y);
if result != t_result print("%_%(%, %) = %0%0\n", operation, type, x, y, result, ifx saturated then " : saturated");
DONE
                str = replace(TEST_CALL, "OP", op_name);
            } else {
                // "div" additionally returns a remainder.
                TEST_CALL :: #string DONE
t_result, t_remainder, t_saturated := OP(cast(Tx)x, cast(Ty)y);
if result != t_result print("%_%(%, %) = % + %0%0\n", operation, type, x, y, result, remainder, ifx saturated then " : saturated");
DONE
                str = replace(TEST_CALL, "OP", op_name);
            }
            return str;
        }

        // Splice the generated call into this macro at compile time.
        #insert #run print_test_call(operation);

        errors := 0;

        if result != t_result {
            errors += 1;
            print(" > incorrect result value: got % expected %\n", t_result, result);
        }

        if type != type_of(t_result) {
            errors += 1;
            print(" > incorrect result type: got % expected %\n", type_of(t_result), type);
        }

        if saturated != t_saturated {
            errors += 1;
            print(" > incorrect saturated flag: got % expected %\n", t_saturated, saturated);
        }

        // t_remainder only exists in the code generated for "div".
        #if operation == "div" {
            if remainder != t_remainder {
                errors += 1;
                print(" > incorrect remainder value: got % expected %\n", t_remainder, remainder);
            }
        }

        return errors;
    }

    errors := 0;

    // Test signed add.
    errors += test_op("add", cast( s8) S8_MAX, cast( s8)1, S8_MAX, s8, true);
    errors += test_op("add", cast(s16)S16_MAX, cast( u8)1, S16_MAX, s16, true);
    errors += test_op("add", cast(s32)S32_MAX, cast(s32)1, S32_MAX, s32, true);
    errors += test_op("add", cast(s64)S64_MAX, cast(u32)1, S64_MAX, s64, true);
    errors += test_op("add", cast( s8) S8_MAX, cast( s8) S8_MIN, -1, s8, false);
    errors += test_op("add", cast(s16)S16_MAX, cast(s16)S16_MIN, -1, s16, false);
    errors += test_op("add", cast(s32)S32_MAX, cast(s32)S32_MIN, -1, s32, false);
    errors += test_op("add", cast(s64)S64_MAX, cast(s64)S64_MIN, -1, s64, false);

    // Test unsigned add.
    errors += test_op("add", cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, true);
    errors += test_op("add", cast(u16)U16_MAX, cast(u16)1, U16_MAX, u16, true);
    errors += test_op("add", cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, true);
    errors += test_op("add", cast(u64)U64_MAX, cast(u64)1, U64_MAX, u64, true);
    errors += test_op("add", cast( u8) U8_MAX, cast( u8)0, U8_MAX, u8, false);
    errors += test_op("add", cast(u16)U16_MAX, cast(u16)0, U16_MAX, u16, false);
    errors += test_op("add", cast(u32)U32_MAX, cast(u32)0, U32_MAX, u32, false);
    errors += test_op("add", cast(u64)U64_MAX, cast(u64)0, U64_MAX, u64, false);

    // Test signed sub.
    errors += test_op("sub", cast( s8) S8_MIN, cast( s8)1, S8_MIN, s8, true);
    errors += test_op("sub", cast(s16)S16_MIN, cast( u8)1, S16_MIN, s16, true);
    errors += test_op("sub", cast(s32)S32_MIN, cast(s32)1, S32_MIN, s32, true);
    errors += test_op("sub", cast(s64)S64_MIN, cast(u32)1, S64_MIN, s64, true);
    errors += test_op("sub", cast( s8)-1, cast( s8) S8_MAX, S8_MIN, s8, false);
    errors += test_op("sub", cast(s16)-1, cast(s16)S16_MAX, S16_MIN, s16, false);
    errors += test_op("sub", cast(s32)-1, cast(s32)S32_MAX, S32_MIN, s32, false);
    errors += test_op("sub", cast(s64)-1, cast(s64)S64_MAX, S64_MIN, s64, false);

    // Test unsigned sub.
    errors += test_op("sub", cast( u8)1, cast( u8) U8_MAX, 0, u8, true);
    errors += test_op("sub", cast( u8)1, cast(u16)U16_MAX, 0, u16, true);
    errors += test_op("sub", cast(u32)1, cast(u32)U32_MAX, 0, u32, true);
    errors += test_op("sub", cast(u32)1, cast(u64)U64_MAX, 0, u64, true);
    errors += test_op("sub", cast( u8) U8_MAX, cast( u8)0, U8_MAX, u8, false);
    errors += test_op("sub", cast(u16)U16_MAX, cast( u8)0, U16_MAX, u16, false);
    errors += test_op("sub", cast(u32)U32_MAX, cast(u32)0, U32_MAX, u32, false);
    errors += test_op("sub", cast(u64)U64_MAX, cast(u32)0, U64_MAX, u64, false);

    // Test signed mul.
    errors += test_op("mul", cast( s8) S8_MIN, cast( s8)-1, S8_MAX, s8, true);
    errors += test_op("mul", cast(s16)S16_MIN, cast( s8)-1, S16_MAX, s16, true);
    errors += test_op("mul", cast(s32)S32_MIN, cast(s32)-1, S32_MAX, s32, true);
    errors += test_op("mul", cast(s64)S64_MIN, cast(s32)-1, S64_MAX, s64, true);
    errors += test_op("mul", cast( s8) S8_MAX, cast( s8)-2, S8_MIN, s8, true);
    errors += test_op("mul", cast(s16)S16_MAX, cast( s8)-2, S16_MIN, s16, true);
    errors += test_op("mul", cast(s32)S32_MAX, cast(s32)-2, S32_MIN, s32, true);
    errors += test_op("mul", cast(s64)S64_MAX, cast(s32)-2, S64_MIN, s64, true);
    errors += test_op("mul", cast( s8)-2, cast( s8) S8_MAX, S8_MIN, s8, true);
    errors += test_op("mul", cast( s8)-2, cast(s16)S16_MAX, S16_MIN, s16, true);
    errors += test_op("mul", cast(s32)-2, cast(s32)S32_MAX, S32_MIN, s32, true);
    errors += test_op("mul", cast(s32)-2, cast(s64)S64_MAX, S64_MIN, s64, true);
    errors += test_op("mul", cast( s8) S8_MAX, cast( s8)2, S8_MAX, s8, true);
    errors += test_op("mul", cast(s16)S16_MAX, cast( s8)2, S16_MAX, s16, true);
    errors += test_op("mul", cast(s32)S32_MAX, cast(s32)2, S32_MAX, s32, true);
    errors += test_op("mul", cast(s64)S64_MAX, cast(s32)2, S64_MAX, s64, true);
    errors += test_op("mul", cast( s8) S8_MAX, cast( s8)-1, -S8_MAX, s8, false);
    errors += test_op("mul", cast(s16)S16_MAX, cast( s8)-1, -S16_MAX, s16, false);
    errors += test_op("mul", cast(s32)S32_MAX, cast(s32)-1, -S32_MAX, s32, false);
    errors += test_op("mul", cast(s64)S64_MAX, cast(s32)-1, -S64_MAX, s64, false);
    errors += test_op("mul", cast( s8) S8_MAX, cast( s8)0, 0, s8, false);
    errors += test_op("mul", cast(s16)S16_MAX, cast( u8)0, 0, s16, false);
    errors += test_op("mul", cast(s32)S32_MAX, cast(s32)0, 0, s32, false);
    errors += test_op("mul", cast(s64)S64_MAX, cast(u32)0, 0, s64, false);

    // Test unsigned mul.
    errors += test_op("mul", cast( u8) U8_MAX, cast( u8)1, U8_MAX, u8, false);
    errors += test_op("mul", cast(u16)U16_MAX, cast( u8)1, U16_MAX, u16, false);
    errors += test_op("mul", cast(u32)U32_MAX, cast(u32)1, U32_MAX, u32, false);
    errors += test_op("mul", cast(u64)U64_MAX, cast(u32)1, U64_MAX, u64, false);
    errors += test_op("mul", cast( u8) U8_MAX, cast( u8)2, U8_MAX, u8, true);
    errors += test_op("mul", cast(u16)U16_MAX, cast( u8)2, U16_MAX, u16, true);
    errors += test_op("mul", cast(u32)U32_MAX, cast(u32)2, U32_MAX, u32, true);
    errors += test_op("mul", cast(u64)U64_MAX, cast(u32)2, U64_MAX, u64, true);

    // Test signed div.
    errors += test_op("div", cast( s8) S8_MIN, cast( s8)-1, S8_MAX, s8, true, -1);
    errors += test_op("div", cast(s16)S16_MIN, cast( s8)-1, S16_MAX, s16, true, -1);
    errors += test_op("div", cast(s32)S32_MIN, cast(s32)-1, S32_MAX, s32, true, -1);
    errors += test_op("div", cast(s64)S64_MIN, cast(s32)-1, S64_MAX, s64, true, -1);
    errors += test_op("div", cast( s8) S8_MAX, cast( s8)-2, - S8_MAX/2, s8, false, 1);
    errors += test_op("div", cast(s16)S16_MAX, cast( s8)-2, -S16_MAX/2, s16, false, 1);
    errors += test_op("div", cast(s32)S32_MAX, cast(s32)-2, -S32_MAX/2, s32, false, 1);
    errors += test_op("div", cast(s64)S64_MAX, cast(s32)-2, -S64_MAX/2, s64, false, 1);
    errors += test_op("div", cast( s8)15, cast( s8)5, 3, s8, false, 0);
    errors += test_op("div", cast( u8)15, cast(s16)7, 2, s16, false, 1);
    errors += test_op("div", cast(s16)15, cast(s32)13, 1, s32, false, 2);
    errors += test_op("div", cast(u16)100, cast(s64)3, 33, s64, false, 1);

    // Test unsigned div.
    errors += test_op("div", cast( u8) U8_MAX, cast( u8)2, U8_MAX/2, u8, false, 1);
    errors += test_op("div", cast(u16)U16_MAX, cast( u8)2, U16_MAX/2, u16, false, 1);
    errors += test_op("div", cast(u32)U32_MAX, cast(u32)2, U32_MAX/2, u32, false, 1);
    errors += test_op("div", cast(u64)U64_MAX, cast(u32)2, U64_MAX/2, u64, false, 1);

    if errors > 0
        print("# Found % %!\n", errors, ifx errors == 1 then "error" else "errors");
    else
        print(" No errors found.\n");

    write_strings(
        "#========================#\n",
        "# Benchmarks #\n"
    );

    #import "Random";

    // Benchmarks one operation on one integer type.
    //
    // Fills two input arrays with random operands, then NUM_TESTS times runs the
    // operation over all SIZE_DATA pairs twice: once with an extra `true` third
    // argument (reported in the "generic" column) and once without it (reported
    // in the "x64 asm" column). Both result arrays are asserted to be equal.
    //
    //   operation    - name of the procedure to benchmark (compile-time constant).
    //   type         - integer type of the operands (compile-time constant).
    //   print_result - when true, prints one table row with the best throughputs.
    //
    // Returns the best observed throughput, in operations per microsecond, of
    // each of the two variants.
    performance_test :: ($operation: string, $type: Type, print_result: bool = true) -> ops_per_us_gen: float, ops_per_us_asm: float {
        NUM_TESTS :: 500;
        SIZE_DATA :: 12000; // Keep it below cache size.

        best_gen := 0.0;
        best_asm := 0.0;

        numbers_x: [..] type;
        numbers_y: [..] type;
        numbers_zgen: [SIZE_DATA] type;
        numbers_zasm: [SIZE_DATA] type;

        // Release the operand buffers on exit; this procedure is called once per
        // operation/type pair and would otherwise leak both arrays every time.
        defer array_free(numbers_x);
        defer array_free(numbers_y);

        array_reserve(*numbers_x, SIZE_DATA);
        array_reserve(*numbers_y, SIZE_DATA);

        // Value range of the operand type.
        #if type == u8  { MIN :: 0;       MAX :: U8_MAX;  }
        #if type == u16 { MIN :: 0;       MAX :: U16_MAX; }
        #if type == u32 { MIN :: 0;       MAX :: U32_MAX; }
        #if type == u64 { MIN :: 0;       MAX :: U64_MAX; }
        #if type == s8  { MIN :: S8_MIN;  MAX :: S8_MAX;  }
        #if type == s16 { MIN :: S16_MIN; MAX :: S16_MAX; }
        #if type == s32 { MIN :: S32_MIN; MAX :: S32_MAX; }
        #if type == s64 { MIN :: S64_MIN; MAX :: S64_MAX; }

        for 0..SIZE_DATA-1 {
            x := cast(type) random_get_within_range(xx MIN, xx MAX);
            y := cast(type) random_get_within_range(xx MIN, xx MAX);

            // Never divide by zero.
            if y == 0 && operation == "div" {
                y = 1;
            }

            array_add(*numbers_x, x);
            array_add(*numbers_y, y);
        }

        for 0..NUM_TESTS-1 {
            time_gen := current_time_monotonic();
            for 0..SIZE_DATA-1
                #insert #run replace("numbers_zgen[it] = OP(numbers_x[it], numbers_y[it], true);", "OP", operation);
            time_gen = current_time_monotonic() - time_gen;

            time_asm := current_time_monotonic();
            for 0..SIZE_DATA-1
                #insert #run replace("numbers_zasm[it] = OP(numbers_x[it], numbers_y[it]);", "OP", operation);
            time_asm = current_time_monotonic() - time_asm;

            // Both variants must agree on every element.
            for 0..SIZE_DATA-1
                assert(numbers_zgen[it] == numbers_zasm[it]);

            // Clamp to 1 us so a run faster than the microsecond resolution
            // cannot divide by zero and report an infinite throughput.
            us_gen := max(to_microseconds(time_gen), 1);
            us_asm := max(to_microseconds(time_asm), 1);

            perf_gen := cast(float)SIZE_DATA/cast(float)us_gen;
            perf_asm := cast(float)SIZE_DATA/cast(float)us_asm;

            best_gen = max(best_gen, perf_gen);
            best_asm = max(best_asm, perf_asm);
        }

        if print_result {
            // Pad the two-character type names so the rows line up.
            if type == s8 || type == u8
                write_string(" ");
            print("% | % | % |\n", type, best_gen, best_asm);
        }

        return best_gen, best_asm;
    }

    // Fixed-width float formatting for the results table.
    ff := context.print_style.default_format_float;
    ff.zero_removal = .NO;
    ff.width = 7;
    ff.trailing_width = 1;
    context.print_style.default_format_float = ff;

    write_strings(
        " # (ops / usec) #\n",
        " | generic | x64 asm |\n"
    );

    write_strings(
        "--- | ----------------- |\n",
        " | add |\n"
    );
    performance_test("add", u8);
    performance_test("add", u16);
    performance_test("add", u32);
    performance_test("add", u64);
    performance_test("add", s8);
    performance_test("add", s16);
    performance_test("add", s32);
    performance_test("add", s64);

    write_strings(
        "--- | ----------------- |\n",
        " | sub |\n"
    );
    performance_test("sub", u8);
    performance_test("sub", u16);
    performance_test("sub", u32);
    performance_test("sub", u64);
    performance_test("sub", s8);
    performance_test("sub", s16);
    performance_test("sub", s32);
    performance_test("sub", s64);

    write_strings(
        "--- | ----------------- |\n",
        " | mul |\n"
    );
    performance_test("mul", u8);
    performance_test("mul", u16);
    performance_test("mul", u32);
    performance_test("mul", u64);
    performance_test("mul", s8);
    performance_test("mul", s16);
    performance_test("mul", s32);
    performance_test("mul", s64);

    write_strings(
        "--- | ----------------- |\n",
        " | div |\n"
    );
    performance_test("div", u8);
    performance_test("div", u16);
    performance_test("div", u32);
    performance_test("div", u64);
    performance_test("div", s8);
    performance_test("div", s16);
    performance_test("div", s32);
    performance_test("div", s64);
}