diff options
| -rw-r--r-- | ttt.jai | 53 |
1 files changed, 30 insertions, 23 deletions
@@ -177,35 +177,42 @@ is_equal_to_any :: (to_compare :string, test_a :string, test_b :string) -> bool // The string should have capacity for at least length + 1. // The terminating null byte ('\0') is not included in length. // Returns the truncated string length. -truncate_string_utf8 :: (str: string, length: s64) -> length: s64 { + +Text_Encoding :: enum u8 #specified { + ASCII :: 1; + UTF8 :: 2; +} + +// WIP TODO Ues compiler time code to see the auto bake being used... just for fun, once! :D +truncate_string :: (str: string, length: s64, $encoding: Text_Encoding = .UTF8) -> length: s64 #no_abc { // TODO Should I use #no_abc ? assert(str.data != null); assert(str.count >= length); data := str.data; count := str.count; - // WIP simplify this if/else and try to use only idx or length - - // Find index of first continuation byte. - idx := length; - while (idx > 0 && ((data[idx - 1] & 0xC0) == 0x80)) { - idx -= 1; - } - continuation_bytes := length - idx; + #if encoding == .UTF8 { // WIP simplify this if/else and try to use only idx or length + // Find index of first continuation byte. + idx := length; + while (idx > 0 && ((data[idx - 1] & 0xC0) == 0x80)) { + idx -= 1; + } + continuation_bytes := length - idx; - // If string starts with continuation bytes, it's an invalid UTF8 string. - if (idx == 0 && continuation_bytes > 0) { - length = 0; - } - // If length truncates some continuation bytes, remove incomplete UTF8 character. - else if (idx > 0 // string is not empty - // continuation bytes are not complete - && !(continuation_bytes == 0 && (data[idx - 1] & 0x80) == 0x00) - && !(continuation_bytes == 1 && (data[idx - 1] & 0xE0) == 0xC0) - && !(continuation_bytes == 2 && (data[idx - 1] & 0xF0) == 0xE0) - && !(continuation_bytes == 3 && (data[idx - 1] & 0xF8) == 0xF0) - ) { - length -= (continuation_bytes + 1); // Remove '+ 1' start byte. + // If string starts with continuation bytes, it's an invalid UTF8 string. + if (idx == 0 && continuation_bytes > 0) { + length = 0; + } + // If length truncates some continuation bytes, remove incomplete UTF8 character. + else if (idx > 0 // string is not empty + // continuation bytes are not complete + && !(continuation_bytes == 0 && (data[idx - 1] & 0x80) == 0x00) + && !(continuation_bytes == 1 && (data[idx - 1] & 0xE0) == 0xC0) + && !(continuation_bytes == 2 && (data[idx - 1] & 0xF0) == 0xE0) + && !(continuation_bytes == 3 && (data[idx - 1] & 0xF8) == 0xF0) + ) { + length -= (continuation_bytes + 1); // Remove '+ 1' start byte. + } } memset(data + length, 0, count - length); @@ -1300,7 +1307,7 @@ main :: () { print("--- --- ---\n"); xpto: string = copy_string("ç€dam"); memcpy(vb.data, xpto.data, xpto.count); - truncate_string_utf8(xx vb, 5); + truncate_string(xx vb, 4); // memset(xpto.data, 0, 5); print(">'%'\n", xpto); print(">'%'\n", cast(string)vb); |
