diff options
Diffstat (limited to 'modules/UTF8.jai')
| -rw-r--r-- | modules/UTF8.jai | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/modules/UTF8.jai b/modules/UTF8.jai index fac1326..eba4585 100644 --- a/modules/UTF8.jai +++ b/modules/UTF8.jai @@ -1,4 +1,5 @@ // BBBB BBBB & 1100 0000 == 10XX XXXX -> is continuation byte +// TODO Maybe rename to: is_continuation_byte is_utf8_continuation_byte :: inline (byte: u8) -> bool { return (byte & 0xC0) == 0x80; } @@ -6,6 +7,7 @@ is_utf8_continuation_byte :: inline (byte: u8) -> bool { // BBBB BBBB & 1110 0000 == 110X XXXX -> 1 initial + 1 continuation byte // BBBB BBBB & 1111 0000 == 1110 XXXX -> 1 initial + 2 continuation byte // BBBB BBBB & 1111 1000 == 1111 0XXX -> 1 initial + 3 continuation byte +// TODO Maybe rename to: count_character_bytes count_utf8_bytes :: inline (byte: u8) -> int { if (byte & 0xE0) == 0xC0 return 1+1; if (byte & 0xF0) == 0xE0 return 1+2; @@ -16,6 +18,7 @@ count_utf8_bytes :: inline (byte: u8) -> int { // Truncates the string to the length provided or shorter, in case of UTF8 strings that require so. // Truncation is done by zeroing the tail of the string in place. // Returns length of truncated string. +// TODO Maybe rename to: truncate truncate_string :: (str: string, length: int) -> length: int { if str.data == null then return -1; @@ -26,7 +29,6 @@ truncate_string :: (str: string, length: int) -> length: int { // Find index of first continuation byte. idx := length; - // while (idx > 0 && ((data[idx - 1] & 0xC0) == 0x80)) { TODO REMOVE AFTER TESTING while (idx > 0 && is_utf8_continuation_byte(data[idx - 1])) { idx -= 1; } @@ -48,10 +50,12 @@ truncate_string :: (str: string, length: int) -> length: int { } memset(data + length, 0, count - length); + // str.count = length; TODO We should be doing this... return length; } // Returns true when the string is empty or consists of space characters. 
+// TODO Maybe rename to: is_empty
 is_empty_string :: (str: string) -> bool {
     for 0..str.count-1 {
         if str[it] == {
@@ -69,3 +73,45 @@ is_empty_string :: (str: string) -> bool {
     }
     return true;
 }
+
+// Counts the characters in str by stepping from lead byte to lead byte.
+// NOTE(review): relies on count_utf8_bytes returning >= 1 for every byte - TODO confirm.
+count_characters :: (str: string) -> int {
+    characters := 0;
+    idx := 0;
+    while idx < str.count {
+        idx += count_utf8_bytes(str[idx]);
+        characters += 1;
+    }
+    return characters;
+}
+
+// Deletes the character at character_idx in place: shifts the tail left, zeroes the freed
+// bytes and shrinks str.count. Does nothing when character_idx does not address a character.
+delete_character :: (str: *string, character_idx: int) {
+    buffer_idx, success := map_character_to_buffer_idx(str.*, character_idx);
+    if !success || buffer_idx >= str.count then return; // Invalid index or one-past-the-end.
+    bytes_to_delete := count_utf8_bytes(str.data[buffer_idx]);
+    // Clamp: a truncated trailing sequence must not eat bytes in front of buffer_idx.
+    if bytes_to_delete > str.count - buffer_idx then bytes_to_delete = str.count - buffer_idx;
+    for buffer_idx..str.count-1-bytes_to_delete { str.data[it] = str.data[it+bytes_to_delete]; }
+    for str.count-bytes_to_delete..str.count-1 { str.data[it] = 0; }
+    str.count -= bytes_to_delete;
+}
+
+// Maps a character index to the byte offset where that character starts (str.count for the end).
+// Fails with -1 for a negative index and with -2 for an index greater than str.count.
+// TODO Maybe rename to: map_character_to_byte_idx or get_character_byte_idx
+map_character_to_buffer_idx :: (str: string, character_idx: int) -> buffer_idx: int, success: bool {
+    if character_idx < 0 then return -1, false;
+    if character_idx > str.count then return -2, false;
+    if character_idx == 0 then return 0, true;
+    buff_idx := 0;
+    char_idx := 0;
+    while buff_idx < str.count && char_idx != character_idx {
+        buff_idx += count_utf8_bytes(str[buff_idx]);
+        char_idx += 1;
+    }
+    // A truncated trailing sequence can step past the end; that is not a valid offset.
+    return buff_idx, char_idx == character_idx && buff_idx <= str.count;
+} |
