diff --git a/problems/utf-8-validation.cpp b/problems/utf-8-validation.cpp new file mode 100644 index 0000000..6097c91 --- /dev/null +++ b/problems/utf-8-validation.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace { +class todo : public std::exception { + std::string cause; + +public: + todo() + : cause("Not yet implemented!") + { + } + + todo(std::string&& excuse) + : cause("Not yet implemented: " + excuse) + { + } + + virtual const char* what() const throw() + { + return cause.c_str(); + } +}; + +auto expected_size(int first) -> std::optional { + static constexpr std::array HEADERS = { + 0, 6, 14, 30 + }; + + for (auto i = 0; i < 4; i++) { + auto mask_length = 1 + i + (i > 0); + auto mask = (1 << (mask_length)) - 1; + + if (((first >> (8 - mask_length)) & mask) == HEADERS[i]) { + return {i + 1}; + } + } + + return {}; +} + +} + +class Solution { + static constexpr int CONTINUATION_BYTE = 2; + +public: + auto validUtf8(const std::vector& data) -> bool + { + for (auto i = 0; i < data.size();) { + auto expected_length = expected_size(data[i]); + if (!expected_length.has_value()) { + // std::cout << "corrupted first byte\n"; + return false; + } + + if (i + *expected_length > data.size()) { + // std::cout << "unexpected length of size " << data.size() << " ≠ " << *expected_length << "\n"; + return false; + } + + // check first byte + if (data[i] >= (1 << 8)) { + // std::cout << "incorrect leading byte" << "\n"; + return false; + } + + // check remaining bytes + for (auto j = 1; j < expected_length; j++) { + if ((data[i + j] >> 6) != CONTINUATION_BYTE) { + // std::cout << "invalid continuation byte" << "\n"; + return false; + } + } + + i += *expected_length; + } + + return true; + } +}; + +#pragma region tests + +#include + +TEST(examples, valid) +{ + Solution s; + ASSERT_TRUE(s.validUtf8(std::vector { 197, 130, 1 })); +} + +TEST(examples, invalid) +{ + Solution s; + ASSERT_FALSE(s.validUtf8(std::vector { 235, 140, 4 })); +} + +TEST(valid, ascii_byte) +{ + Solution s; + ASSERT_TRUE(s.validUtf8(std::vector { 64 })); +} + +TEST(invalid, just_one_byte) { + Solution s; + ASSERT_FALSE(s.validUtf8(std::vector {2 << 7})); +} + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +#pragma endregion /* tests */