2022-09-14 10:31:37 +02:00
|
|
|
#include <array>
|
|
|
|
#include <exception>
|
|
|
|
#include <iostream>
|
|
|
|
#include <optional>
|
|
|
|
#include <sstream>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace {

/// Exception type for code paths that have not been implemented yet.
///
/// The message returned by what() is built once at construction time and
/// stored in the object.
class todo : public std::exception {
  // Complete message handed out by what().
  std::string cause;

public:
  /// Creates the exception with the generic "Not yet implemented!" message.
  todo() : cause("Not yet implemented!") {}

  /// Creates the exception with `excuse` appended to the
  /// "Not yet implemented: " prefix.
  ///
  /// `explicit` so a temporary string is never silently turned into an
  /// exception object.
  explicit todo(std::string &&excuse)
      : cause("Not yet implemented: " + excuse) {}

  /// Returns the stored message as a C string that points into this object.
  ///
  /// Declared `noexcept override`: the old `throw()` specification is
  /// deprecated in C++17 and no longer exists in C++20.
  const char *what() const noexcept override { return cause.c_str(); }
};

/// Derives the total length of a UTF-8 sequence from its leading byte.
///
/// Only bits 3..7 of `first` are inspected; anything above the low eight bits
/// is ignored here, so range checking is the caller's job.
///
/// @param first value of the leading byte
/// @return 1 for `0xxxxxxx`, 2 for `110xxxxx`, 3 for `1110xxxx`, 4 for
///         `11110xxx`; an empty optional for every other bit pattern
///         (e.g. a continuation byte `10xxxxxx` or `11111xxx`).
auto expected_size(int first) -> std::optional<std::size_t> {
  // One entry per sequence length: which bits to look at, what they must be.
  struct LeadPattern {
    int mask;
    int marker;
    std::size_t length;
  };

  static constexpr std::array<LeadPattern, 4> PATTERNS = {{
      {0x80, 0x00, 1}, // 0xxxxxxx
      {0xE0, 0xC0, 2}, // 110xxxxx
      {0xF0, 0xE0, 3}, // 1110xxxx
      {0xF8, 0xF0, 4}, // 11110xxx
  }};

  // Masking instead of shifting keeps the test well defined for negative
  // values as well.
  for (const auto &pattern : PATTERNS) {
    if ((first & pattern.mask) == pattern.marker) {
      return pattern.length;
    }
  }

  return std::nullopt;
}

} // namespace
|
2022-09-14 10:31:37 +02:00
|
|
|
|
|
|
|
class Solution {
|
|
|
|
static constexpr int CONTINUATION_BYTE = 2;
|
|
|
|
|
2024-01-03 12:06:42 +01:00
|
|
|
public:
|
|
|
|
auto validUtf8(const std::vector<int> &data) -> bool {
|
2022-09-14 10:31:37 +02:00
|
|
|
for (auto i = 0; i < data.size();) {
|
|
|
|
auto expected_length = expected_size(data[i]);
|
|
|
|
if (!expected_length.has_value()) {
|
|
|
|
// std::cout << "corrupted first byte\n";
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i + *expected_length > data.size()) {
|
2024-01-03 12:06:42 +01:00
|
|
|
// std::cout << "unexpected length of size " << data.size() << "
|
|
|
|
// ≠ " << *expected_length << "\n";
|
2022-09-14 10:31:37 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// check first byte
|
|
|
|
if (data[i] >= (1 << 8)) {
|
|
|
|
// std::cout << "incorrect leading byte" << "\n";
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// check remaining bytes
|
|
|
|
for (auto j = 1; j < expected_length; j++) {
|
|
|
|
if ((data[i + j] >> 6) != CONTINUATION_BYTE) {
|
|
|
|
// std::cout << "invalid continuation byte" << "\n";
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
i += *expected_length;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#pragma region tests
|
|
|
|
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
|
2024-01-03 12:06:42 +01:00
|
|
|
// 197 (0b11000101) opens a two-byte sequence, 130 (0b10000010) is its
// continuation byte and 1 is a one-byte character on its own.
TEST(examples, valid) {
  const std::vector<int> bytes{197, 130, 1};
  Solution validator;

  ASSERT_TRUE(validator.validUtf8(bytes));
}
|
|
|
|
|
2024-01-03 12:06:42 +01:00
|
|
|
// 235 (0b11101011) announces a three-byte sequence and 140 (0b10001100) is a
// continuation byte, but 4 (0b00000100) does not carry the `10` prefix.
TEST(examples, invalid) {
  const std::vector<int> bytes{235, 140, 4};
  Solution validator;

  ASSERT_FALSE(validator.validUtf8(bytes));
}
|
|
|
|
|
2024-01-03 12:06:42 +01:00
|
|
|
// A single byte with a cleared top bit (64 = 0b01000000) is a complete
// one-byte sequence.
TEST(valid, ascii_byte) {
  const std::vector<int> bytes{64};
  Solution validator;

  ASSERT_TRUE(validator.validUtf8(bytes));
}
|
|
|
|
|
|
|
|
// A single element that must be rejected.
// NOTE(review): `2 << 7` evaluates to 256, which does not fit into one byte.
// If a lone continuation byte (0b10000000) was intended, the value would be
// `2 << 6` - confirm with the author.
TEST(invalid, just_one_byte) {
  const std::vector<int> bytes{2 << 7};
  Solution validator;

  ASSERT_FALSE(validator.validUtf8(bytes));
}
|
|
|
|
|
2024-01-03 12:06:42 +01:00
|
|
|
int main(int argc, char **argv) {
|
2022-09-14 10:31:37 +02:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|
|
|
|
|
|
|
|
#pragma endregion /* tests */
|