Lukhnos: Optimized KeyValueBlobReader.

This commit is contained in:
ShikiSuen 2022-01-23 23:15:29 +08:00
parent a713cb324c
commit 8b9e5ca57e
2 changed files with 144 additions and 130 deletions

View File

@ -10,116 +10,118 @@
namespace vChewing { namespace vChewing {
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out) { KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
static auto new_line = [](char c) { return c == '\n' || c == '\r'; }; {
static auto blank = [](char c) { return c == ' ' || c == '\t'; }; static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); }; static auto blank = [](char c) { return c == ' ' || c == '\t'; };
static auto content_char = [](char c) { static auto blank_or_newline
return !blank(c) && !new_line(c); = [](char c) { return blank(c) || new_line(c); };
}; static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
if (state_ == State::ERROR) { if (state_ == State::ERROR) {
return state_;
}
const char* key_begin = nullptr;
size_t key_length = 0;
const char* value_begin = nullptr;
size_t value_length = 0;
while (true) {
state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
// Check if it's a comment line; if so, read until end of line.
if (*current_ != '#') {
break;
}
state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
}
// No need to check whether *current_ is a content_char, since content_char
// is defined as not blank and not new_line.
key_begin = current_;
state_ = SkipUntilNot(content_char);
if (state_ != State::CAN_CONTINUE) {
goto error;
}
key_length = current_ - key_begin;
// There should be at least one blank character after the key string.
if (!blank(*current_)) {
goto error;
}
state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE) {
goto error;
}
if (!content_char(*current_)) {
goto error;
}
value_begin = current_;
// Value must only contain content characters; blanks are not allowed.
// Also, there's no need to check the state after this, since we will always
// emit the value. This also avoids the situation where trailing spaces in a
// line would become part of the value.
SkipUntilNot(content_char);
value_length = current_ - value_begin;
// Unconditionally skip until the end of the line. This prevents the case
// like "foo bar baz\n" where baz should not be treated as the Next key.
SkipUntil(new_line);
if (out != nullptr) {
*out = KeyValue { std::string_view { key_begin, key_length },
std::string_view { value_begin, value_length } };
}
state_ = State::HAS_PAIR;
return state_; return state_;
}
const char* key_begin = nullptr;
size_t key_length = 0;
const char* value_begin = nullptr;
size_t value_length = 0;
while (true) {
state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
// Check if it's a comment line; if so, read until end of line.
if (*current_ != '#') {
break;
}
state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE) {
return state_;
}
}
// No need to check whether *current_ is a content_char, since content_char
// is defined as not blank and not new_line.
key_begin = current_;
state_ = SkipUntilNot(content_char);
if (state_ != State::CAN_CONTINUE) {
goto error;
}
key_length = current_ - key_begin;
// There should be at least one blank character after the key string.
if (!blank(*current_)) {
goto error;
}
state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE) {
goto error;
}
if (!content_char(*current_)) {
goto error;
}
value_begin = current_;
// Value must only contain content characters; blanks are not allowed.
// Also, there's no need to check the state after this, since we will always
// emit the value. This also avoids the situation where trailing spaces in a
// line would become part of the value.
SkipUntilNot(content_char);
value_length = current_ - value_begin;
// Unconditionally skip until the end of the line. This prevents the case
// like "foo bar baz\n" where baz should not be treated as the Next key.
SkipUntil(new_line);
if (out != nullptr) {
*out = KeyValue{
std::string_view{key_begin, key_length},
std::string_view{value_begin, value_length}};
}
state_ = State::HAS_PAIR;
return state_;
error: error:
state_ = State::ERROR; state_ = State::ERROR;
return State::ERROR; return state_;
} }
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot( KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
const std::function<bool(char)>& f) { const std::function<bool(char)>& f)
while (current_ != end_ &&* current_) { {
if (!f(*current_)) { while (current_ != end_ && *current_) {
return State::CAN_CONTINUE; if (!f(*current_)) {
return State::CAN_CONTINUE;
}
++current_;
} }
++current_;
}
return State::END; return State::END;
} }
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil( KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
const std::function<bool(char)>& f) { const std::function<bool(char)>& f)
while (current_ != end_ &&* current_) { {
if (f(*current_)) { while (current_ != end_ && *current_) {
return State::CAN_CONTINUE; if (f(*current_)) {
return State::CAN_CONTINUE;
}
++current_;
} }
++current_;
}
return State::END; return State::END;
} }
std::ostream& operator<<(std::ostream& os, std::ostream& operator<<(
const KeyValueBlobReader::KeyValue& kv) { std::ostream& os, const KeyValueBlobReader::KeyValue& kv)
os << "(key: " << kv.key << ", value: " << kv.value << ")"; {
return os; os << "(key: " << kv.key << ", value: " << kv.value << ")";
return os;
} }
} // namespace vChewing } // namespace vChewing

View File

@ -31,49 +31,61 @@
namespace vChewing { namespace vChewing {
class KeyValueBlobReader { class KeyValueBlobReader {
public: public:
enum class State : int { enum class State : int {
// There are no more key-value pairs in this blob. // There are no more key-value pairs in this blob.
END = 0, END = 0,
// The reader has produced a new key-value pair. // The reader has produced a new key-value pair.
HAS_PAIR = 1, HAS_PAIR = 1,
// An error is encountered and the parsing stopped. // An error is encountered and the parsing stopped.
ERROR = -1, ERROR = -1,
// Internal-only state: the parser can continue parsing. // Internal-only state: the parser can continue parsing.
CAN_CONTINUE = 2 CAN_CONTINUE = 2
}; };
struct KeyValue { struct KeyValue {
constexpr KeyValue() : key(""), value("") {} constexpr KeyValue()
constexpr KeyValue(std::string_view k, std::string_view v) : key("")
: key(k), value(v) {} , value("")
{
}
constexpr KeyValue(std::string_view k, std::string_view v)
: key(k)
, value(v)
{
}
bool operator==(const KeyValue& another) const { bool operator==(const KeyValue& another) const
return key == another.key && value == another.value; {
return key == another.key && value == another.value;
}
std::string_view key;
std::string_view value;
};
KeyValueBlobReader(const char* blob, size_t size)
: current_(blob)
, end_(blob + size)
{
} }
std::string_view key; // Parse the next key-value pair and return the state of the reader. If
std::string_view value; // `out` is passed, out will be set to the produced key-value pair if there
}; // is one.
State Next(KeyValue* out = nullptr);
KeyValueBlobReader(const char* blob, size_t size) private:
: current_(blob), end_(blob + size) {} State SkipUntil(const std::function<bool(char)>& f);
State SkipUntilNot(const std::function<bool(char)>& f);
// Parse the next key-value pair and return the state of the reader. If `out` const char* current_;
// is passed, out will be set to the produced key-value pair if there is one. const char* end_;
State Next(KeyValue* out = nullptr); State state_ = State::CAN_CONTINUE;
private:
State SkipUntil(const std::function<bool(char)>& f);
State SkipUntilNot(const std::function<bool(char)>& f);
const char* current_;
const char* end_;
State state_ = State::CAN_CONTINUE;
}; };
std::ostream& operator<<(std::ostream&, const KeyValueBlobReader::KeyValue&); std::ostream& operator<<(std::ostream&, const KeyValueBlobReader::KeyValue&);
} // namespace vChewing } // namespace vChewing
#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ #endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_