AsmGrader 0.0.0
Loading...
Searching...
No Matches
expression_inspection.hpp
Go to the documentation of this file.
1
6#pragma once
7
10#include <asmgrader/logging.hpp>
11
12#include <fmt/format.h>
13#include <gsl/narrow>
14#include <libassert/assert.hpp>
15#include <range/v3/algorithm/any_of.hpp>
16#include <range/v3/algorithm/contains.hpp>
17#include <range/v3/algorithm/copy.hpp>
18#include <range/v3/algorithm/count.hpp>
19#include <range/v3/algorithm/count_if.hpp>
20#include <range/v3/algorithm/equal.hpp>
21#include <range/v3/algorithm/find.hpp>
22#include <range/v3/algorithm/find_if.hpp>
23#include <range/v3/algorithm/find_if_not.hpp>
24#include <range/v3/algorithm/sort.hpp>
25#include <range/v3/range/access.hpp>
26#include <range/v3/range/concepts.hpp>
27#include <range/v3/view/any_view.hpp>
28#include <range/v3/view/enumerate.hpp>
29#include <range/v3/view/take.hpp>
30#include <range/v3/view/take_while.hpp>
31#include <range/v3/view/transform.hpp>
32
33#include <algorithm>
34#include <array>
35#include <concepts>
36#include <cstddef>
37#include <exception>
38#include <functional>
39#include <optional>
40#include <span>
41#include <string>
42#include <string_view>
43#include <type_traits>
44#include <utility>
45
47
56struct Token
57{
73 enum class Kind {
76 Unknown,
77
84
92
103
106
110
115
130
134
150
167
173
178 Grouping,
179
198
217 // TODO: Maybe support alternate spellings like 'and', 'not', etc.
218 Operator,
219
224 };
225
227 std::string_view str;
228
229 constexpr bool operator==(const Token&) const = default;
230};
231
232constexpr std::string_view format_as(const Token::Kind token_kind) {
233 using enum Token::Kind;
234 switch (token_kind) {
235 case Unknown:
236 return "Unknown";
237 case StringLiteral:
238 return "StringLiteral";
239 case RawStringLiteral:
240 return "RawStringLiteral";
241 case CharLiteral:
242 return "CharLiteral";
243 case IntBinLiteral:
244 return "IntBinLiteral";
245 case IntOctLiteral:
246 return "IntOctLiteral";
247 case IntDecLiteral:
248 return "IntDecLiteral";
249 case IntHexLiteral:
250 return "IntHexLiteral";
251 case FloatLiteral:
252 return "FloatLiteral";
253 case FloatHexLiteral:
254 return "FloatHexLiteral";
255 case Identifier:
256 return "Identifier";
257 case Grouping:
258 return "Grouping";
259 case Operator:
260 return "Operator";
261 case EndDelimiter:
262 return "EndDelimiter";
263 default:
264 UNREACHABLE(token_kind);
265 }
266}
267
268constexpr std::pair<Token::Kind, std::string_view> format_as(const Token& tok) {
269 return {tok.kind, tok.str};
270}
271
274class ParsingError : public std::exception
275{
276public:
277 // Using constexpr where possible for the unlikely possibility of eventually updating to C++26
278 // See: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3068r6.html
279
280 explicit ParsingError(std::string msg)
281 : msg_{std::move(msg)} {}
282
283 ParsingError(std::string msg, std::string_view stream_state)
284 : msg_{std::move(msg)}
285 , stream_state_{stream_state} {}
286
287 ParsingError(std::string msg, std::string_view stream_state, Token::Kind token_kind)
288 : msg_{std::move(msg)}
289 , stream_state_{stream_state}
290 , token_kind_{token_kind} {}
291
292 const char* what() const noexcept override { return get_pretty().c_str(); }
293
294 constexpr const std::string& msg() const noexcept { return msg_; }
295
296 constexpr const std::optional<std::string>& stream_state() const noexcept { return stream_state_; }
297
298 constexpr const std::optional<Token::Kind>& token_kind() const noexcept { return token_kind_; }
299
301 std::string pretty() const { return get_pretty(); }
302
303private:
304 std::string& get_pretty() const {
305 static std::string pretty_cache;
306
307 if (pretty_cache.empty()) {
308 std::string_view stream_state_str = stream_state_ ? std::string_view{*stream_state_} : "<unknown>";
309 std::string_view token_kind_str = token_kind_ ? format_as(*token_kind_) : "<unknown>";
310
311 pretty_cache = fmt::format("{} : state={:?}, token={}", msg_, stream_state_str, token_kind_str);
312 }
313
314 return pretty_cache;
315 }
316
318 std::string msg_;
320 std::optional<std::string> stream_state_;
322 std::optional<Token::Kind> token_kind_;
323};
324
325inline const std::exception& format_as(const ParsingError& err) {
326 return err;
327}
328
330namespace tokenize {
331
// I'm pretty rusty so heavy inspiration was taken from https://craftinginterpreters.com/parsing-expressions.html
//
// This implementation is not extremely efficient, but my use case is all in constexpr-contexts, so it doesn't
// matter much at all.
//
// The assumption is made that the token stream is in a valid state upon the call of every function.
// For instance, if we see that the stream starts with a ', we assume it's a char literal and not a
// separator within a digit literal.
// It is also assumed that the token stream is syntactically valid.
341
343
/// Returns the part of `str` that precedes the first occurrence of `token`
/// (a character or string). When `token` does not occur, all of `str` is returned.
[[nodiscard]] constexpr std::string_view substr_to(std::string_view str, auto token) {
    const std::size_t cut = str.find(token);

    return cut == std::string_view::npos ? str : str.substr(0, cut);
}
350
351static_assert(substr_to("abcd", 'c') == "ab");
352static_assert(substr_to("abc ef ab", "ef") == "abc ");
353
358[[nodiscard]] constexpr std::string_view substr_to(std::string_view str, std::invocable<char> auto pred) {
359 auto iter = ranges::find_if(str, pred);
360
361 if (iter == str.end()) {
362 return str;
363 }
364
365 return str.substr(0, static_cast<std::size_t>(iter - str.begin()));
366}
367
368static_assert(substr_to("abc0123 ab", isdigit) == "abc");
369static_assert(substr_to("0123 ab", std::not_fn(isdigit)) == "0123");
370
373[[nodiscard]] constexpr std::string_view substr_past(std::string_view str, auto what) {
374 std::size_t skip_len{};
375 if constexpr (std::invocable<decltype(what), char>) {
376 skip_len = substr_to(str, std::not_fn(what)).size();
377 } else {
378 skip_len = str.find(what) + 1;
379 }
380
381 return str.substr(skip_len);
382}
383
384static_assert(substr_past("abc0123 ab", isalpha) == "0123 ab");
385static_assert(substr_past("abc0123 ab", isalnum) == " ab");
386static_assert(substr_past("abc0123 ab", isdigit) == "abc0123 ab");
387static_assert(substr_past("", isdigit) == "");
388
390
391using enum Token::Kind;
392
393constexpr struct CaseInsensitiveTag
394{
396
398{
399public:
402 {
403 std::span<Token> prevs;
404
405 constexpr Token::Kind last_kind() const {
406 if (prevs.empty()) {
407 return Unknown;
408 }
409 return prevs.back().kind;
410 }
411 };
412
413 // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
415
416 constexpr explicit(false) Stream(const char* string)
417 : str_{string} {}
418
419 constexpr explicit(false) Stream(std::string_view string)
420 : str_{string} {}
421
423 constexpr Stream(std::string_view string, StreamContext context)
424 : ctx{context}
425 , str_{string} {}
426
427 constexpr std::size_t size() const { return str().size(); }
428
429 constexpr bool empty() const { return str().empty(); }
430
432 constexpr char peek() const {
433 if (str().empty()) {
434 throw ParsingError("called peek with empty stream");
435 }
436 return str().at(0);
437 }
438
441 constexpr std::string_view peek(std::size_t n) const {
442 if (n > size()) {
443 throw ParsingError(fmt::format("called peek(n) where n > size() ({} > {})", n, size()));
444 }
445 return str().substr(0, n);
446 }
447
448 // FIXME: These are confusing. Change to more idiomatic forms 'take_while', 'drop_while', etc.
449
451 constexpr std::string_view peek_until(auto what) const { return substr_to(str(), what); }
452
453 constexpr std::string_view peek_while(std::invocable<char> auto pred) const {
454 return peek_until(std::not_fn(pred));
455 }
456
457 constexpr std::string_view peek_through(auto what) const {
458 auto past_sz = peek_past(what).size();
459 return str().substr(0, str().size() - past_sz);
460 }
461
463 constexpr std::string_view peek_past(auto what) const { return substr_past(str(), what); }
464
466 constexpr std::string_view consume(std::size_t n) {
467 auto res = peek(n);
468 idx_ += res.size();
469 return res;
470 }
471
476 template <typename StrLike>
477 requires std::convertible_to<StrLike, std::string_view> || std::same_as<StrLike, char>
478 constexpr bool consume(StrLike str) {
479 if (!starts_with(str)) {
480 return false;
481 }
482
483 if constexpr (std::same_as<char, decltype(str)>) {
484 idx_ += 1;
485 } else {
486 idx_ += std::string_view{str}.size();
487 }
488
489 return true;
490 }
491
492 constexpr bool consume(auto what, CaseInsensitiveTag /*unused*/) {
493 if (!starts_with(what, case_insensitive)) {
494 return false;
495 }
496
497 if constexpr (std::same_as<char, decltype(what)>) {
498 idx_ += 1;
499 } else {
500 idx_ += std::string_view{what}.size();
501 }
502
503 return true;
504 }
505
507 constexpr std::string_view consume_until(auto what) {
508 auto res = peek_until(what);
509 idx_ += res.size();
510 return res;
511 }
512
514 constexpr std::string_view consume_through(auto what) {
515 auto res = peek_through(what);
516 idx_ += res.size();
517 return res;
518 }
519
521 constexpr std::string_view consume_while(std::invocable<char> auto pred) {
522 auto res = peek_while(pred);
523 idx_ += res.size();
524 return res;
525 }
526
528 constexpr bool starts_with(auto what) const { return str().starts_with(what); }
529
530 constexpr bool starts_with(char chr, CaseInsensitiveTag /*unused*/) const {
531 return !str().empty() && tolower(str().front()) == tolower(chr);
532 }
533
534 constexpr bool starts_with(std::string_view string, CaseInsensitiveTag /*unused*/) const {
535 return size() >= string.size() &&
536 ranges::equal(string, str().substr(0, string.size()), std::equal_to{}, tolower, tolower);
537 }
538
539 constexpr std::string_view str() const { return str_.substr(idx_); }
540
541private:
542 std::string_view str_;
543 std::size_t idx_ = 0;
544};
545
546template <std::size_t N>
547constexpr auto make_rev_size_sorted(const std::string_view (&arr)[N]) {
548 std::array<std::string_view, N> array = std::to_array(arr);
549
550 ranges::sort(array, std::greater{}, &std::string_view::size);
551
552 return array;
553}
554
556 "::", ",", //
557 ".", "->", //
558 ".*", "->*", //
559 "+", "-", "*", "/", "%", //
560 "<<", ">>", "^", "|", "&", //
561 "&&", "||", //
562 "==", "!=", "<=>", "<", "<=", ">", ">=", //
563 "=", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "^=", "|=" //
564});
565
567 "++", "--", //
568 "(", ")", "[", "]", //
569 "+", "-", //
570 "~", "!", //
571 "*", "&", //
572 "throw", "sizeof", "alignof", "new", "delete", //
573 //
574 "const_cast", "static_cast", "dynamic_cast", "reinterpret_cast", //
575 "::", "?", ":" //
576});
577
// Only 5 operators, '+' '-' '*' '&' and '::', should be present in both operator_tokens and binary_operator_tokens
579static_assert(ranges::count_if(operator_tokens, std::bind_front(ranges::contains, binary_operator_tokens)) == 5);
580
/// Characters that may act as grouping symbols, listed as opening/closing pairs.
constexpr std::array<char, 6> grouping_tokens{
    '{', '}', //
    '(', ')', //
    '<', '>'  //
};
586
588
/// Whether `str` is accepted as a string/char literal prefix:
/// exactly `L`, a single `u`/`U`, or `u`/`U` followed by `8`.
constexpr bool is_strlike_prefix(std::string_view str) {
    switch (str.size()) {
    case 1:
        return str.front() == 'L' || tolower(str.front()) == 'u';
    case 2:
        return tolower(str.front()) == 'u' && str.back() == '8';
    default:
        // empty, or too long to be a prefix
        return false;
    }
}
606
607static_assert(is_strlike_prefix("L") && is_strlike_prefix("u") && is_strlike_prefix("U8"));
608static_assert(!is_strlike_prefix("au8") && !is_strlike_prefix("\"L"));
609
/// Whether `str` is accepted as an integer-literal suffix: an optional `u`
/// followed by up to two `l`s (any letter case), with at least one character
/// in total. The order is fixed, so e.g. "lu" is rejected.
constexpr bool is_int_suffix(std::string_view str) {
    if (str.empty() || str.size() > 3) {
        return false;
    }

    const auto is_letter = [](char chr, char lowercase) { return tolower(chr) == lowercase; };

    std::size_t pos = 0;

    // optional unsigned spec
    if (is_letter(str[pos], 'u')) {
        ++pos;
    }

    // optional long / long long spec
    std::size_t num_longs = 0;
    while (pos < str.size() && num_longs < 2 && is_letter(str[pos], 'l')) {
        ++pos;
        ++num_longs;
    }

    // anything left over means this is not a suffix
    return pos == str.size();
}
637
638static_assert(is_int_suffix("L"));
639static_assert(is_int_suffix("u"));
640static_assert(is_int_suffix("ul"));
641static_assert(is_int_suffix("LL"));
642static_assert(is_int_suffix("ULL"));
643static_assert(!is_int_suffix("lu") && !is_int_suffix("Lu"));
644
/// Creates a stateful predicate for scanning an identifier one character at a time:
/// the first character it sees must be a letter or `_`; every later character may
/// also be a digit. Create a fresh predicate for each scan.
constexpr auto is_ident_like() {
    return [at_start = true](char c) mutable -> bool {
        const bool leading = std::exchange(at_start, false);

        if (c == '_') {
            return true;
        }

        return leading ? isalpha(c) : isalnum(c);
    };
}
657
/// True for a decimal digit or the digit separator `'`.
constexpr auto digit_or_sep(char c) {
    return c == '\'' || isdigit(c);
}
662
/// True for a hexadecimal digit or the digit separator `'`.
constexpr auto xdigit_or_sep(char c) {
    return c == '\'' || isxdigit(c);
}
667
668// Yes, I know \overload is not technically accurate for the following definitions,
669// but it groups the functions nicely in documentation.
670
676template <Token::Kind Kind>
677constexpr bool matches([[maybe_unused]] const Stream& stream) {
678 return false;
679}
680
684template <>
685constexpr bool matches<StringLiteral>(const Stream& stream) {
686 std::string_view potential_strlike_prefix = stream.peek_until('"');
687
688 // if the prefix is empty, then the stream just starts with " (it's a string)
689 if (potential_strlike_prefix.empty()) {
690 return true;
691 }
692
693 return is_strlike_prefix(potential_strlike_prefix);
694}
695
696static_assert(matches<StringLiteral>(R"("")"));
697static_assert(matches<StringLiteral>(R"(u8"")"));
698static_assert(matches<StringLiteral>(R"(L"")"));
699static_assert(!matches<StringLiteral>(R"(a"")"));
700
704template <>
705constexpr bool matches<RawStringLiteral>(const Stream& stream) {
706 std::string_view potential_strlike_prefix = stream.peek_until("R\"");
707
708 // if the prefix is empty, then the stream just starts with R" (it's a raw string literal)
709 if (potential_strlike_prefix.empty()) {
710 return true;
711 }
712
713 return is_strlike_prefix(potential_strlike_prefix);
714}
715
716static_assert(matches<RawStringLiteral>(R"(R"")"));
717static_assert(matches<RawStringLiteral>(R"(u8R"")"));
718static_assert(matches<RawStringLiteral>(R"(LR"")"));
719static_assert(!matches<RawStringLiteral>(R"("")"));
720
724template <>
725constexpr bool matches<CharLiteral>(const Stream& stream) {
726 std::string_view potential_strlike_prefix = stream.peek_until('\'');
727
728 // if the prefix is empty, then the stream just starts with ' (it's a char literal)
729 if (potential_strlike_prefix.empty()) {
730 return true;
731 }
732
733 return is_strlike_prefix(potential_strlike_prefix);
734}
735
736static_assert(matches<CharLiteral>("''"));
737static_assert(matches<CharLiteral>("u''"));
738static_assert(matches<CharLiteral>("u8''"));
739static_assert(!matches<CharLiteral>("+''"));
740
743template <>
744constexpr bool matches<BoolLiteral>(const Stream& stream) {
745 auto leading = stream.peek_through(is_ident_like());
746
747 return leading == "true" || leading == "false";
748}
749
750static_assert(matches<BoolLiteral>("true"));
751static_assert(matches<BoolLiteral>("false"));
752static_assert(!matches<BoolLiteral>("_false"));
753static_assert(!matches<BoolLiteral>("false_"));
754
758template <>
759constexpr bool matches<IntBinLiteral>(const Stream& stream) {
760 if (!stream.starts_with("0b", case_insensitive)) {
761 return false;
762 }
763
764 return stream.size() > 2 && isdigit(stream.str().at(2));
765}
766
770template <>
771constexpr bool matches<IntHexLiteral>(const Stream& stream) {
772 if (stream.size() <= 2 || !stream.starts_with("0x", case_insensitive)) {
773 return false;
774 }
775
776 std::string_view after_prefix = stream.str().substr(2);
777
778 if (!isxdigit(after_prefix.front())) {
779 return false;
780 }
781
782 Stream after_digits = substr_past(after_prefix, xdigit_or_sep);
783
784 // Hex int digit-seq proceeded by a 'p' is ALWAYS interpreted as an exponent,
785 // as it is defined as such as part of the lexical grammar
786 return !after_digits.starts_with('p', case_insensitive) && !after_digits.starts_with('.');
787}
788
792template <>
793constexpr bool matches<IntOctLiteral>(const Stream& stream) {
794 if (!stream.starts_with('0')) {
795 return false;
796 }
797
798 Stream after_digits = stream.peek_past(digit_or_sep);
799
800 // Non-hex int digit-seq proceeded by a 'e' is ALWAYS interpreted as an exponent,
801 // as it is defined as such as part of the lexical grammar
802 // int digit-seq proceeded by a 'x' is ALWAYS interpreted as a hex literal, for same reason as above
803 return !after_digits.starts_with('x', case_insensitive) && !after_digits.starts_with('e', case_insensitive) &&
804 !after_digits.starts_with('.');
805}
806
811template <>
812constexpr bool matches<IntDecLiteral>(const Stream& stream) {
813 if (stream.empty() || !isdigit(stream.peek())) {
814 return false;
815 }
816
817 if (matches<IntBinLiteral>(stream) || matches<IntOctLiteral>(stream) || matches<IntHexLiteral>(stream)) {
818 return false;
819 }
820
821 Stream after_digits = stream.peek_past(digit_or_sep);
822
823 // Non-hex int digit-seq proceeded by a 'e' is ALWAYS interpreted as an exponent,
824 // as it is defined as such as part of the lexical grammar
825 // int digit-seq proceeded by a 'x' is ALWAYS interpreted as a hex literal, for same reason as above
826 return !after_digits.starts_with('x', case_insensitive) && !after_digits.starts_with('e', case_insensitive) &&
827 !after_digits.starts_with('.');
828}
829
830static_assert(matches<IntDecLiteral>("123"));
831static_assert(matches<IntDecLiteral>("123+"));
832static_assert(matches<IntDecLiteral>("123u"));
833static_assert(matches<IntHexLiteral>("0xAEF102"));
834static_assert(matches<IntHexLiteral>("0xAEF102ull"));
835static_assert(matches<IntBinLiteral>("0b1010"));
836static_assert(matches<IntOctLiteral>("0"));
837static_assert(matches<IntOctLiteral>("01'2"));
838static_assert(matches<IntOctLiteral>("0'1'2345'123"));
839static_assert(matches<IntHexLiteral>("0x1'A'b3F5'123"));
840static_assert(!matches<IntOctLiteral>("'0"));
841static_assert(!matches<IntDecLiteral>("0x123p3"));
842static_assert(!matches<IntDecLiteral>("0x123.0x23"));
843static_assert(!matches<IntDecLiteral>("0x123.0x23p3"));
844static_assert(!matches<IntHexLiteral>("0x123p3"));
845static_assert(!matches<IntHexLiteral>("0x123.0x23"));
846static_assert(!matches<IntHexLiteral>("0x123.0x23p3"));
847static_assert(!matches<IntOctLiteral>("0x123"));
848static_assert(!matches<IntOctLiteral>("0XABC"));
849static_assert(!matches<IntOctLiteral>("0.123"));
850static_assert(!matches<IntDecLiteral>("10.123"));
851static_assert(!matches<IntDecLiteral>(".123"));
852static_assert(!matches<IntOctLiteral>("0.f"));
853static_assert(!matches<IntDecLiteral>("1e5"));
854static_assert(!matches<IntHexLiteral>("0x15p3"));
855
859template <>
860constexpr bool matches<FloatHexLiteral>(const Stream& stream) {
861 // Only 2 valid forms for the start of a hex floating-point literal
862 if (stream.starts_with("0x", case_insensitive)) {
863 return !matches<IntHexLiteral>(stream);
864 }
865
866 return stream.starts_with(".0x", case_insensitive);
867}
868
872template <>
873constexpr bool matches<FloatLiteral>(const Stream& stream) {
874 if (matches<FloatHexLiteral>(stream)) {
875 return false;
876 }
877 // stream starts with '.' and then a digit
878 if (stream.starts_with('.') && stream.size() > 1 && isdigit(stream.str().at(1))) {
879 return true;
880 }
881
882 if (!isdigit(stream.peek())) {
883 return false;
884 }
885
886 return !matches<IntBinLiteral>(stream) && !matches<IntOctLiteral>(stream) && !matches<IntDecLiteral>(stream) &&
887 !matches<IntHexLiteral>(stream);
888}
889
890static_assert(matches<FloatLiteral>(".123"));
891static_assert(matches<FloatLiteral>(".123e52"));
892static_assert(matches<FloatLiteral>(".123e+52"));
893static_assert(matches<FloatLiteral>(".123e-52"));
894static_assert(matches<FloatLiteral>("0.123"));
895static_assert(matches<FloatLiteral>("10.123e31"));
896static_assert(matches<FloatLiteral>("12e42"));
897static_assert(matches<FloatLiteral>("10.123f"));
898static_assert(matches<FloatLiteral>("0.fl"));
899static_assert(matches<FloatLiteral>("0'123.1'2345'6fl"));
900static_assert(matches<FloatLiteral>(".0FL"));
901
902static_assert(matches<FloatHexLiteral>("0xAEFp3"));
903static_assert(matches<FloatHexLiteral>("0xAEFp+3"));
904static_assert(matches<FloatHexLiteral>("0xAEFp-3"));
905static_assert(matches<FloatHexLiteral>("0xAEF.0x123p3"));
906static_assert(matches<FloatHexLiteral>(".0x123p3"));
907static_assert(matches<FloatHexLiteral>("0x12'1EF.p0x123"));
908static_assert(matches<FloatHexLiteral>("0x123.0xABCp10"));
909
910static_assert(!matches<FloatLiteral>("0xAEFp3"));
911static_assert(!matches<FloatLiteral>("0xAEFp+3"));
912static_assert(!matches<FloatLiteral>("0xAEFp-3"));
913static_assert(!matches<FloatLiteral>("0xAEF.0x123p3"));
914static_assert(!matches<FloatLiteral>(".0x123p3"));
915static_assert(!matches<FloatLiteral>("0x12'1EF.p0x123"));
916static_assert(!matches<FloatLiteral>("0x123.0xABCp10"));
917static_assert(!matches<FloatLiteral>("123"));
918static_assert(!matches<FloatLiteral>("0"));
919static_assert(!matches<FloatLiteral>("0b10"));
920static_assert(!matches<FloatHexLiteral>("0b10"));
921static_assert(!matches<FloatHexLiteral>("0xAB10"));
922
926template <>
927constexpr bool matches<Identifier>(const Stream& stream) {
928 // Make sure that the token is not an operator (new, etc.)
929 auto full_token = stream.peek_through(is_ident_like());
930
931 if (full_token.empty()) {
932 return false;
933 }
934
935 return !ranges::contains(operator_tokens, full_token) && !matches<BoolLiteral>(stream);
936}
937
938static_assert(matches<Identifier>("abc"));
939static_assert(matches<Identifier>("_"));
940static_assert(matches<Identifier>("_abc"));
941static_assert(matches<Identifier>("_12abc"));
942static_assert(!matches<Identifier>("1abc"));
943static_assert(!matches<Identifier>("1_abc"));
944static_assert(!matches<Identifier>("+_abc"));
945static_assert(!matches<Identifier>(".123"));
946static_assert(!(matches<Identifier>("new") || matches<Identifier>("sizeof")));
947
951template <>
952constexpr bool matches<Grouping>(const Stream& stream) {
953 char tok = stream.peek();
954
955 if (!ranges::contains(grouping_tokens, tok)) {
956 return false;
957 }
958
959 // these are always grouping symbols
960 if (tok == '{' || tok == '}') {
961 return true;
962 }
963
964 if (tok == '(') {
965 return stream.ctx.last_kind() != Identifier;
966 }
967
968 // a ')' is a closing grouping symbol iff an unmatched opening '(' grouping symbol
969 // is found MORE RECENTLY THAN an unmatched opening '(' operator
970 if (tok == ')') {
971 auto last_grouping_opening = ranges::find(stream.ctx.prevs, Token{.kind = Grouping, .str = "("});
972 // no opening token -> nothing to match to
973 if (last_grouping_opening == ranges::end(stream.ctx.prevs)) {
974 return false;
975 }
976
977 auto num_operator_opening = ranges::count(stream.ctx.prevs, Token{.kind = Operator, .str = "("});
978 auto num_operator_closing = ranges::count(stream.ctx.prevs, Token{.kind = Operator, .str = ")"});
979
980 // there cannot be more closing than opening. That would imply a parsing error
981 if (num_operator_closing > num_operator_opening) {
982 throw ParsingError("closing ')' ops > opening '(' ops", stream.str(), Grouping);
983 }
984
985 // no unmatched operator opening. We must be matching to the previous grouping symbol
986 if (num_operator_opening == num_operator_closing) {
987 return true;
988 }
989
990 auto last_operator_opening = ranges::find(stream.ctx.prevs, Token{.kind = Operator, .str = "("});
991
992 return last_grouping_opening > last_operator_opening;
993 }
994
995 // very basic heuristic for checking if '<' / '>' are being used to surround tparams:
996 // for '<': if an excess '>' is found further in the stream, before any other '<' chars, there is no logical
997 // operator in-between, there is no opening parenthesis in-between, and the chars up to the first alnum/ws/'_'
998 // do not match an operator
999 // for '>': an unmatched '<' exists
1000
1001 // FIXME: This is not worth implementing properly right now
1002 // as long as expressions like the following forms can be parsed:
1003 // `ident<simple-expr>`
1004 // `ident<simple-expr> </> ident<simple-expr>`
1005 // `ident<simple-expr> </> ident`
1006 // `ident </> ident<simple-expr>`
1007 // TODO: handle bitshift ambiguity and '<' / '>' operators nested within a tparam
1008 if (tok == '<') {
1009 auto full_token = stream.peek_until([](char c) { return isalnum(c) || isblank(c) || c == '_'; });
1010 if (full_token.size() > 1 &&
1011 (ranges::contains(binary_operator_tokens, full_token) || ranges::contains(operator_tokens, full_token))) {
1012 return false;
1013 }
1014
1015 auto first_logical_op = std::min(stream.str().find("&&"), stream.str().find("||"));
1016 auto first_opening_paren = stream.str().find('(');
1017 auto next_closing_angled = stream.str().find('>');
1018
1019 if (next_closing_angled > first_logical_op || next_closing_angled > first_opening_paren) {
1020 return false;
1021 }
1022
1023 auto next_opening_angled = stream.str().substr(1).find('<');
1024
1025 return next_opening_angled > next_closing_angled;
1026 }
1027
1028 if (tok == '>') {
1029 auto num_angled_opening = ranges::count(stream.ctx.prevs, Token{.kind = Grouping, .str = "<"});
1030 auto num_angled_closing = ranges::count(stream.ctx.prevs, Token{.kind = Grouping, .str = ">"});
1031
1032 // there cannot be more closing than opening. That would imply a parsing error
1033 // We're not going to assert this for now though, as the heuristic is very basic and will
1034 // have a lot of errors.
1035
1036 if (!std::is_constant_evaluated() && num_angled_closing > num_angled_opening) {
1037 LOG_WARN("Parsing error for '<' '>' grouping tokens (# opening < # closing). Context: (o={},c={}) "
1038 "stream={:?}, prevs={}",
1039 num_angled_opening, num_angled_closing, stream.str(), stream.ctx.prevs);
1040 }
1041
1042 return num_angled_opening > num_angled_closing;
1043 }
1044
1045 UNREACHABLE(tok, stream, stream.ctx.prevs);
1046}
1047
1048// tests with context can be found at parse<Grouping>
1049
1053template <>
1054constexpr bool matches<BinaryOperator>(const Stream& stream) {
1055 // Grouping tokens have the same chars as operators, except require more context checks,
1056 // so give precedence to identifying them
1057 if (matches<Grouping>(stream)) {
1058 return false;
1059 }
1060
1061 // To abide by the maximal munch rule, prioritizing bigger tokens over samller ones
1062 // e.g., "-=" is always picked over "-"
1063 auto max_munch_tokens = binary_operator_tokens;
1064 ranges::sort(max_munch_tokens, std::greater{}, &std::string_view::size);
1065
1066 auto check_stream_starts = std::bind_front(&Stream::starts_with<std::string_view>, stream);
1067 auto iter = ranges::find_if(max_munch_tokens, check_stream_starts);
1068
1069 if (iter == ranges::end(max_munch_tokens)) {
1070 return false;
1071 }
1072
1073 // Logic to seperate binary-operators from any other arity is defined in matches<Operator>
1074
1075 // the only 3 operators that could be unary or binary
1076 if (!ranges::contains(operator_tokens, *iter)) {
1077 return true;
1078 }
1079
1080 // At the start of the stream -> MUST be a unary operator
1081 if (stream.ctx.prevs.empty()) {
1082 return false;
1083 }
1084
1085 // the last token was an operator -> this one MUST be a unary operator
1086 if (Token::Kind kind = stream.ctx.prevs.back().kind; kind == Operator || kind == BinaryOperator) {
1087 return false;
1088 }
1089
1090 return true;
1091}
1092
1096template <>
1097constexpr bool matches<Operator>(const Stream& stream) {
1098 // Grouping tokens have the same chars as operators, except require more context checks,
1099 // so give precedence to identifying them
1100 if (matches<Grouping>(stream)) {
1101 return false;
1102 }
1103
1104 // To abide by the maximal munch rule, prioritizing bigger tokens over samller ones
1105 // e.g., "-=" is always picked over "-"
1106 auto max_munch_tokens = operator_tokens;
1107 ranges::sort(max_munch_tokens, std::greater{}, &std::string_view::size);
1108
1109 auto check_stream_starts = std::bind_front(&Stream::starts_with<std::string_view>, stream);
1110
1111 auto iter = ranges::find_if(max_munch_tokens, check_stream_starts);
1112
1113 if (iter == ranges::end(max_munch_tokens)) {
1114 return false;
1115 }
1116
1117 // the only 2 operators that could be unary or binary
1118 if (ranges::contains(binary_operator_tokens, *iter)) {
1119 return !matches<BinaryOperator>(stream);
1120 }
1121
1122 return true;
1123}
1124
1125static_assert(matches<Operator>("+ 123"));
1126static_assert(matches<Operator>(":: 123"));
1127static_assert(matches<Operator>("? 123"));
1128static_assert(matches<Operator>("sizeof 123"));
1129static_assert(!matches<Operator>("(123)"));
1130static_assert(!matches<Operator>("{ 123"));
1131static_assert(!matches<Operator>("|| 123"));
1132
1133// tests for binary operators are impossible to do without context.
1134// They may be found in test_expression_inspection.cpp
1135
1137
1138template <Token::Kind Kind>
1139constexpr std::string_view test_parse(std::string_view str) {
1140 Stream stream{str};
1141
1142 return parse<Kind>(stream);
1143}
1144
1152template <Token::Kind Kind>
1153constexpr std::string_view parse([[maybe_unused]] Stream& stream) {
1154 return "";
1155}
1156
1159template <>
1160constexpr std::string_view parse<StringLiteral>(Stream& stream) {
1161 if (!matches<StringLiteral>(stream)) {
1162 throw ParsingError("matches precondition failed in parse", stream.str(), StringLiteral);
1163 }
1164
1165 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1166
1167 // Consume through first "
1168 stream.consume_through('"');
1169
1170 auto is_str_end = [prev_backslash = false](char c) mutable {
1171 if (prev_backslash) {
1172 prev_backslash = false;
1173 return false;
1174 }
1175
1176 if (c == '\\') {
1177 prev_backslash = true;
1178 }
1179
1180 return c == '"';
1181 };
1182
1183 stream.consume_until(is_str_end);
1184 stream.consume(1);
1185
1186 return get_res();
1187}
1188
1189static_assert(test_parse<StringLiteral>(R"("")") == R"("")");
1190static_assert(test_parse<StringLiteral>(R"(u"a")") == R"(u"a")");
1191static_assert(test_parse<StringLiteral>(R"(u8"abc")") == R"(u8"abc")");
1192static_assert(test_parse<StringLiteral>(R"(L"abc " )") == R"(L"abc ")");
1193static_assert(test_parse<StringLiteral>(R"("\"")") == R"("\"")");
1194static_assert(test_parse<StringLiteral>(R"("\""")") == R"("\"")");
1195static_assert(test_parse<StringLiteral>(R"("\"""")") == R"("\"")");
1196static_assert(test_parse<StringLiteral>(R"("\\"""")") == R"("\\")");
1197static_assert(test_parse<StringLiteral>(R"("\\\\\" \x12")") == R"("\\\\\" \x12")");
1198
1201template <>
1202constexpr std::string_view parse<RawStringLiteral>(Stream& stream) {
1203 if (!matches<RawStringLiteral>(stream)) {
1204 throw ParsingError("matches precondition failed in parse", stream.str(), RawStringLiteral);
1205 }
1206
1207 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1208
1209 // Consume through first "
1210 stream.consume_until('"');
1211 stream.consume(1);
1212
1213 // get d-char-seq
1214 std::string_view d_char_seq = stream.consume_until('(');
1215
1216 // first, consume until the next ) char
1217 // then check if the d-char-seq AND a " follows that paren.
1218 // if so -> we're done
1219 // if not -> repeat checks above
1220 while (!stream.consume_through(')').empty() && !(stream.consume(d_char_seq) && stream.consume('"'))) {
1221 }
1222
1223 return get_res();
1224}
1225
// Compile-time checks for parse<RawStringLiteral>: empty bodies with and without a prefix (L, u8),
// bodies containing parentheses and quotes, and non-empty delimiters ("a", "123"), including a body
// that contains ")a" before the real terminator.
1226static_assert(test_parse<RawStringLiteral>(R"---(R"()")---") == R"---(R"()")---");
1227static_assert(test_parse<RawStringLiteral>(R"---(LR"()")---") == R"---(LR"()")---");
1228static_assert(test_parse<RawStringLiteral>(R"---(u8R"()")---") == R"---(u8R"()")---");
1229static_assert(test_parse<RawStringLiteral>(R"---(R"(("))")---") == R"---(R"(("))")---");
1230static_assert(test_parse<RawStringLiteral>(R"---(R"a()a")---") == R"---(R"a()a")---");
1231static_assert(test_parse<RawStringLiteral>(R"---(R"a()a)a")---") == R"---(R"a()a)a")---");
1232static_assert(test_parse<RawStringLiteral>(R"---(R"123( 28%\di\""" 2)123")---") == R"---(R"123( 28%\di\""" 2)123")---");
1233
1236template <>
1237constexpr std::string_view parse<CharLiteral>(Stream& stream) {
1238 if (!matches<CharLiteral>(stream)) {
1239 throw ParsingError("matches precondition failed in parse", stream.str(), CharLiteral);
1240 }
1241
1242 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1243
1244 // Exact same strategy used in parse<StringLiteral>, simply replacing " with '
1245
1246 // Consume through first '
1247 stream.consume_through('\'');
1248
1249 auto is_chr_end = [prev_backslash = false](char c) mutable {
1250 if (prev_backslash) {
1251 prev_backslash = false;
1252 return false;
1253 }
1254
1255 if (c == '\\') {
1256 prev_backslash = true;
1257 }
1258
1259 return c == '\'';
1260 };
1261
1262 stream.consume_until(is_chr_end);
1263 stream.consume(1);
1264
1265 return get_res();
1266}
1267
// Compile-time checks for parse<CharLiteral>: empty literals with and without a prefix (u, u8, L),
// a plain character, an escaped backslash, an escaped quote, and a multi-character literal.
1268static_assert(test_parse<CharLiteral>("''") == "''");
1269static_assert(test_parse<CharLiteral>("u''") == "u''");
1270static_assert(test_parse<CharLiteral>("u8''") == "u8''");
1271static_assert(test_parse<CharLiteral>("L''") == "L''");
1272static_assert(test_parse<CharLiteral>("'a'") == "'a'");
1273static_assert(test_parse<CharLiteral>(R"('\\')") == R"('\\')");
1274static_assert(test_parse<CharLiteral>(R"('\'')") == R"('\'')");
1275static_assert(test_parse<CharLiteral>("'abcd'") == "'abcd'");
1276
1277constexpr bool consume_int_suffix(Stream& stream) {
1278 // an int suffix is 1-3 chars long, so just brute force
1279 for (std::size_t len = 3; len >= 1; --len) {
1280 if (len <= stream.size() && is_int_suffix(stream.peek(len))) {
1281 stream.consume(len);
1282 return true;
1283 }
1284 }
1285
1286 return false;
1287}
1288
1291template <>
1292constexpr std::string_view parse<IntBinLiteral>(Stream& stream) {
1293 if (!matches<IntBinLiteral>(stream)) {
1294 throw ParsingError("matches precondition failed in parse", stream.str(), IntBinLiteral);
1295 }
1296
1297 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1298
1299 // Consume the prefix
1300 if (!stream.consume("0b", case_insensitive)) {
1301 throw ParsingError("0b literal prefix missing", stream.str(), IntBinLiteral);
1302 }
1303
1304 stream.consume_through(digit_or_sep);
1305
1306 consume_int_suffix(stream);
1307
1308 return get_res();
1309}
1310
1313template <>
1314constexpr std::string_view parse<IntOctLiteral>(Stream& stream) {
1315 if (!matches<IntOctLiteral>(stream)) {
1316 throw ParsingError("matches precondition failed in parse", stream.str(), IntOctLiteral);
1317 }
1318
1319 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1320
1321 // Consume the prefix
1322 if (!stream.consume('0')) {
1323 throw ParsingError("0 literal prefix missing", stream.str(), IntOctLiteral);
1324 }
1325
1326 stream.consume_through(digit_or_sep);
1327
1328 consume_int_suffix(stream);
1329
1330 return get_res();
1331}
1332
1335template <>
1336constexpr std::string_view parse<IntDecLiteral>(Stream& stream) {
1337 if (!matches<IntDecLiteral>(stream)) {
1338 throw ParsingError("matches precondition failed in parse", stream.str(), IntDecLiteral);
1339 }
1340
1341 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1342
1343 stream.consume_through(digit_or_sep);
1344
1345 consume_int_suffix(stream);
1346
1347 return get_res();
1348}
1349
1352template <>
1353constexpr std::string_view parse<IntHexLiteral>(Stream& stream) {
1354 if (!matches<IntHexLiteral>(stream)) {
1355 throw ParsingError("matches precondition failed in parse", stream.str(), IntHexLiteral);
1356 }
1357
1358 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1359
1360 // Consume the prefix
1361 if (!stream.consume("0x", case_insensitive)) {
1362 throw ParsingError("0x literal prefix missing", stream.str(), IntHexLiteral);
1363 }
1364
1365 stream.consume_through(xdigit_or_sep);
1366
1367 consume_int_suffix(stream);
1368
1369 return get_res();
1370}
1371
// Compile-time checks for the integer literal parsers. The first group covers plain literals, digit
// separators, standard suffixes, and inputs where the literal is followed by an operator or other
// text that must not be part of the token.
1372static_assert(test_parse<IntOctLiteral>("0") == "0");
1373static_assert(test_parse<IntOctLiteral>("0ull") == "0ull");
1374static_assert(test_parse<IntOctLiteral>("0U") == "0U");
1375static_assert(test_parse<IntOctLiteral>("0L") == "0L");
1376static_assert(test_parse<IntOctLiteral>("0UL") == "0UL");
1377static_assert(test_parse<IntDecLiteral>("123'456") == "123'456");
1378static_assert(test_parse<IntDecLiteral>("123+456") == "123");
1379static_assert(test_parse<IntDecLiteral>("123 + 456") == "123");
1380static_assert(test_parse<IntHexLiteral>("0x123ABC") == "0x123ABC");
1381static_assert(test_parse<IntBinLiteral>("0B01'01l") == "0B01'01l");
1382static_assert(test_parse<IntDecLiteral>("123_ab") == "123");

// In the group below, identifier-like text that is not a standard integer suffix directly follows the
// digits; the expected token stops at the end of the digits.
1384// testing for parsing of *really weird* (and UB) user-defined literal operators
1385static_assert(test_parse<IntDecLiteral>("123ABC") == "123");
1386static_assert(test_parse<IntDecLiteral>("1FFF") == "1");
1387static_assert(test_parse<IntDecLiteral>("123'123a_ab") == "123'123");
1388static_assert(test_parse<IntBinLiteral>("0b0101") == "0b0101");
1389static_assert(test_parse<IntBinLiteral>("0b0A123") == "0b0");
1390
1393template <>
1394constexpr std::string_view parse<FloatLiteral>(Stream& stream) {
1395 if (!matches<FloatLiteral>(stream)) {
1396 throw ParsingError("matches precondition failed in parse", stream.str(), FloatLiteral);
1397 }
1398
1399 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1400
1401 stream.consume_through(digit_or_sep);
1402
1403 if (stream.consume('.')) {
1404 stream.consume_through(digit_or_sep);
1405 } else {
1406 // if there is no '.' then there must be an exponent
1407 if (tolower(stream.peek()) != 'e') {
1408 throw ParsingError("exponent char 'e'/'E' missing", stream.str(), FloatLiteral);
1409 }
1410 }
1411
1412 // consume potential exponent and sign
1413 if (stream.consume('e', case_insensitive)) {
1414 stream.consume('+') || stream.consume('-');
1415 }
1416
1417 stream.consume_through(digit_or_sep);
1418
1419 // check for a floating point suffix
1420 std::string_view suffix = stream.peek_through(is_ident_like());
1421
1422 if (suffix.size() == 1) {
1423 stream.consume('f', case_insensitive) || stream.consume('l', case_insensitive);
1424 }
1425
1426 return get_res();
1427}
1428
1431template <>
1432constexpr std::string_view parse<FloatHexLiteral>(Stream& stream) {
1433 if (!matches<FloatHexLiteral>(stream)) {
1434 throw ParsingError("matches precondition failed in parse", stream.str(), FloatHexLiteral);
1435 }
1436
1437 auto get_res = [init = stream.str(), &stream] { return init.substr(0, init.size() - stream.str().size()); };
1438
1439 // Hex floating point literal may be of the following forms:
1440 // hex-value hex-exp
1441 // hex-value '.' [hex-value] hex-exp
1442 // '.' hex-value hex-exp
1443 // In the following statement we ensure that it matches one of the above
1444 if (!stream.consume("0x", case_insensitive) && !stream.consume(".0x", case_insensitive)) {
1445 throw ParsingError("bad start of hex float literal", stream.str(), FloatHexLiteral);
1446 }
1447
1448 stream.consume_through(xdigit_or_sep);
1449
1450 // consume '.' if it exists
1451 stream.consume('.');
1452
1453 // potentially consume the hex fractional part
1454 if (stream.consume("0x", case_insensitive)) {
1455 stream.consume_through(xdigit_or_sep);
1456 }
1457
1458 // exponent is REQUIRED
1459 if (!stream.consume('p', case_insensitive)) {
1460 throw ParsingError("exponent char 'p'/'P' missing", stream.str(), FloatHexLiteral);
1461 }
1462
1463 stream.consume('+') || stream.consume('-');
1464
1465 stream.consume_through(digit_or_sep);
1466
1467 // parse the potential floating-point-suffix outside of this lammbda
1468
1469 // check for a floating point suffix
1470 std::string_view suffix = stream.peek_through(is_ident_like());
1471
1472 if (suffix.size() == 1) {
1473 stream.consume('f', case_insensitive) || stream.consume('l', case_insensitive);
1474 }
1475
1476 return get_res();
1477}
1478
1479static_assert(test_parse<FloatLiteral>("0.") == "0.");
1480static_assert(test_parse<FloatLiteral>("123.") == "123.");
1481static_assert(test_parse<FloatLiteral>(".1") == ".1");
1482static_assert(test_parse<FloatLiteral>(".123") == ".123");
1483static_assert(test_parse<FloatLiteral>("0.0") == "0.0");
1484static_assert(test_parse<FloatLiteral>("123.456") == "123.456");
1485static_assert(test_parse<FloatLiteral>("1e123") == "1e123");
1486static_assert(test_parse<FloatLiteral>("1.e123") == "1.e123");
1487static_assert(test_parse<FloatLiteral>(".1e123") == ".1e123");
1488static_assert(test_parse<FloatLiteral>("1.1e123") == "1.1e123");
1489static_assert(test_parse<FloatLiteral>("1.1e+123") == "1.1e+123");
1490static_assert(test_parse<FloatLiteral>("1.1e-123") == "1.1e-123");
1491static_assert(test_parse<FloatLiteral>("1.1E123") == "1.1E123");
1492static_assert(test_parse<FloatLiteral>("1.1E+123") == "1.1E+123");
1493static_assert(test_parse<FloatLiteral>("1.1E-123") == "1.1E-123");
1494// test hex
1495static_assert(test_parse<FloatHexLiteral>("0x1p1") == "0x1p1");
1496static_assert(test_parse<FloatHexLiteral>("0x1p1") == "0x1p1");
1497static_assert(test_parse<FloatHexLiteral>("0x1.p1") == "0x1.p1");
1498static_assert(test_parse<FloatHexLiteral>(".0x1p1") == ".0x1p1");
1499static_assert(test_parse<FloatHexLiteral>(".0x1p1") == ".0x1p1");
1500static_assert(test_parse<FloatHexLiteral>("0x1.0x1p1") == "0x1.0x1p1");
1501static_assert(test_parse<FloatHexLiteral>("0xABCDEF1.0x1p1") == "0xABCDEF1.0x1p1");
1502static_assert(test_parse<FloatHexLiteral>("0Xabcdef1.0X1P1") == "0Xabcdef1.0X1P1");
1503static_assert(test_parse<FloatHexLiteral>("0x1.0xABCDEF1p1") == "0x1.0xABCDEF1p1");
1504static_assert(test_parse<FloatHexLiteral>("0x1.0Xabcdef1P1") == "0x1.0Xabcdef1P1");
1505static_assert(test_parse<FloatHexLiteral>("0xABCDEF1.0xABCDEF1p1") == "0xABCDEF1.0xABCDEF1p1");
1506static_assert(test_parse<FloatHexLiteral>("0Xabcdef1.0Xabcdef1P1") == "0Xabcdef1.0Xabcdef1P1");
1507static_assert(test_parse<FloatHexLiteral>("0x1p12345") == "0x1p12345");
1508static_assert(test_parse<FloatHexLiteral>("0x1p12345f") == "0x1p12345f");
1509static_assert(test_parse<FloatHexLiteral>("0x1p12345l") == "0x1p12345l");
1510static_assert(test_parse<FloatHexLiteral>("0x123.0xABCp10") == "0x123.0xABCp10");
1511// test (normal) suffixes
1512static_assert(test_parse<FloatLiteral>("1.f") == "1.f");
1513static_assert(test_parse<FloatLiteral>("1.l") == "1.l");
1514static_assert(test_parse<FloatLiteral>("1.0f") == "1.0f");
1515static_assert(test_parse<FloatLiteral>(".0f") == ".0f");
1516static_assert(test_parse<FloatLiteral>("123.0F") == "123.0F");
1517static_assert(test_parse<FloatLiteral>("123.0L") == "123.0L");
1518// test potential user-defined suffixes
1519static_assert(test_parse<FloatLiteral>("123.0labc") == "123.0");
1520static_assert(test_parse<FloatLiteral>("123.0f_abc") == "123.0");
1521static_assert(test_parse<FloatLiteral>("123e12fb") == "123e12");
1522// hex weirdness (though exponent is required, so there are a few less edge cases)
1523static_assert(test_parse<FloatLiteral>("123.0p") == "123.0");
1524static_assert(test_parse<FloatLiteral>("123.p") == "123.");
1525static_assert(test_parse<FloatHexLiteral>("0x123.p1e123") == "0x123.p1");
1526static_assert(test_parse<FloatHexLiteral>(".0x0p1f12") == ".0x0p1");
1527static_assert(test_parse<FloatHexLiteral>(".0x0p1f12") == ".0x0p1");
1528static_assert(test_parse<FloatHexLiteral>(".0x0p1l_12") == ".0x0p1");
1529static_assert(test_parse<FloatHexLiteral>(".0x0p1_12") == ".0x0p1");
1530
1533template <>
1534constexpr std::string_view parse<BoolLiteral>(Stream& stream) {
1535 if (!matches<BoolLiteral>(stream)) {
1536 throw ParsingError("matches precondition failed in parse", stream.str(), BoolLiteral);
1537 }
1538
1539 return stream.consume_through(is_ident_like());
1540}
1541
1544template <>
1545constexpr std::string_view parse<Identifier>(Stream& stream) {
1546 if (!matches<Identifier>(stream)) {
1547 throw ParsingError("matches precondition failed in parse", stream.str(), Identifier);
1548 }
1549
1550 return stream.consume_through(is_ident_like());
1551}
1552
// Compile-time checks for parse<Identifier>: plain names, leading underscores, embedded digits, and
// inputs where an operator, parenthesis or '.' ends the identifier.
1553static_assert(test_parse<Identifier>("abc") == "abc");
1554static_assert(test_parse<Identifier>("_") == "_");
1555static_assert(test_parse<Identifier>("_abc") == "_abc");
1556static_assert(test_parse<Identifier>("_12abc") == "_12abc");
1557static_assert(test_parse<Identifier>("_abc+2") == "_abc");
1558static_assert(test_parse<Identifier>("_abc-2") == "_abc");
1559static_assert(test_parse<Identifier>("_abc(2)") == "_abc");
1560static_assert(test_parse<Identifier>("a.b") == "a");
1561
1564template <>
1565constexpr std::string_view parse<Grouping>(Stream& stream) {
1566 if (!matches<Grouping>(stream)) {
1567 throw ParsingError("matches precondition failed in parse", stream.str(), Grouping);
1568 }
1569
1570 // Raise a compilation error in case we ever change supported grouping tokens to multi-char
1571 static_assert(std::same_as<decltype(grouping_tokens)::value_type, char>);
1572
1573 return stream.consume(1);
1574}
1575
1578template <>
1579constexpr std::string_view parse<BinaryOperator>(Stream& stream) {
1580 if (!matches<BinaryOperator>(stream)) {
1581 throw ParsingError("matches precondition failed in parse", stream.str(), BinaryOperator);
1582 }
1583
1584 // Same strategy as in matches<BinaryOperator>
1585
1586 // To abide by the maximal munch rule, prioritizing bigger tokens over samller ones
1587 // e.g., "-=" is always picked over "-"
1588 auto max_munch_tokens = binary_operator_tokens;
1589 ranges::sort(max_munch_tokens, std::greater{}, &std::string_view::size);
1590
1591 auto check_stream_starts = std::bind_front(&Stream::starts_with<std::string_view>, stream);
1592 const auto* iter = ranges::find_if(max_munch_tokens, check_stream_starts);
1593
1594 if (iter == ranges::end(max_munch_tokens)) {
1595 throw ParsingError("no match found in binary operator token list", stream.str(), BinaryOperator);
1596 }
1597
1598 return stream.consume(iter->size());
1599}
1600
1603template <>
1604constexpr std::string_view parse<Operator>(Stream& stream) {
1605 if (!matches<Operator>(stream)) {
1606 throw ParsingError("matches precondition failed in parse", stream.str(), Operator);
1607 }
1608
1609 // Same logic as in parse<BinayOperator>
1610 // TODO: Make more DRY
1611
1612 // To abide by the maximal munch rule, prioritizing bigger tokens over samller ones
1613 // e.g., "-=" is always picked over "-"
1614 auto max_munch_tokens = operator_tokens;
1615 ranges::sort(max_munch_tokens, std::greater{}, &std::string_view::size);
1616
1617 auto check_stream_starts = std::bind_front(&Stream::starts_with<std::string_view>, stream);
1618 const auto* iter = ranges::find_if(max_munch_tokens, check_stream_starts);
1619
1620 if (iter == ranges::end(max_munch_tokens)) {
1621 throw ParsingError("no match found in operator token list", stream.str(), Operator);
1622 }
1623
1624 return stream.consume(iter->size());
1625}
1626
1628template <std::size_t MaxNumTokens, Token::Kind... ParsableTokenKinds>
1629constexpr auto parse_all(Stream input_stream) {
1630 std::array<Token, MaxNumTokens> tokens{};
1631
1632 for (std::size_t i = 0; auto& tok : tokens) {
1633 // strip any leading whitespace (only ' ' and '\t')
1634 input_stream.consume_through(isblank);
1635
1636 if (input_stream.empty()) {
1637 tok.kind = EndDelimiter;
1638 break;
1639 }
1640
1641 // An expression of, uhh, questionable coding standards and cleanliness
1642 // It is very concise though.
1643 ((tok.kind = ParsableTokenKinds,
1644 matches<ParsableTokenKinds>(input_stream) && !(tok.str = parse<ParsableTokenKinds>(input_stream)).empty()) ||
1645 ...);
1646
1647 input_stream.ctx.prevs = std::span(tokens.begin(), ++i);
1648 }
1649
1650 if (!input_stream.empty()) {
1651 throw ParsingError("input stream non-empty after full parse", input_stream.str());
1652 }
1653
1654 return tokens;
1655}
1656
1657} // namespace tokenize
1658
1659template <std::size_t MaxNumTokens>
1660constexpr auto parse_tokens(std::string_view str) {
1661 using enum Token::Kind;
1662
1663 return tokenize::parse_all<MaxNumTokens, BoolLiteral, StringLiteral, RawStringLiteral, CharLiteral, IntBinLiteral,
1664 IntOctLiteral, IntDecLiteral, IntHexLiteral, FloatLiteral, FloatHexLiteral, Identifier,
1665 Grouping, BinaryOperator, Operator>(str);
1666}
1667
1668template <std::size_t MaxNumTokens = 1'024>
1670{
1671public:
1672 constexpr Tokenizer() = default;
1673
1674 constexpr explicit(false) Tokenizer(std::string_view str)
1675 : original_{str}
1676 , tokens_{parse_tokens<MaxNumTokens>(str)}
1677 , num_tokens_{find_end_delim_idx()} {}
1678
1679 constexpr Tokenizer subseq(std::size_t start, std::size_t len) const {
1680 ASSERT(start < size());
1681
1682 len = std::min(size() - start, len);
1683
1684 Tokenizer result;
1685
1686 ranges::copy(tokens_.begin() + start, tokens_.begin() + start + len, result.tokens_.begin());
1687 result.num_tokens_ = len;
1688
1689 auto str_start = result.tokens_.front().str.data() - original_.data();
1690 auto str_len = (result[len - 1].str.data() + result[len - 1].str.size()) - result.tokens_.front().str.data();
1691 result.original_ = original_.substr(str_start, str_len);
1692
1693 return result;
1694 }
1695
1696 constexpr std::string_view get_original() const { return original_; }
1697
1698 constexpr auto size() const { return num_tokens_; }
1699
1700 constexpr auto empty() const { return num_tokens_ == 0; }
1701
1702 constexpr auto begin() const { return tokens_.begin(); }
1703
1704 constexpr auto end() const { return tokens_.begin() + num_tokens_; }
1705
1706 constexpr bool operator==(const ranges::forward_range auto& other) const { return ranges::equal(*this, other); }
1707
1708 constexpr const Token& operator[](std::size_t idx) const {
1709 ASSERT(idx < num_tokens_);
1710 return tokens_[idx];
1711 }
1712
1713private:
1714 constexpr std::size_t find_end_delim_idx() const {
1715 auto iter =
1716 std::ranges::find_if(tokens_, [](const Token& tok) { return tok.kind == Token::Kind::EndDelimiter; });
1717 if (iter == ranges::end(tokens_)) {
1718 throw ParsingError("EndDelimiter missing from token stream");
1719 }
1720
1721 return gsl::narrow_cast<std::size_t>(iter - ranges::begin(tokens_));
1722 }
1723
1724 std::string_view original_;
1725 std::array<Token, MaxNumTokens> tokens_;
1726 std::size_t num_tokens_{};
1727};
1728
1729} // namespace asmgrader::inspection
constexpr capable functions from c-style headers
Bad or invalid parse exception type. May indicate an implementation bug, or just an invalid expressio...
Definition expression_inspection.hpp:275
ParsingError(std::string msg, std::string_view stream_state, Token::Kind token_kind)
Definition expression_inspection.hpp:287
constexpr const std::optional< std::string > & stream_state() const noexcept
Definition expression_inspection.hpp:296
constexpr const std::string & msg() const noexcept
Definition expression_inspection.hpp:294
ParsingError(std::string msg, std::string_view stream_state)
Definition expression_inspection.hpp:283
std::string pretty() const
Return a human-readable "pretty" stringified version of the exception.
Definition expression_inspection.hpp:301
ParsingError(std::string msg)
Definition expression_inspection.hpp:280
constexpr const std::optional< Token::Kind > & token_kind() const noexcept
Definition expression_inspection.hpp:298
const char * what() const noexcept override
Definition expression_inspection.hpp:292
Definition expression_inspection.hpp:1670
constexpr Tokenizer subseq(std::size_t start, std::size_t len) const
Definition expression_inspection.hpp:1679
constexpr Tokenizer()=default
constexpr auto empty() const
Definition expression_inspection.hpp:1700
constexpr const Token & operator[](std::size_t idx) const
Definition expression_inspection.hpp:1708
constexpr auto begin() const
Definition expression_inspection.hpp:1702
constexpr std::string_view get_original() const
Definition expression_inspection.hpp:1696
constexpr auto size() const
Definition expression_inspection.hpp:1698
constexpr bool operator==(const ranges::forward_range auto &other) const
Definition expression_inspection.hpp:1706
constexpr auto end() const
Definition expression_inspection.hpp:1704
Definition expression_inspection.hpp:398
constexpr bool consume(StrLike str)
Attempt to consume str at the beginning of the stream, iff it actually exists at the beginning.
Definition expression_inspection.hpp:478
constexpr std::string_view consume(std::size_t n)
Same as peek, except it also mutates the stream.
Definition expression_inspection.hpp:466
constexpr std::string_view peek_through(auto what) const
Definition expression_inspection.hpp:457
constexpr std::size_t size() const
Definition expression_inspection.hpp:427
constexpr std::string_view consume_until(auto what)
Same as peek_until, except it also mutates the stream.
Definition expression_inspection.hpp:507
constexpr std::string_view peek_past(auto what) const
substr_past
Definition expression_inspection.hpp:463
constexpr bool starts_with(auto what) const
Whether the stream starts with what (accepts same as std::string_view::starts_with)
Definition expression_inspection.hpp:528
constexpr std::string_view consume_while(std::invocable< char > auto pred)
Same as peek_while, except it also mutates the stream.
Definition expression_inspection.hpp:521
constexpr Stream(std::string_view string, StreamContext context)
This overload is just used for writing tests.
Definition expression_inspection.hpp:423
constexpr std::string_view peek_while(std::invocable< char > auto pred) const
Definition expression_inspection.hpp:453
constexpr std::string_view str() const
Definition expression_inspection.hpp:539
StreamContext ctx
Definition expression_inspection.hpp:414
constexpr bool starts_with(char chr, CaseInsensitiveTag) const
Definition expression_inspection.hpp:530
constexpr bool consume(auto what, CaseInsensitiveTag)
Definition expression_inspection.hpp:492
constexpr std::string_view peek(std::size_t n) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:441
constexpr bool starts_with(std::string_view string, CaseInsensitiveTag) const
Definition expression_inspection.hpp:534
constexpr bool empty() const
Definition expression_inspection.hpp:429
constexpr char peek() const
Peek at the first character of the stream.
Definition expression_inspection.hpp:432
constexpr std::string_view consume_through(auto what)
Same as peek_through, except it also mutates the stream.
Definition expression_inspection.hpp:514
constexpr std::string_view peek_until(auto what) const
substr_to
Definition expression_inspection.hpp:451
#define LOG_WARN(...)
Definition logging.hpp:43
constexpr char tolower(char c)
Definition cconstexpr.hpp:54
constexpr struct asmgrader::inspection::tokenize::CaseInsensitiveTag case_insensitive
constexpr auto digit_or_sep(char c)
Exactly as named. Sep = '.
Definition expression_inspection.hpp:659
constexpr bool matches< BoolLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:744
constexpr auto xdigit_or_sep(char c)
Exactly as named. Sep = '.
Definition expression_inspection.hpp:664
constexpr bool matches< CharLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:725
constexpr bool matches< BinaryOperator >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:1054
constexpr bool matches< RawStringLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:705
constexpr std::string_view parse(Stream &stream)
Parse the token of Kind from the start of the stream. Assumes that the stream actually starts with a ...
Definition expression_inspection.hpp:1153
constexpr std::string_view test_parse(std::string_view str)
Definition expression_inspection.hpp:1139
constexpr bool is_strlike_prefix(std::string_view str)
Whether the entirety of str is a strlike-prefix See Token::Kind::StringLiteral for details.
Definition expression_inspection.hpp:591
constexpr auto make_rev_size_sorted(const std::string_view(&arr)[N])
Definition expression_inspection.hpp:547
constexpr bool matches< Operator >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:1097
constexpr bool matches< IntOctLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:793
constexpr bool matches< FloatLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:873
constexpr bool matches< FloatHexLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:860
constexpr bool is_int_suffix(std::string_view str)
Whether the entirety of str is an integer-suffix See Token::Kind::IntLiteral for details.
Definition expression_inspection.hpp:612
constexpr bool matches< Identifier >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:927
constexpr auto operator_tokens
Definition expression_inspection.hpp:566
constexpr auto is_ident_like()
Returns a functor to check for an identifier for a stream of characters Does not verify whether the i...
Definition expression_inspection.hpp:648
constexpr auto grouping_tokens
Definition expression_inspection.hpp:581
constexpr auto binary_operator_tokens
Definition expression_inspection.hpp:555
constexpr std::string_view parse< StringLiteral >(Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:1160
constexpr std::string_view substr_past(std::string_view str, auto what)
A substr of str past all characters satisfying pred In essence, performs 'drop while'.
Definition expression_inspection.hpp:373
constexpr bool matches< IntHexLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:771
constexpr bool matches< IntDecLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:812
constexpr std::string_view substr_to(std::string_view str, auto token)
A substr of str up to the first occurrence of token, or the entirety of str if token is not found.
Definition expression_inspection.hpp:346
constexpr bool matches< IntBinLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:759
constexpr bool matches< Grouping >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:952
constexpr bool matches< StringLiteral >(const Stream &stream)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition expression_inspection.hpp:685
constexpr bool matches(const Stream &stream)
Check whether the start of stream matches a token kind Assumes that there is no leading whitespace in...
Definition expression_inspection.hpp:677
Definition expression_inspection.hpp:46
constexpr auto parse_tokens(std::string_view str)
Definition expression_inspection.hpp:1660
constexpr std::string_view format_as(const Token::Kind token_kind)
Definition expression_inspection.hpp:232
Definition byte_array.hpp:94
Token of a very basic C++ expression. The primary use case is for rudemtary console syntax coloring.
Definition expression_inspection.hpp:57
Kind
The kind of token.
Definition expression_inspection.hpp:73
@ Grouping
Imperatively defined as: '{', '}' '(', ')' - when not as a function call '<', '>' - in template conte...
@ EndDelimiter
Deliminates the end of the token sequence. Also serves to obtain a count of the number of token types...
@ Identifier
https://en.cppreference.com/w/cpp/language/identifiers.html
@ BoolLiteral
'true' or 'false'. That's it.
@ BinaryOperator
https://en.cppreference.com/w/cpp/language/operator_precedence.html (Note that, contrary to the title...
@ CharLiteral
https://en.cppreference.com/w/cpp/language/character_literal.html
@ StringLiteral
https://en.cppreference.com/w/cpp/language/string_literal.html
@ IntHexLiteral
https://en.cppreference.com/w/cpp/language/integer_literal.html See IntDecLiteral
@ IntBinLiteral
https://en.cppreference.com/w/cpp/language/integer_literal.html See IntDecLiteral
@ IntDecLiteral
https://en.cppreference.com/w/cpp/language/integer_literal.html
@ Unknown
Under normal cases, this should be impossible. It's a saner option for a default, though,...
@ FloatHexLiteral
https://en.cppreference.com/w/cpp/language/floating_literal.html
@ RawStringLiteral
https://en.cppreference.com/w/cpp/language/string_literal.html
@ FloatLiteral
https://en.cppreference.com/w/cpp/language/floating_literal.html
@ IntOctLiteral
https://en.cppreference.com/w/cpp/language/integer_literal.html See IntDecLiteral This includes '0'
@ Operator
https://en.cppreference.com/w/cpp/language/operator_precedence.html (Note that, contrary to the title...
constexpr bool operator==(const Token &) const =default
std::string_view str
Definition expression_inspection.hpp:227
Kind kind
Definition expression_inspection.hpp:226
Definition expression_inspection.hpp:394
Primarily used to support Token::Kind::Grouping.
Definition expression_inspection.hpp:402
constexpr Token::Kind last_kind() const
Definition expression_inspection.hpp:405
std::span< Token > prevs
Definition expression_inspection.hpp:403
std::size_t start
Start of the block, inclusive i.e. position after the starting delimiter and style spec.
Definition syntax_highlighter.cpp:149
#define N
Definition test_macros.hpp:147