GPU: Shader: Add string parsing and merging

This avoids issues when trying to use printf with multiline string literals.
2025-08-30 22:34:31 +02:00
parent 72e5d5066a
commit ebf095a4b9
2 changed files with 69 additions and 34 deletions
--- a/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh
+++ b/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh
@@ -230,8 +230,8 @@ class Preprocessor {
        str = resource_guard_mutation(str, report_error);
        str = loop_unroll(str, report_error);
        str = assert_processing(str, filename);
-        static_strings_parsing(str);
-        str = static_strings_mutation(str);
+        str = static_strings_merging(str, report_error);
+        str = static_strings_parsing_and_mutation(str, report_error);
        str = printf_processing(str, report_error);
        quote_linting(str, report_error);
      }
@@ -1334,29 +1334,43 @@ class Preprocessor {
    return hash_32;
  }

-  void static_strings_parsing(const std::string &str)
+  std::string static_strings_merging(const std::string &str, report_callback report_error)
  {
-    using namespace metadata;
-    /* Matches any character inside a pair of un-escaped quote. */
-    std::regex regex(R"("(?:[^"])*")");
-    regex_global_search(str, regex, [&](const std::smatch &match) {
-      std::string format = match[0].str();
-      metadata.printf_formats.emplace_back(metadata::PrintfFormat{hash_string(format), format});
-    });
+    using namespace std;
+    using namespace shader::parser;
+
+    Parser parser(str, report_error);
+    do {
+      parser.foreach_match("__", [&](const std::vector<Token> &tokens) {
+        string first = tokens[0].str();
+        string second = tokens[1].str();
+        string between = parser.substr_range_inclusive(
+            tokens[0].str_index_last_no_whitespace() + 1, tokens[1].str_index_start() - 1);
+        string trailing = parser.substr_range_inclusive(
+            tokens[1].str_index_last_no_whitespace() + 1, tokens[1].str_index_last());
+        string merged = first.substr(0, first.length() - 1) + second.substr(1) + between +
+                        trailing;
+        parser.replace_try(tokens[0], tokens[1], merged);
+      });
+    } while (parser.apply_mutations());
+
+    return parser.result_get();
  }

-  std::string static_strings_mutation(std::string str)
+  std::string static_strings_parsing_and_mutation(const std::string &str,
+                                                  report_callback report_error)
  {
-    /* Replaces all matches by the respective string hash. */
-    for (const metadata::PrintfFormat &format : metadata.printf_formats) {
-      const std::string &str_var = format.format;
-      std::regex escape_regex(R"([\\\.\^\$\+\(\)\[\]\{\}\|\?\*])");
-      std::string str_regex = std::regex_replace(str_var, escape_regex, "\\$&");
+    using namespace std;
+    using namespace shader::parser;

-      std::regex regex(str_regex);
-      str = std::regex_replace(str, regex, std::to_string(hash_string(str_var)) + 'u');
-    }
-    return str;
+    Parser parser(str, report_error);
+    parser.foreach_token(String, [&](const Token &token) {
+      uint hash = hash_string(token.str());
+      metadata::PrintfFormat format = {hash, token.str()};
+      metadata.printf_formats.emplace_back(format);
+      parser.replace(token, std::to_string(hash) + 'u', true);
+    });
+    return parser.result_get();
  }

  /* Move all method definition outside of struct definition blocks. */
--- a/source/blender/gpu/glsl_preprocess/shader_parser.hh
+++ b/source/blender/gpu/glsl_preprocess/shader_parser.hh
@@ -55,7 +55,8 @@ enum TokenType : char {
  Dot = '.',
  Hash = '#',
  Ampersand = '&',
-  Literal = '0',
+  Number = '0',
+  String = '_',
  ParOpen = '(',
  ParClose = ')',
  BracketOpen = '{',
@@ -193,6 +194,8 @@ struct ParserData {
       * This allows to still split words on spaces. */
      bool prev_was_whitespace = (token_types[0] == NewLine || token_types[0] == Space);
      bool inside_preprocessor_directive = false;
+      bool next_character_is_escape = false;
+      bool inside_string = false;

      int offset = 0;
      for (const char &c : str.substr(1)) {
@@ -200,6 +203,17 @@ struct ParserData {
        TokenType type = to_type(c);
        TokenType prev = TokenType(token_types.back());

+        /* Merge string literal. */
+        if (inside_string) {
+          if (!next_character_is_escape && c == '\"') {
+            inside_string = false;
+          }
+          next_character_is_escape = c == '\\';
+          continue;
+        }
+        if (c == '\"') {
+          inside_string = true;
+        }
        /* Detect preprocessor directive newlines `\\\n`. */
        if (prev == Backslash && type == NewLine) {
          token_types.back() = PreprocessorNewline;
@@ -246,39 +260,39 @@ struct ParserData {
          continue;
        }
        /* If digit is part of word. */
-        if (type == Literal && prev == Word) {
+        if (type == Number && prev == Word) {
          continue;
        }
        /* If 'x' is part of hex literal. */
-        if (c == 'x' && prev == Literal) {
+        if (c == 'x' && prev == Number) {
          continue;
        }
        /* If 'A-F' is part of hex literal. */
-        if (c >= 'A' && c <= 'F' && prev == Literal) {
+        if (c >= 'A' && c <= 'F' && prev == Number) {
          continue;
        }
        /* If 'a-f' is part of hex literal. */
-        if (c >= 'a' && c <= 'f' && prev == Literal) {
+        if (c >= 'a' && c <= 'f' && prev == Number) {
          continue;
        }
        /* If 'u' is part of unsigned int literal. */
-        if (c == 'u' && prev == Literal) {
+        if (c == 'u' && prev == Number) {
          continue;
        }
        /* If dot is part of float literal. */
-        if (type == Dot && prev == Literal) {
+        if (type == Dot && prev == Number) {
          continue;
        }
        /* If 'f' suffix is part of float literal. */
-        if (c == 'f' && prev == Literal) {
+        if (c == 'f' && prev == Number) {
          continue;
        }
        /* If 'e' is part of float literal. */
-        if (c == 'e' && prev == Literal) {
+        if (c == 'e' && prev == Number) {
          continue;
        }
        /* If sign is part of float literal after exponent. */
-        if ((c == '+' || c == '-') && prev == Literal) {
+        if ((c == '+' || c == '-') && prev == Number) {
          continue;
        }
        /* Detect increment. */
@@ -292,7 +306,7 @@ struct ParserData {
          continue;
        }
        /* Only merge these token. Otherwise, always emit a token. */
-        if (type != Word && type != NewLine && type != Space && type != Literal) {
+        if (type != Word && type != NewLine && type != Space && type != Number) {
          prev = Word;
        }
        /* Split words on whitespaces even when merging. */
@@ -436,6 +450,8 @@ struct ParserData {
        return TokenType::Tilde;
      case '\\':
        return TokenType::Backslash;
+      case '\"':
+        return TokenType::String;
      case '?':
        return TokenType::Question;
      case ':':
@@ -453,7 +469,7 @@ struct ParserData {
      case '6':
      case '7':
      case '9':
-        return TokenType::Literal;
+        return TokenType::Number;
      default:
        return TokenType::Word;
    }
@@ -1110,9 +1126,14 @@ struct Parser {
    replace(from.str_index_start(), to.str_index_last(), replacement);
  }
  /* Replace token by string. */
-  void replace(Token tok, const std::string &replacement)
+  void replace(Token tok, const std::string &replacement, bool keep_trailing_whitespaces = false)
  {
-    replace(tok.str_index_start(), tok.str_index_last(), replacement);
+    if (keep_trailing_whitespaces) {
+      replace(tok.str_index_start(), tok.str_index_last_no_whitespace(), replacement);
+    }
+    else {
+      replace(tok.str_index_start(), tok.str_index_last(), replacement);
+    }
  }
  /* Replace Scope by string. */
  void replace(Scope scope, const std::string &replacement, bool keep_trailing_whitespaces = false)