diff --git a/source/blender/blenlib/BLI_string_search.hh b/source/blender/blenlib/BLI_string_search.hh
index aee4a2d2a72..219e3aa4ec7 100644
--- a/source/blender/blenlib/BLI_string_search.hh
+++ b/source/blender/blenlib/BLI_string_search.hh
@@ -14,7 +14,8 @@ namespace blender::string_search {
 
 struct SearchItem {
   void *user_data;
-  Span normalized_words;
+  Span normalized_words;
+  Span word_weight_factors;
   int length;
   int weight;
   /**
@@ -106,6 +107,7 @@ int get_fuzzy_match_errors(StringRef query, StringRef full);
  */
 void extract_normalized_words(StringRef str,
                               LinearAllocator<> &allocator,
-                              Vector &r_words);
+                              Vector &r_words,
+                              Vector &r_word_weights);
 
 }  // namespace blender::string_search
diff --git a/source/blender/blenlib/intern/string_search.cc b/source/blender/blenlib/intern/string_search.cc
index d2338d8dcfa..7616bce44d1 100644
--- a/source/blender/blenlib/intern/string_search.cc
+++ b/source/blender/blenlib/intern/string_search.cc
@@ -228,21 +228,30 @@ static bool match_word_initials(StringRef query,
   return true;
 }
 
-static int get_shortest_word_index_that_startswith(StringRef query,
-                                                   Span words,
-                                                   Span word_match_map)
+/**
+ * The shortest unused word starting with #query wins; equal lengths are ordered by word weight.
+ */
+static int get_best_word_index_that_startswith(StringRef query,
+                                               Span words,
+                                               Span word_weights,
+                                               Span word_match_map)
 {
   int best_word_size = INT32_MAX;
   int best_word_index = -1;
+  float best_word_weight = 0.0f;
   for (const int i : words.index_range()) {
     if (word_match_map[i] != unused_word) {
       continue;
     }
     StringRef word = words[i];
+    const float word_weight = word_weights[i];
     if (word.startswith(query)) {
-      if (word.size() < best_word_size) {
+      if (word.size() < best_word_size ||
+          (word.size() == best_word_size && word_weight > best_word_weight))
+      {
         best_word_index = i;
         best_word_size = word.size();
+        best_word_weight = word_weight;
       }
     }
   }
@@ -272,23 +281,25 @@ static int get_word_index_that_fuzzy_matches(StringRef query,
- * Checks how well the query matches a result. If it does not match, -1 is returned. A positive
- * return value indicates how good the match is. The higher the value, the better the match.
+ * Checks how well the query matches a result. Returns an empty optional if it does not match.
+ * Otherwise the returned value indicates how good the match is: higher means a better match.
  */
-static int score_query_against_words(Span query_words, Span result_words)
+static std::optional score_query_against_words(Span query_words,
+                                               Span result_words,
+                                               Span result_word_weights)
 {
   /* A mapping from #result_words to #query_words. It's mainly used to determine if a word has been
    * matched already to avoid matching it again. */
   Array word_match_map(result_words.size(), unused_word);
 
   /* Start with some high score, because otherwise the final score might become negative. */
-  int total_match_score = 1000;
+  float total_match_score = 1000;
 
   for (const int query_word_index : query_words.index_range()) {
     const StringRef query_word = query_words[query_word_index];
     {
       /* Check if any result word begins with the query word. */
-      const int word_index = get_shortest_word_index_that_startswith(
-          query_word, result_words, word_match_map);
+      const int word_index = get_best_word_index_that_startswith(
+          query_word, result_words, result_word_weights, word_match_map);
       if (word_index >= 0) {
-        total_match_score += 10;
+        total_match_score += 10 * result_word_weights[word_index];
         word_match_map[word_index] = query_word_index;
         continue;
       }
@@ -321,7 +332,7 @@ static int score_query_against_words(Span query_words, Span query_words, Span &allocator,
-                              Vector &r_words)
+                              Vector &r_words,
+                              Vector &r_word_weights)
 {
   const uint32_t unicode_space = uint32_t(' ');
   const uint32_t unicode_slash = uint32_t('/');
@@ -360,6 +372,8 @@ void extract_normalized_words(StringRef str,
     return ELEM(unicode, unicode_space, unicode_slash, unicode_right_triangle);
   };
 
+  Vector section_indices;
+
   /* Make a copy of the string so that we can edit it. */
   StringRef str_copy = allocator.copy_string(str);
   char *mutable_copy = const_cast(str_copy.data());
@@ -367,6 +381,7 @@ void extract_normalized_words(StringRef str,
   BLI_str_tolower_ascii(mutable_copy, str_size_in_bytes);
 
   /* Iterate over all unicode code points to split individual words. */
+  int current_section = 0;
   bool is_in_word = false;
   size_t word_start = 0;
   size_t offset = 0;
@@ -374,9 +389,15 @@ void extract_normalized_words(StringRef str,
     size_t size = offset;
     uint32_t unicode = BLI_str_utf8_as_unicode_step_safe(str.data(), str.size(), &size);
     size -= offset;
     if (is_separator(unicode)) {
       if (is_in_word) {
-        r_words.append(str_copy.substr(int(word_start), int(offset - word_start)));
+        const StringRef word = str_copy.substr(int(word_start), int(offset - word_start));
+        r_words.append(word);
+        section_indices.append(current_section);
         is_in_word = false;
       }
+      /* Start the next section only after the word ending at this separator has been added. */
+      if (unicode == unicode_right_triangle) {
+        current_section++;
+      }
     }
@@ -390,19 +411,30 @@ void extract_normalized_words(StringRef str,
   }
   /* If the last word is not followed by a separator, it has to be handled separately. */
   if (is_in_word) {
-    r_words.append(str_copy.drop_prefix(int(word_start)));
+    const StringRef word = str_copy.drop_prefix(int(word_start));
+    r_words.append(word);
+    section_indices.append(current_section);
+  }
+
+  for (const int i : section_indices.index_range()) {
+    const int section = section_indices[i];
+    /* Give the last section a higher weight, because that's what is highlighted in the UI. */
+    const float word_weight = section == current_section ? 1.0f : 0.9f;
+    r_word_weights.append(word_weight);
   }
 }
 
 void StringSearchBase::add_impl(const StringRef str, void *user_data, const int weight)
 {
   Vector words;
-  string_search::extract_normalized_words(str, allocator_, words);
+  Vector word_weights;
+  string_search::extract_normalized_words(str, allocator_, words, word_weights);
   const int recent_time = recent_cache_ ?
                               recent_cache_->logical_time_by_str.lookup_default(str, -1) :
                               -1;
   items_.append({user_data,
                  allocator_.construct_array_copy(words.as_span()),
+                 allocator_.construct_array_copy(word_weights.as_span()),
                  int(str.size()),
                  weight,
                  recent_time});
@@ -412,15 +444,18 @@ Vector StringSearchBase::query_impl(const StringRef query) const
 {
   LinearAllocator<> allocator;
   Vector query_words;
-  string_search::extract_normalized_words(query, allocator, query_words);
+  /* The word weights are not actually used for the query. */
+  Vector word_weights;
+  string_search::extract_normalized_words(query, allocator, query_words, word_weights);
 
   /* Compute score of every result. */
-  MultiValueMap result_indices_by_score;
+  MultiValueMap result_indices_by_score;
   for (const int result_index : items_.index_range()) {
-    const int score = string_search::score_query_against_words(
-        query_words, items_[result_index].normalized_words);
-    if (score >= 0) {
-      result_indices_by_score.add(score, result_index);
+    const SearchItem &item = items_[result_index];
+    const std::optional score = string_search::score_query_against_words(
+        query_words, item.normalized_words, item.word_weight_factors);
+    if (score.has_value()) {
+      result_indices_by_score.add(*score, result_index);
     }
   }
 
diff --git a/source/blender/blenlib/tests/BLI_string_search_test.cc b/source/blender/blenlib/tests/BLI_string_search_test.cc
index c3358235ec2..bdab5293f66 100644
--- a/source/blender/blenlib/tests/BLI_string_search_test.cc
+++ b/source/blender/blenlib/tests/BLI_string_search_test.cc
@@ -42,10 +42,12 @@ TEST(string_search, extract_normalized_words)
 {
   LinearAllocator<> allocator;
   Vector words;
+  Vector word_weights;
   extract_normalized_words("hello world" UI_MENU_ARROW_SEP "test another test" UI_MENU_ARROW_SEP
                            " 3",
                            allocator,
                            words,
-                           words);
+                           words,
+                           word_weights);
   EXPECT_EQ(words.size(), 6);
   EXPECT_EQ(words[0], "hello");
   EXPECT_EQ(words[1], "world");