// ClickHouse/rust/skim/src/lib.rs
//
// 112 lines
// 3.8 KiB
// Rust
// Raw Normal View History

use skim::prelude::*;
use term::terminfo::TermInfo;
use cxx::{CxxString, CxxVector};
use std::panic;
// FFI boundary generated by the `cxx` crate.
//
// The `extern "Rust"` section lists Rust functions that are made callable
// from the C++ side; the matching implementation is the free function `skim`
// defined in this file.
#[cxx::bridge]
mod ffi {
extern "Rust" {
// Opens an interactive picker over `words`, pre-filled with `prefix`, and
// returns the chosen entry.
//
// Inside a bridge module `Result<String>` is written with a single type
// parameter; the Rust implementation returns `Result<String, String>`.
// NOTE(review): per the cxx documentation an `Err` surfaces on the C++ side
// as a thrown `rust::Error` exception - confirm against the cxx version in use.
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String>;
}
}
/// A single candidate line offered to skim.
///
/// Two copies of the text are kept: a single-line variant that is displayed
/// and matched against, and the untouched original that is handed back once
/// the entry has been selected.
struct Item {
    /// Text that is printed by skim and used for matching (no `\n` inside).
    text_no_newlines: String,
    /// Original text, returned verbatim when the item is selected.
    orig_text: String,
}

impl Item {
    /// Builds an item from `text`, deriving the single-line display variant.
    ///
    /// Every `\n` in `text` is replaced by a single space in the displayed
    /// text; the original is stored unchanged.
    fn new(text: String) -> Self {
        Self {
            // Text that will be printed by skim, and will be used for matching.
            //
            // Text that will be shown should not contain new lines, since in this
            // case skim may leave some symbols on the screen, and this looks odd.
            text_no_newlines: text.replace('\n', " "),
            // This will be used when the match had been selected.
            orig_text: text,
        }
    }
}
impl SkimItem for Item {
fn text(&self) -> Cow<str> {
Use "exact" matching for fuzzy search Right now fuzzy search is too smart for SQL, it even takes into account the case, which should not be accounted (you don't want to type "SELECT" instead of "select" to find the query). And to tell the truth, I think too smart fuzzy searching for SQL queries is not required, and is only harming. Exact matching seems better algorithm for SQL, it is not 100% exact, it splits by space, and apply separate matcher actually for each word. Note, that if you think that "space is not enough" as the delimiter, then you should first know that this is the delimiter only for the input query, so to match "system.query_log" you can use "sy qu log" (also you can disable exact mode by prepending "'" char). But it ignores the case by default, and the behaviour what is expected from the CaseMatching::Ignore. TL;DR; Just for the history I will describe what had been tried. At first I tried CaseMatching::Ignore - it does not helps for SkimV1/SkimV2/Clangd matches. So I converted lines from the history and input query, to the lower case. However this does not work for UPPER CASE, since only initial portion of the query had been converted to the lower. 
Then I've looked into skim/fuzzy-matcher crates code, and look for the reason why CaseMatching::Ignore does not work, and found that there is still a penalty for case mismatch, but there is no way to pass it from the user code, so I've tried guerrilla to monkey patch the library's code and it works: // Avoid penalty for case mismatch (even with CaseMatching::Ignore) let _guard = guerrilla::patch0(SkimScoreConfig::default, || { let score_match = 16; let gap_start = -3; let gap_extension = -1; let bonus_first_char_multiplier = 2; return SkimScoreConfig{ score_match, gap_start, gap_extension, bonus_first_char_multiplier, bonus_head: score_match / 2, bonus_break: score_match / 2 + gap_extension, bonus_camel: score_match / 2 + 2 * gap_extension, bonus_consecutive: -(gap_start + gap_extension), // penalty_case_mismatch: gap_extension * 2, penalty_case_mismatch: 0, }; }); But this does not sounds like a trivial code, so I decided, to look around, and realized that "exact" matching should do what is required for the completion of queries (at least from my point of view). Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-02-02 21:08:46 +00:00
return Cow::Borrowed(&self.text_no_newlines);
}
fn output(&self) -> Cow<str> {
return Cow::Borrowed(&self.orig_text);
}
}
fn skim_impl(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
// Let's check is terminal available. To avoid panic.
if let Err(err) = TermInfo::from_env() {
return Err(format!("{}", err));
}
let options = SkimOptionsBuilder::default()
.height(Some("30%"))
.query(Some(prefix.to_str().unwrap()))
.tac(true)
.tiebreak(Some("-score".to_string()))
Use "exact" matching for fuzzy search Right now fuzzy search is too smart for SQL, it even takes into account the case, which should not be accounted (you don't want to type "SELECT" instead of "select" to find the query). And to tell the truth, I think too smart fuzzy searching for SQL queries is not required, and is only harming. Exact matching seems better algorithm for SQL, it is not 100% exact, it splits by space, and apply separate matcher actually for each word. Note, that if you think that "space is not enough" as the delimiter, then you should first know that this is the delimiter only for the input query, so to match "system.query_log" you can use "sy qu log" (also you can disable exact mode by prepending "'" char). But it ignores the case by default, and the behaviour what is expected from the CaseMatching::Ignore. TL;DR; Just for the history I will describe what had been tried. At first I tried CaseMatching::Ignore - it does not helps for SkimV1/SkimV2/Clangd matches. So I converted lines from the history and input query, to the lower case. However this does not work for UPPER CASE, since only initial portion of the query had been converted to the lower. 
Then I've looked into skim/fuzzy-matcher crates code, and look for the reason why CaseMatching::Ignore does not work, and found that there is still a penalty for case mismatch, but there is no way to pass it from the user code, so I've tried guerrilla to monkey patch the library's code and it works: // Avoid penalty for case mismatch (even with CaseMatching::Ignore) let _guard = guerrilla::patch0(SkimScoreConfig::default, || { let score_match = 16; let gap_start = -3; let gap_extension = -1; let bonus_first_char_multiplier = 2; return SkimScoreConfig{ score_match, gap_start, gap_extension, bonus_first_char_multiplier, bonus_head: score_match / 2, bonus_break: score_match / 2 + gap_extension, bonus_camel: score_match / 2 + 2 * gap_extension, bonus_consecutive: -(gap_start + gap_extension), // penalty_case_mismatch: gap_extension * 2, penalty_case_mismatch: 0, }; }); But this does not sounds like a trivial code, so I decided, to look around, and realized that "exact" matching should do what is required for the completion of queries (at least from my point of view). Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-02-02 21:08:46 +00:00
// Exact mode performs better for SQL.
//
// Default fuzzy search is too smart for SQL, it even takes into account the case, which
// should not be accounted (you don't want to type "SELECT" instead of "select" to find the
// query).
//
// Exact matching seems better algorithm for SQL, it is not 100% exact, it splits by space,
// and apply separate matcher actually for each word.
// Note, that if you think that "space is not enough" as the delimiter, then you should
// first know that this is the delimiter only for the input query, so to match
// "system.query_log" you can use "sy qu log"
// Also it should be more common for users who did not know how to use fuzzy search.
// (also you can disable exact mode by prepending "'" char).
//
// Also it ignores the case correctly, i.e. it does not have penalty for case mismatch,
// like fuzzy algorithms (take a look at SkimScoreConfig::penalty_case_mismatch).
.exact(true)
.case(CaseMatching::Ignore)
.build()
.unwrap();
let (tx, rx): (SkimItemSender, SkimItemReceiver) = unbounded();
for word in words {
tx.send(Arc::new(Item::new(word.to_string()))).unwrap();
}
// so that skim could know when to stop waiting for more items.
drop(tx);
let output = Skim::run_with(&options, Some(rx));
if output.is_none() {
return Err("skim return nothing".to_string());
}
let output = output.unwrap();
if output.is_abort {
return Ok("".to_string());
}
if output.selected_items.is_empty() {
return Err("No items had been selected".to_string());
}
return Ok(output.selected_items[0].output().to_string());
}
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
let ret = panic::catch_unwind(|| {
return skim_impl(prefix, words);
});
return match ret {
Err(err) => {
let e = if let Some(s) = err.downcast_ref::<String>() {
format!("{}", s)
} else if let Some(s) = err.downcast_ref::<&str>() {
format!("{}", s)
} else {
format!("Unknown panic type: {:?}", err.type_id())
};
Err(format!("Rust panic: {:?}", e))
},
Ok(res) => res,
}
}