Re-rank file search (vibe-kanban) (#514)
* Cleanup script changes for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b * Commit changes from coding agent for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b * Cleanup script changes for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b * fmt * Perfect! **✅ Refactoring Complete** ## Summary Successfully refactored the FileRanker to remove all direct git2 dependencies and consolidated all git operations in GitService: ### **Key Changes Made:** 1. **Enhanced GitService** (`crates/services/src/services/git.rs`): - ✅ Added `HeadInfo` struct with `branch: String` and `oid: String` - ✅ Added `get_head_info()` method that returns both branch name and commit SHA - ✅ Updated `get_current_branch()` to be a thin wrapper maintaining backward compatibility 2. **Refactored FileRanker** (`crates/services/src/services/file_ranker.rs`): - �� **Removed all `git2` imports** - now purely focused on ranking logic - ✅ Changed cache to store `head_sha: String` instead of `head_oid: git2::Oid` - ✅ Updated cache validation to use `GitService.get_head_info()` - ✅ Updated cache storage to use `GitService.get_head_info()` ### **Benefits Achieved:** - **🎯 Clean Separation**: FileRanker = pure ranking logic, GitService = all git I/O - **🧪 Better Testability**: FileRanker no longer depends on git2, easier to mock/test - **🔄 Future-Proof**: All git backend changes centralized in GitService - **⚡ Zero Performance Impact**: Same caching behavior and efficiency - **🔄 Backward Compatibility**: Existing `get_current_branch()` callers unaffected ### **Architecture Now:** ``` FileRanker (ranking logic only) ↓ delegates to GitService (single git facade) ↓ encapsulates git2 (implementation detail) ``` The refactoring successfully achieved proper separation of concerns while maintaining all existing functionality and performance characteristics. * Cleanup script changes for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b
This commit is contained in:
committed by
GitHub
parent
88a67d8d9c
commit
e5aa72e907
@@ -13,7 +13,7 @@ use db::models::project::{
|
|||||||
};
|
};
|
||||||
use deployment::Deployment;
|
use deployment::Deployment;
|
||||||
use ignore::WalkBuilder;
|
use ignore::WalkBuilder;
|
||||||
use services::services::git::GitBranch;
|
use services::services::{file_ranker::FileRanker, git::GitBranch};
|
||||||
use utils::response::ApiResponse;
|
use utils::response::ApiResponse;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
@@ -355,7 +355,19 @@ async fn search_files_in_repo(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort results by priority: FileName > DirectoryName > FullPath
|
// Apply git history-based ranking
|
||||||
|
let file_ranker = FileRanker::new();
|
||||||
|
match file_ranker.get_stats(repo_path).await {
|
||||||
|
Ok(stats) => {
|
||||||
|
// Re-rank results using git history
|
||||||
|
file_ranker.rerank(&mut results, &stats);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"Failed to get git stats for ranking, using basic sort: {}",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
// Fallback to basic priority sorting
|
||||||
results.sort_by(|a, b| {
|
results.sort_by(|a, b| {
|
||||||
let priority = |match_type: &SearchMatchType| match match_type {
|
let priority = |match_type: &SearchMatchType| match match_type {
|
||||||
SearchMatchType::FileName => 0,
|
SearchMatchType::FileName => 0,
|
||||||
@@ -367,6 +379,8 @@ async fn search_files_in_repo(
|
|||||||
.cmp(&priority(&b.match_type))
|
.cmp(&priority(&b.match_type))
|
||||||
.then_with(|| a.path.cmp(&b.path))
|
.then_with(|| a.path.cmp(&b.path))
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Limit to top 10 results
|
// Limit to top 10 results
|
||||||
results.truncate(10);
|
results.truncate(10);
|
||||||
|
|||||||
@@ -52,3 +52,5 @@ strum = "0.27.2"
|
|||||||
notify = "8.2.0"
|
notify = "8.2.0"
|
||||||
notify-debouncer-full = "0.5.0"
|
notify-debouncer-full = "0.5.0"
|
||||||
dunce = "1.0"
|
dunce = "1.0"
|
||||||
|
dashmap = "6.1"
|
||||||
|
once_cell = "1.20"
|
||||||
|
|||||||
157
crates/services/src/services/file_ranker.rs
Normal file
157
crates/services/src/services/file_ranker.rs
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
sync::Arc,
|
||||||
|
time::Instant,
|
||||||
|
};
|
||||||
|
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use dashmap::DashMap;
|
||||||
|
use db::models::project::{SearchMatchType, SearchResult};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use tokio::task;
|
||||||
|
|
||||||
|
use super::git::{GitService, GitServiceError};
|
||||||
|
|
||||||
|
/// Statistics for a single file based on git history
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct FileStat {
|
||||||
|
/// Index in the commit history (0 = HEAD, 1 = parent of HEAD, ...)
|
||||||
|
pub last_index: usize,
|
||||||
|
/// Number of times this file was changed in recent commits
|
||||||
|
pub commit_count: u32,
|
||||||
|
/// Timestamp of the most recent change
|
||||||
|
pub last_time: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// File statistics for a repository
|
||||||
|
pub type FileStats = HashMap<String, FileStat>;
|
||||||
|
|
||||||
|
/// Cache entry for repository history
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct RepoHistoryCache {
|
||||||
|
head_sha: String,
|
||||||
|
stats: Arc<FileStats>,
|
||||||
|
generated_at: Instant,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Global cache for file ranking statistics
|
||||||
|
static FILE_STATS_CACHE: Lazy<DashMap<PathBuf, RepoHistoryCache>> = Lazy::new(DashMap::new);
|
||||||
|
|
||||||
|
/// Configuration constants for ranking algorithm
|
||||||
|
const DEFAULT_COMMIT_LIMIT: usize = 100;
|
||||||
|
const BASE_MATCH_SCORE_FILENAME: i64 = 100;
|
||||||
|
const BASE_MATCH_SCORE_DIRNAME: i64 = 10;
|
||||||
|
const BASE_MATCH_SCORE_FULLPATH: i64 = 1;
|
||||||
|
const RECENCY_WEIGHT: i64 = 2;
|
||||||
|
const FREQUENCY_WEIGHT: i64 = 1;
|
||||||
|
|
||||||
|
/// Service for ranking files based on git history
|
||||||
|
pub struct FileRanker {
|
||||||
|
git_service: GitService,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FileRanker {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileRanker {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
git_service: GitService::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get file statistics for a repository, using cache when possible
|
||||||
|
pub async fn get_stats(&self, repo_path: &Path) -> Result<Arc<FileStats>, GitServiceError> {
|
||||||
|
let repo_path = repo_path.to_path_buf();
|
||||||
|
|
||||||
|
// Check if we have a valid cache entry
|
||||||
|
if let Some(cache_entry) = FILE_STATS_CACHE.get(&repo_path) {
|
||||||
|
// Verify cache is still valid by checking HEAD
|
||||||
|
if let Ok(head_info) = self.git_service.get_head_info(&repo_path)
|
||||||
|
&& head_info.oid == cache_entry.head_sha
|
||||||
|
{
|
||||||
|
return Ok(Arc::clone(&cache_entry.stats));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache miss or invalid - compute new stats
|
||||||
|
let stats = self.compute_stats(&repo_path).await?;
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Re-rank search results based on git history statistics
|
||||||
|
pub fn rerank(&self, results: &mut [SearchResult], stats: &FileStats) {
|
||||||
|
results.sort_by(|a, b| {
|
||||||
|
let score_a = self.calculate_score(a, stats);
|
||||||
|
let score_b = self.calculate_score(b, stats);
|
||||||
|
score_b.cmp(&score_a) // Higher scores first
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate relevance score for a search result
|
||||||
|
fn calculate_score(&self, result: &SearchResult, stats: &FileStats) -> i64 {
|
||||||
|
let base_score = match result.match_type {
|
||||||
|
SearchMatchType::FileName => BASE_MATCH_SCORE_FILENAME,
|
||||||
|
SearchMatchType::DirectoryName => BASE_MATCH_SCORE_DIRNAME,
|
||||||
|
SearchMatchType::FullPath => BASE_MATCH_SCORE_FULLPATH,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(stat) = stats.get(&result.path) {
|
||||||
|
let recency_bonus = (100 - stat.last_index.min(99) as i64) * RECENCY_WEIGHT;
|
||||||
|
let frequency_bonus = stat.commit_count as i64 * FREQUENCY_WEIGHT;
|
||||||
|
|
||||||
|
// Multiply base score to maintain hierarchy, add git-based bonuses
|
||||||
|
base_score * 1000 + recency_bonus * 10 + frequency_bonus
|
||||||
|
} else {
|
||||||
|
// Files not in git history get base score only
|
||||||
|
base_score * 1000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute file statistics from git history
|
||||||
|
async fn compute_stats(&self, repo_path: &Path) -> Result<Arc<FileStats>, GitServiceError> {
|
||||||
|
let repo_path = repo_path.to_path_buf();
|
||||||
|
let repo_path_for_error = repo_path.clone();
|
||||||
|
let git_service = self.git_service.clone();
|
||||||
|
|
||||||
|
// Run git analysis in blocking task to avoid blocking async runtime
|
||||||
|
let stats = task::spawn_blocking(move || {
|
||||||
|
git_service.collect_recent_file_stats(&repo_path, DEFAULT_COMMIT_LIMIT)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| GitServiceError::InvalidRepository(format!("Task join error: {e}")))?;
|
||||||
|
|
||||||
|
let stats = match stats {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"Failed to collect file stats for {:?}: {}",
|
||||||
|
repo_path_for_error,
|
||||||
|
e
|
||||||
|
);
|
||||||
|
// Return empty stats on error - search will still work without ranking
|
||||||
|
HashMap::new()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let stats_arc = Arc::new(stats);
|
||||||
|
|
||||||
|
// Update cache
|
||||||
|
if let Ok(head_info) = self.git_service.get_head_info(&repo_path_for_error) {
|
||||||
|
FILE_STATS_CACHE.insert(
|
||||||
|
repo_path_for_error,
|
||||||
|
RepoHistoryCache {
|
||||||
|
head_sha: head_info.oid,
|
||||||
|
stats: Arc::clone(&stats_arc),
|
||||||
|
generated_at: Instant::now(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(stats_arc)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
use std::path::Path;
|
use std::{collections::HashMap, path::Path};
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use git2::{
|
use git2::{
|
||||||
BranchType, CherrypickOptions, Delta, DiffFindOptions, DiffOptions, Error as GitError,
|
BranchType, CherrypickOptions, Delta, DiffFindOptions, DiffOptions, Error as GitError,
|
||||||
FetchOptions, Repository, Status, StatusOptions, build::CheckoutBuilder,
|
FetchOptions, Repository, Sort, Status, StatusOptions, build::CheckoutBuilder,
|
||||||
};
|
};
|
||||||
use regex;
|
use regex;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@@ -11,6 +11,9 @@ use thiserror::Error;
|
|||||||
use ts_rs::TS;
|
use ts_rs::TS;
|
||||||
use utils::diff::{Diff, FileDiffDetails};
|
use utils::diff::{Diff, FileDiffDetails};
|
||||||
|
|
||||||
|
// Import for file ranking functionality
|
||||||
|
use super::file_ranker::FileStat;
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum GitServiceError {
|
pub enum GitServiceError {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
@@ -46,6 +49,12 @@ pub struct GitBranch {
|
|||||||
pub last_commit_date: DateTime<Utc>,
|
pub last_commit_date: DateTime<Utc>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct HeadInfo {
|
||||||
|
pub branch: String,
|
||||||
|
pub oid: String,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
|
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
|
||||||
pub struct BranchStatus {
|
pub struct BranchStatus {
|
||||||
pub commits_behind: Option<usize>,
|
pub commits_behind: Option<usize>,
|
||||||
@@ -646,13 +655,35 @@ impl GitService {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_current_branch(&self, repo_path: &Path) -> Result<String, git2::Error> {
|
/// Get current HEAD information including branch name and commit OID
|
||||||
let repo = Repository::open(repo_path)?;
|
pub fn get_head_info(&self, repo_path: &Path) -> Result<HeadInfo, GitServiceError> {
|
||||||
|
let repo = self.open_repo(repo_path)?;
|
||||||
let head = repo.head()?;
|
let head = repo.head()?;
|
||||||
if let Some(branch_name) = head.shorthand() {
|
|
||||||
Ok(branch_name.to_string())
|
let branch = if let Some(branch_name) = head.shorthand() {
|
||||||
|
branch_name.to_string()
|
||||||
} else {
|
} else {
|
||||||
Ok("HEAD".to_string())
|
"HEAD".to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
let oid = if let Some(target_oid) = head.target() {
|
||||||
|
target_oid.to_string()
|
||||||
|
} else {
|
||||||
|
// Handle case where HEAD exists but has no target (empty repo)
|
||||||
|
return Err(GitServiceError::InvalidRepository(
|
||||||
|
"Repository HEAD has no target commit".to_string(),
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(HeadInfo { branch, oid })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_current_branch(&self, repo_path: &Path) -> Result<String, git2::Error> {
|
||||||
|
// Thin wrapper for backward compatibility
|
||||||
|
match self.get_head_info(repo_path) {
|
||||||
|
Ok(head_info) => Ok(head_info.branch),
|
||||||
|
Err(GitServiceError::Git(git_err)) => Err(git_err),
|
||||||
|
Err(_) => Err(git2::Error::from_str("Failed to get head info")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1256,6 +1287,80 @@ impl GitService {
|
|||||||
|
|
||||||
Ok(repo)
|
Ok(repo)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collect file statistics from recent commits for ranking purposes
|
||||||
|
pub fn collect_recent_file_stats(
|
||||||
|
&self,
|
||||||
|
repo_path: &Path,
|
||||||
|
commit_limit: usize,
|
||||||
|
) -> Result<HashMap<String, FileStat>, GitServiceError> {
|
||||||
|
let repo = self.open_repo(repo_path)?;
|
||||||
|
let mut stats: HashMap<String, FileStat> = HashMap::new();
|
||||||
|
|
||||||
|
// Set up revision walk from HEAD
|
||||||
|
let mut revwalk = repo.revwalk()?;
|
||||||
|
revwalk.push_head()?;
|
||||||
|
revwalk.set_sorting(Sort::TIME)?;
|
||||||
|
|
||||||
|
// Iterate through recent commits
|
||||||
|
for (commit_index, oid_result) in revwalk.take(commit_limit).enumerate() {
|
||||||
|
let oid = oid_result?;
|
||||||
|
let commit = repo.find_commit(oid)?;
|
||||||
|
|
||||||
|
// Get commit timestamp
|
||||||
|
let commit_time = {
|
||||||
|
let time = commit.time();
|
||||||
|
DateTime::from_timestamp(time.seconds(), 0).unwrap_or_else(Utc::now)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get the commit tree
|
||||||
|
let commit_tree = commit.tree()?;
|
||||||
|
|
||||||
|
// For the first commit (no parent), diff against empty tree
|
||||||
|
let parent_tree = if commit.parent_count() == 0 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(commit.parent(0)?.tree()?)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create diff between parent and current commit
|
||||||
|
let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
|
||||||
|
|
||||||
|
// Process each changed file in this commit
|
||||||
|
diff.foreach(
|
||||||
|
&mut |delta, _progress| {
|
||||||
|
// Get the file path - prefer new file path, fall back to old
|
||||||
|
if let Some(path) = delta.new_file().path().or_else(|| delta.old_file().path())
|
||||||
|
{
|
||||||
|
let path_str = path.to_string_lossy().to_string();
|
||||||
|
|
||||||
|
// Update or insert file stats
|
||||||
|
let stat = stats.entry(path_str).or_insert(FileStat {
|
||||||
|
last_index: commit_index,
|
||||||
|
commit_count: 0,
|
||||||
|
last_time: commit_time,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Increment commit count
|
||||||
|
stat.commit_count += 1;
|
||||||
|
|
||||||
|
// Keep the most recent change (smallest index)
|
||||||
|
if commit_index < stat.last_index {
|
||||||
|
stat.last_index = commit_index;
|
||||||
|
stat.last_time = commit_time;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true // Continue iteration
|
||||||
|
},
|
||||||
|
None, // No binary callback
|
||||||
|
None, // No hunk callback
|
||||||
|
None, // No line callback
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(stats)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// #[cfg(test)]
|
// #[cfg(test)]
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ pub mod auth;
|
|||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod container;
|
pub mod container;
|
||||||
pub mod events;
|
pub mod events;
|
||||||
|
pub mod file_ranker;
|
||||||
pub mod filesystem;
|
pub mod filesystem;
|
||||||
pub mod filesystem_watcher;
|
pub mod filesystem_watcher;
|
||||||
pub mod git;
|
pub mod git;
|
||||||
|
|||||||
Reference in New Issue
Block a user