Re-rank file search (vibe-kanban) (#514)
* Cleanup script changes for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b * Commit changes from coding agent for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b * Cleanup script changes for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b * fmt * Perfect! **✅ Refactoring Complete** ## Summary Successfully refactored the FileRanker to remove all direct git2 dependencies and consolidated all git operations in GitService: ### **Key Changes Made:** 1. **Enhanced GitService** (`crates/services/src/services/git.rs`): - ✅ Added `HeadInfo` struct with `branch: String` and `oid: String` - ✅ Added `get_head_info()` method that returns both branch name and commit SHA - ✅ Updated `get_current_branch()` to be a thin wrapper maintaining backward compatibility 2. **Refactored FileRanker** (`crates/services/src/services/file_ranker.rs`): - �� **Removed all `git2` imports** - now purely focused on ranking logic - ✅ Changed cache to store `head_sha: String` instead of `head_oid: git2::Oid` - ✅ Updated cache validation to use `GitService.get_head_info()` - ✅ Updated cache storage to use `GitService.get_head_info()` ### **Benefits Achieved:** - **🎯 Clean Separation**: FileRanker = pure ranking logic, GitService = all git I/O - **🧪 Better Testability**: FileRanker no longer depends on git2, easier to mock/test - **🔄 Future-Proof**: All git backend changes centralized in GitService - **⚡ Zero Performance Impact**: Same caching behavior and efficiency - **🔄 Backward Compatibility**: Existing `get_current_branch()` callers unaffected ### **Architecture Now:** ``` FileRanker (ranking logic only) ↓ delegates to GitService (single git facade) ↓ encapsulates git2 (implementation detail) ``` The refactoring successfully achieved proper separation of concerns while maintaining all existing functionality and performance characteristics. * Cleanup script changes for task attempt f7987e81-2e4a-47e7-9d54-cc27930b694b
This commit is contained in:
committed by
GitHub
parent
88a67d8d9c
commit
e5aa72e907
@@ -13,7 +13,7 @@ use db::models::project::{
|
||||
};
|
||||
use deployment::Deployment;
|
||||
use ignore::WalkBuilder;
|
||||
use services::services::git::GitBranch;
|
||||
use services::services::{file_ranker::FileRanker, git::GitBranch};
|
||||
use utils::response::ApiResponse;
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -355,18 +355,32 @@ async fn search_files_in_repo(
|
||||
}
|
||||
}
|
||||
|
||||
// Sort results by priority: FileName > DirectoryName > FullPath
|
||||
results.sort_by(|a, b| {
|
||||
let priority = |match_type: &SearchMatchType| match match_type {
|
||||
SearchMatchType::FileName => 0,
|
||||
SearchMatchType::DirectoryName => 1,
|
||||
SearchMatchType::FullPath => 2,
|
||||
};
|
||||
// Apply git history-based ranking
|
||||
let file_ranker = FileRanker::new();
|
||||
match file_ranker.get_stats(repo_path).await {
|
||||
Ok(stats) => {
|
||||
// Re-rank results using git history
|
||||
file_ranker.rerank(&mut results, &stats);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"Failed to get git stats for ranking, using basic sort: {}",
|
||||
e
|
||||
);
|
||||
// Fallback to basic priority sorting
|
||||
results.sort_by(|a, b| {
|
||||
let priority = |match_type: &SearchMatchType| match match_type {
|
||||
SearchMatchType::FileName => 0,
|
||||
SearchMatchType::DirectoryName => 1,
|
||||
SearchMatchType::FullPath => 2,
|
||||
};
|
||||
|
||||
priority(&a.match_type)
|
||||
.cmp(&priority(&b.match_type))
|
||||
.then_with(|| a.path.cmp(&b.path))
|
||||
});
|
||||
priority(&a.match_type)
|
||||
.cmp(&priority(&b.match_type))
|
||||
.then_with(|| a.path.cmp(&b.path))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Limit to top 10 results
|
||||
results.truncate(10);
|
||||
|
||||
@@ -52,3 +52,5 @@ strum = "0.27.2"
|
||||
notify = "8.2.0"
|
||||
notify-debouncer-full = "0.5.0"
|
||||
dunce = "1.0"
|
||||
dashmap = "6.1"
|
||||
once_cell = "1.20"
|
||||
|
||||
157
crates/services/src/services/file_ranker.rs
Normal file
157
crates/services/src/services/file_ranker.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use db::models::project::{SearchMatchType, SearchResult};
|
||||
use once_cell::sync::Lazy;
|
||||
use tokio::task;
|
||||
|
||||
use super::git::{GitService, GitServiceError};
|
||||
|
||||
/// Statistics for a single file based on git history
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct FileStat {
|
||||
/// Index in the commit history (0 = HEAD, 1 = parent of HEAD, ...)
|
||||
pub last_index: usize,
|
||||
/// Number of times this file was changed in recent commits
|
||||
pub commit_count: u32,
|
||||
/// Timestamp of the most recent change
|
||||
pub last_time: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// File statistics for a repository
|
||||
pub type FileStats = HashMap<String, FileStat>;
|
||||
|
||||
/// Cache entry for repository history
|
||||
#[derive(Clone)]
|
||||
struct RepoHistoryCache {
|
||||
head_sha: String,
|
||||
stats: Arc<FileStats>,
|
||||
generated_at: Instant,
|
||||
}
|
||||
|
||||
/// Global cache for file ranking statistics
|
||||
static FILE_STATS_CACHE: Lazy<DashMap<PathBuf, RepoHistoryCache>> = Lazy::new(DashMap::new);
|
||||
|
||||
/// Configuration constants for ranking algorithm
|
||||
const DEFAULT_COMMIT_LIMIT: usize = 100;
|
||||
const BASE_MATCH_SCORE_FILENAME: i64 = 100;
|
||||
const BASE_MATCH_SCORE_DIRNAME: i64 = 10;
|
||||
const BASE_MATCH_SCORE_FULLPATH: i64 = 1;
|
||||
const RECENCY_WEIGHT: i64 = 2;
|
||||
const FREQUENCY_WEIGHT: i64 = 1;
|
||||
|
||||
/// Service for ranking files based on git history
|
||||
pub struct FileRanker {
|
||||
git_service: GitService,
|
||||
}
|
||||
|
||||
impl Default for FileRanker {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl FileRanker {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
git_service: GitService::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get file statistics for a repository, using cache when possible
|
||||
pub async fn get_stats(&self, repo_path: &Path) -> Result<Arc<FileStats>, GitServiceError> {
|
||||
let repo_path = repo_path.to_path_buf();
|
||||
|
||||
// Check if we have a valid cache entry
|
||||
if let Some(cache_entry) = FILE_STATS_CACHE.get(&repo_path) {
|
||||
// Verify cache is still valid by checking HEAD
|
||||
if let Ok(head_info) = self.git_service.get_head_info(&repo_path)
|
||||
&& head_info.oid == cache_entry.head_sha
|
||||
{
|
||||
return Ok(Arc::clone(&cache_entry.stats));
|
||||
}
|
||||
}
|
||||
|
||||
// Cache miss or invalid - compute new stats
|
||||
let stats = self.compute_stats(&repo_path).await?;
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Re-rank search results based on git history statistics
|
||||
pub fn rerank(&self, results: &mut [SearchResult], stats: &FileStats) {
|
||||
results.sort_by(|a, b| {
|
||||
let score_a = self.calculate_score(a, stats);
|
||||
let score_b = self.calculate_score(b, stats);
|
||||
score_b.cmp(&score_a) // Higher scores first
|
||||
});
|
||||
}
|
||||
|
||||
/// Calculate relevance score for a search result
|
||||
fn calculate_score(&self, result: &SearchResult, stats: &FileStats) -> i64 {
|
||||
let base_score = match result.match_type {
|
||||
SearchMatchType::FileName => BASE_MATCH_SCORE_FILENAME,
|
||||
SearchMatchType::DirectoryName => BASE_MATCH_SCORE_DIRNAME,
|
||||
SearchMatchType::FullPath => BASE_MATCH_SCORE_FULLPATH,
|
||||
};
|
||||
|
||||
if let Some(stat) = stats.get(&result.path) {
|
||||
let recency_bonus = (100 - stat.last_index.min(99) as i64) * RECENCY_WEIGHT;
|
||||
let frequency_bonus = stat.commit_count as i64 * FREQUENCY_WEIGHT;
|
||||
|
||||
// Multiply base score to maintain hierarchy, add git-based bonuses
|
||||
base_score * 1000 + recency_bonus * 10 + frequency_bonus
|
||||
} else {
|
||||
// Files not in git history get base score only
|
||||
base_score * 1000
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute file statistics from git history
|
||||
async fn compute_stats(&self, repo_path: &Path) -> Result<Arc<FileStats>, GitServiceError> {
|
||||
let repo_path = repo_path.to_path_buf();
|
||||
let repo_path_for_error = repo_path.clone();
|
||||
let git_service = self.git_service.clone();
|
||||
|
||||
// Run git analysis in blocking task to avoid blocking async runtime
|
||||
let stats = task::spawn_blocking(move || {
|
||||
git_service.collect_recent_file_stats(&repo_path, DEFAULT_COMMIT_LIMIT)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| GitServiceError::InvalidRepository(format!("Task join error: {e}")))?;
|
||||
|
||||
let stats = match stats {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"Failed to collect file stats for {:?}: {}",
|
||||
repo_path_for_error,
|
||||
e
|
||||
);
|
||||
// Return empty stats on error - search will still work without ranking
|
||||
HashMap::new()
|
||||
}
|
||||
};
|
||||
|
||||
let stats_arc = Arc::new(stats);
|
||||
|
||||
// Update cache
|
||||
if let Ok(head_info) = self.git_service.get_head_info(&repo_path_for_error) {
|
||||
FILE_STATS_CACHE.insert(
|
||||
repo_path_for_error,
|
||||
RepoHistoryCache {
|
||||
head_sha: head_info.oid,
|
||||
stats: Arc::clone(&stats_arc),
|
||||
generated_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Ok(stats_arc)
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
use std::path::Path;
|
||||
use std::{collections::HashMap, path::Path};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use git2::{
|
||||
BranchType, CherrypickOptions, Delta, DiffFindOptions, DiffOptions, Error as GitError,
|
||||
FetchOptions, Repository, Status, StatusOptions, build::CheckoutBuilder,
|
||||
FetchOptions, Repository, Sort, Status, StatusOptions, build::CheckoutBuilder,
|
||||
};
|
||||
use regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -11,6 +11,9 @@ use thiserror::Error;
|
||||
use ts_rs::TS;
|
||||
use utils::diff::{Diff, FileDiffDetails};
|
||||
|
||||
// Import for file ranking functionality
|
||||
use super::file_ranker::FileStat;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum GitServiceError {
|
||||
#[error(transparent)]
|
||||
@@ -46,6 +49,12 @@ pub struct GitBranch {
|
||||
pub last_commit_date: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HeadInfo {
|
||||
pub branch: String,
|
||||
pub oid: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
|
||||
pub struct BranchStatus {
|
||||
pub commits_behind: Option<usize>,
|
||||
@@ -646,13 +655,35 @@ impl GitService {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_current_branch(&self, repo_path: &Path) -> Result<String, git2::Error> {
|
||||
let repo = Repository::open(repo_path)?;
|
||||
/// Get current HEAD information including branch name and commit OID
|
||||
pub fn get_head_info(&self, repo_path: &Path) -> Result<HeadInfo, GitServiceError> {
|
||||
let repo = self.open_repo(repo_path)?;
|
||||
let head = repo.head()?;
|
||||
if let Some(branch_name) = head.shorthand() {
|
||||
Ok(branch_name.to_string())
|
||||
|
||||
let branch = if let Some(branch_name) = head.shorthand() {
|
||||
branch_name.to_string()
|
||||
} else {
|
||||
Ok("HEAD".to_string())
|
||||
"HEAD".to_string()
|
||||
};
|
||||
|
||||
let oid = if let Some(target_oid) = head.target() {
|
||||
target_oid.to_string()
|
||||
} else {
|
||||
// Handle case where HEAD exists but has no target (empty repo)
|
||||
return Err(GitServiceError::InvalidRepository(
|
||||
"Repository HEAD has no target commit".to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
Ok(HeadInfo { branch, oid })
|
||||
}
|
||||
|
||||
pub fn get_current_branch(&self, repo_path: &Path) -> Result<String, git2::Error> {
|
||||
// Thin wrapper for backward compatibility
|
||||
match self.get_head_info(repo_path) {
|
||||
Ok(head_info) => Ok(head_info.branch),
|
||||
Err(GitServiceError::Git(git_err)) => Err(git_err),
|
||||
Err(_) => Err(git2::Error::from_str("Failed to get head info")),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1256,6 +1287,80 @@ impl GitService {
|
||||
|
||||
Ok(repo)
|
||||
}
|
||||
|
||||
/// Collect file statistics from recent commits for ranking purposes
|
||||
pub fn collect_recent_file_stats(
|
||||
&self,
|
||||
repo_path: &Path,
|
||||
commit_limit: usize,
|
||||
) -> Result<HashMap<String, FileStat>, GitServiceError> {
|
||||
let repo = self.open_repo(repo_path)?;
|
||||
let mut stats: HashMap<String, FileStat> = HashMap::new();
|
||||
|
||||
// Set up revision walk from HEAD
|
||||
let mut revwalk = repo.revwalk()?;
|
||||
revwalk.push_head()?;
|
||||
revwalk.set_sorting(Sort::TIME)?;
|
||||
|
||||
// Iterate through recent commits
|
||||
for (commit_index, oid_result) in revwalk.take(commit_limit).enumerate() {
|
||||
let oid = oid_result?;
|
||||
let commit = repo.find_commit(oid)?;
|
||||
|
||||
// Get commit timestamp
|
||||
let commit_time = {
|
||||
let time = commit.time();
|
||||
DateTime::from_timestamp(time.seconds(), 0).unwrap_or_else(Utc::now)
|
||||
};
|
||||
|
||||
// Get the commit tree
|
||||
let commit_tree = commit.tree()?;
|
||||
|
||||
// For the first commit (no parent), diff against empty tree
|
||||
let parent_tree = if commit.parent_count() == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(commit.parent(0)?.tree()?)
|
||||
};
|
||||
|
||||
// Create diff between parent and current commit
|
||||
let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
|
||||
|
||||
// Process each changed file in this commit
|
||||
diff.foreach(
|
||||
&mut |delta, _progress| {
|
||||
// Get the file path - prefer new file path, fall back to old
|
||||
if let Some(path) = delta.new_file().path().or_else(|| delta.old_file().path())
|
||||
{
|
||||
let path_str = path.to_string_lossy().to_string();
|
||||
|
||||
// Update or insert file stats
|
||||
let stat = stats.entry(path_str).or_insert(FileStat {
|
||||
last_index: commit_index,
|
||||
commit_count: 0,
|
||||
last_time: commit_time,
|
||||
});
|
||||
|
||||
// Increment commit count
|
||||
stat.commit_count += 1;
|
||||
|
||||
// Keep the most recent change (smallest index)
|
||||
if commit_index < stat.last_index {
|
||||
stat.last_index = commit_index;
|
||||
stat.last_time = commit_time;
|
||||
}
|
||||
}
|
||||
|
||||
true // Continue iteration
|
||||
},
|
||||
None, // No binary callback
|
||||
None, // No hunk callback
|
||||
None, // No line callback
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
|
||||
@@ -3,6 +3,7 @@ pub mod auth;
|
||||
pub mod config;
|
||||
pub mod container;
|
||||
pub mod events;
|
||||
pub mod file_ranker;
|
||||
pub mod filesystem;
|
||||
pub mod filesystem_watcher;
|
||||
pub mod git;
|
||||
|
||||
Reference in New Issue
Block a user