diff --git a/crates/server/src/routes/projects.rs b/crates/server/src/routes/projects.rs
index a36565e3..d21d41d7 100644
--- a/crates/server/src/routes/projects.rs
+++ b/crates/server/src/routes/projects.rs
@@ -13,7 +13,7 @@ use db::models::project::{
 };
 use deployment::Deployment;
 use ignore::WalkBuilder;
-use services::services::git::GitBranch;
+use services::services::{file_ranker::FileRanker, git::GitBranch};
 use utils::response::ApiResponse;
 use uuid::Uuid;
 
@@ -355,18 +355,32 @@ async fn search_files_in_repo(
         }
     }
 
-    // Sort results by priority: FileName > DirectoryName > FullPath
-    results.sort_by(|a, b| {
-        let priority = |match_type: &SearchMatchType| match match_type {
-            SearchMatchType::FileName => 0,
-            SearchMatchType::DirectoryName => 1,
-            SearchMatchType::FullPath => 2,
-        };
+    // Apply git history-based ranking
+    let file_ranker = FileRanker::new();
+    match file_ranker.get_stats(repo_path).await {
+        Ok(stats) => {
+            // Re-rank results using git history
+            file_ranker.rerank(&mut results, &stats);
+        }
+        Err(e) => {
+            tracing::warn!(
+                "Failed to get git stats for ranking, using basic sort: {}",
+                e
+            );
+            // Fallback to basic priority sorting
+            results.sort_by(|a, b| {
+                let priority = |match_type: &SearchMatchType| match match_type {
+                    SearchMatchType::FileName => 0,
+                    SearchMatchType::DirectoryName => 1,
+                    SearchMatchType::FullPath => 2,
+                };
 
-        priority(&a.match_type)
-            .cmp(&priority(&b.match_type))
-            .then_with(|| a.path.cmp(&b.path))
-    });
+                priority(&a.match_type)
+                    .cmp(&priority(&b.match_type))
+                    .then_with(|| a.path.cmp(&b.path))
+            });
+        }
+    }
 
     // Limit to top 10 results
     results.truncate(10);
diff --git a/crates/services/Cargo.toml b/crates/services/Cargo.toml
index 02b4d34e..49767b37 100644
--- a/crates/services/Cargo.toml
+++ b/crates/services/Cargo.toml
@@ -52,3 +52,4 @@ strum = "0.27.2"
 notify = "8.2.0"
 notify-debouncer-full = "0.5.0"
 dunce = "1.0"
+dashmap = "6.1"
diff --git a/crates/services/src/services/file_ranker.rs b/crates/services/src/services/file_ranker.rs
new file mode 100644
index 00000000..f7c7bf86
--- /dev/null
+++ b/crates/services/src/services/file_ranker.rs
@@ -0,0 +1,163 @@
+use std::{
+    collections::HashMap,
+    path::{Path, PathBuf},
+    sync::{Arc, LazyLock},
+};
+
+use chrono::{DateTime, Utc};
+use dashmap::DashMap;
+use db::models::project::{SearchMatchType, SearchResult};
+use tokio::task;
+
+use super::git::{GitService, GitServiceError};
+
+/// Statistics for a single file based on git history
+#[derive(Clone, Debug)]
+pub struct FileStat {
+    /// Index in the commit history (0 = HEAD, 1 = parent of HEAD, ...)
+    pub last_index: usize,
+    /// Number of times this file was changed in recent commits
+    pub commit_count: u32,
+    /// Timestamp of the most recent change
+    pub last_time: DateTime<Utc>,
+}
+
+/// File statistics for a repository, keyed by the path git reports for each file
+pub type FileStats = HashMap<String, FileStat>;
+
+/// Cache entry for repository history
+#[derive(Clone)]
+struct RepoHistoryCache {
+    /// HEAD commit the statistics were computed for
+    head_sha: String,
+    stats: Arc<FileStats>,
+}
+
+/// Global cache for file ranking statistics, keyed by repository path
+static FILE_STATS_CACHE: LazyLock<DashMap<PathBuf, RepoHistoryCache>> =
+    LazyLock::new(DashMap::new);
+
+/// Configuration constants for ranking algorithm
+const DEFAULT_COMMIT_LIMIT: usize = 100;
+const BASE_MATCH_SCORE_FILENAME: i64 = 100;
+const BASE_MATCH_SCORE_DIRNAME: i64 = 10;
+const BASE_MATCH_SCORE_FULLPATH: i64 = 1;
+const RECENCY_WEIGHT: i64 = 2;
+const FREQUENCY_WEIGHT: i64 = 1;
+
+/// Service for ranking files based on git history
+pub struct FileRanker {
+    git_service: GitService,
+}
+
+impl Default for FileRanker {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl FileRanker {
+    pub fn new() -> Self {
+        Self {
+            git_service: GitService::new(),
+        }
+    }
+
+    /// Get file statistics for a repository, using cache when possible
+    ///
+    /// A cached entry is reused only while HEAD still resolves to the commit the
+    /// entry was computed for; otherwise the statistics are recomputed.
+    pub async fn get_stats(&self, repo_path: &Path) -> Result<Arc<FileStats>, GitServiceError> {
+        // Copy the entry out so the map guard is released before touching git
+        let cached = FILE_STATS_CACHE
+            .get(repo_path)
+            .map(|entry| entry.value().clone());
+
+        if let Some(cached) = cached
+            && let Ok(head_info) = self.git_service.get_head_info(repo_path)
+            && head_info.oid == cached.head_sha
+        {
+            return Ok(cached.stats);
+        }
+
+        // Cache miss or invalid - compute new stats
+        self.compute_stats(repo_path).await
+    }
+
+    /// Re-rank search results based on git history statistics
+    ///
+    /// Higher scores come first; equal scores are ordered by path so the result
+    /// does not depend on the order in which matches were discovered.
+    pub fn rerank(&self, results: &mut [SearchResult], stats: &FileStats) {
+        results.sort_by(|a, b| {
+            let score_a = self.calculate_score(a, stats);
+            let score_b = self.calculate_score(b, stats);
+            score_b
+                .cmp(&score_a)
+                .then_with(|| a.path.cmp(&b.path))
+        });
+    }
+
+    /// Calculate relevance score for a search result
+    fn calculate_score(&self, result: &SearchResult, stats: &FileStats) -> i64 {
+        let base_score = match result.match_type {
+            SearchMatchType::FileName => BASE_MATCH_SCORE_FILENAME,
+            SearchMatchType::DirectoryName => BASE_MATCH_SCORE_DIRNAME,
+            SearchMatchType::FullPath => BASE_MATCH_SCORE_FULLPATH,
+        };
+
+        if let Some(stat) = stats.get(&result.path) {
+            let recency_bonus = (100 - stat.last_index.min(99) as i64) * RECENCY_WEIGHT;
+            let frequency_bonus = stat.commit_count as i64 * FREQUENCY_WEIGHT;
+
+            // Multiply base score to maintain hierarchy, add git-based bonuses
+            base_score * 1000 + recency_bonus * 10 + frequency_bonus
+        } else {
+            // Files not in git history get base score only
+            base_score * 1000
+        }
+    }
+
+    /// Compute file statistics from git history
+    ///
+    /// On success the result is cached under the HEAD commit read just before the
+    /// history walk. On failure a warning is logged and empty statistics are
+    /// returned without being cached, so the next call tries again.
+    async fn compute_stats(&self, repo_path: &Path) -> Result<Arc<FileStats>, GitServiceError> {
+        let repo_path = repo_path.to_path_buf();
+        let task_repo_path = repo_path.clone();
+        let git_service = self.git_service.clone();
+
+        // Run git analysis in blocking task to avoid blocking async runtime
+        let outcome = task::spawn_blocking(move || {
+            // Read HEAD before walking history: if HEAD moves in between, the entry
+            // is keyed by the old commit and is simply recomputed on the next lookup
+            let head_info = git_service.get_head_info(&task_repo_path)?;
+            let stats =
+                git_service.collect_recent_file_stats(&task_repo_path, DEFAULT_COMMIT_LIMIT)?;
+            Ok::<_, GitServiceError>((head_info.oid, stats))
+        })
+        .await
+        .map_err(|e| GitServiceError::InvalidRepository(format!("Task join error: {e}")))?;
+
+        match outcome {
+            Ok((head_sha, stats)) => {
+                let stats = Arc::new(stats);
+                FILE_STATS_CACHE.insert(
+                    repo_path,
+                    RepoHistoryCache {
+                        head_sha,
+                        stats: Arc::clone(&stats),
+                    },
+                );
+                Ok(stats)
+            }
+            Err(e) => {
+                tracing::warn!("Failed to collect file stats for {:?}: {}", repo_path, e);
+                // Return empty stats on error - search will still work without ranking.
+                // Not cached, so a transient failure is retried on the next search.
+                Ok(Arc::new(FileStats::new()))
+            }
+        }
+    }
+}
diff --git a/crates/services/src/services/git.rs b/crates/services/src/services/git.rs
index f0df6143..02085300 100644
--- a/crates/services/src/services/git.rs
+++ b/crates/services/src/services/git.rs
@@ -1,9 +1,9 @@
-use std::path::Path;
+use std::{collections::HashMap, path::Path};
 
 use chrono::{DateTime, Utc};
 use git2::{
     BranchType, CherrypickOptions, Delta, DiffFindOptions, DiffOptions, Error as GitError,
-    FetchOptions, Repository, Status, StatusOptions, build::CheckoutBuilder,
+    FetchOptions, Repository, Sort, Status, StatusOptions, build::CheckoutBuilder,
 };
 use regex;
 use serde::{Deserialize, Serialize};
@@ -11,6 +11,9 @@ use thiserror::Error;
 use ts_rs::TS;
 use utils::diff::{Diff, FileDiffDetails};
 
+// Import for file ranking functionality
+use super::file_ranker::FileStat;
+
 #[derive(Debug, Error)]
 pub enum GitServiceError {
     #[error(transparent)]
@@ -46,6 +49,12 @@ pub struct GitBranch {
     pub last_commit_date: DateTime<Utc>,
 }
 
+#[derive(Debug, Clone)]
+pub struct HeadInfo {
+    pub branch: String,
+    pub oid: String,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, TS)]
 pub struct BranchStatus {
     pub commits_behind: Option<usize>,
@@ -646,13 +655,36 @@ impl GitService {
         Ok(())
     }
 
-    pub fn get_current_branch(&self, repo_path: &Path) -> Result<String, git2::Error> {
-        let repo = Repository::open(repo_path)?;
+    /// Get current HEAD information including branch name and commit OID
+    pub fn get_head_info(&self, repo_path: &Path) -> Result<HeadInfo, GitServiceError> {
+        let repo = self.open_repo(repo_path)?;
         let head = repo.head()?;
-        if let Some(branch_name) = head.shorthand() {
-            Ok(branch_name.to_string())
+
+        let branch = if let Some(branch_name) = head.shorthand() {
+            branch_name.to_string()
         } else {
-            Ok("HEAD".to_string())
+            "HEAD".to_string()
+        };
+
+        let oid = if let Some(target_oid) = head.target() {
+            target_oid.to_string()
+        } else {
+            // Handle case where HEAD exists but has no target (empty repo)
+            return Err(GitServiceError::InvalidRepository(
+                "Repository HEAD has no target commit".to_string(),
+            ));
+        };
+
+        Ok(HeadInfo { branch, oid })
+    }
+
+    pub fn get_current_branch(&self, repo_path: &Path) -> Result<String, git2::Error> {
+        // Thin wrapper for backward compatibility
+        match self.get_head_info(repo_path) {
+            Ok(head_info) => Ok(head_info.branch),
+            Err(GitServiceError::Git(git_err)) => Err(git_err),
+            // Keep the underlying message instead of a generic placeholder
+            Err(other) => Err(git2::Error::from_str(&other.to_string())),
         }
     }
 
@@ -1256,6 +1288,69 @@ impl GitService {
 
         Ok(repo)
     }
+
+    /// Collect file statistics from recent commits for ranking purposes
+    ///
+    /// Walks at most `commit_limit` commits reachable from HEAD and records, for
+    /// every path touched, the index and timestamp of the first walked commit that
+    /// changed it plus the number of walked commits that changed it. Each commit
+    /// is diffed against its first parent only (or the empty tree for a root).
+    pub fn collect_recent_file_stats(
+        &self,
+        repo_path: &Path,
+        commit_limit: usize,
+    ) -> Result<HashMap<String, FileStat>, GitServiceError> {
+        let repo = self.open_repo(repo_path)?;
+        let mut stats: HashMap<String, FileStat> = HashMap::new();
+
+        // Set up revision walk from HEAD
+        let mut revwalk = repo.revwalk()?;
+        revwalk.push_head()?;
+        revwalk.set_sorting(Sort::TIME)?;
+
+        // Iterate through recent commits
+        for (commit_index, oid_result) in revwalk.take(commit_limit).enumerate() {
+            let oid = oid_result?;
+            let commit = repo.find_commit(oid)?;
+
+            // Get commit timestamp
+            let commit_time =
+                DateTime::from_timestamp(commit.time().seconds(), 0).unwrap_or_else(Utc::now);
+
+            // Get the commit tree
+            let commit_tree = commit.tree()?;
+
+            // For the first commit (no parent), diff against empty tree
+            let parent_tree = if commit.parent_count() == 0 {
+                None
+            } else {
+                Some(commit.parent(0)?.tree()?)
+            };
+
+            // Create diff between parent and current commit
+            let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
+
+            // Process each changed file in this commit
+            for delta in diff.deltas() {
+                // Get the file path - prefer new file path, fall back to old
+                if let Some(path) = delta.new_file().path().or_else(|| delta.old_file().path()) {
+                    // `commit_index` only grows, so the entry created on a path's first
+                    // sighting already holds its most recent change; later commits
+                    // touching the same path only bump the counter
+                    stats
+                        .entry(path.to_string_lossy().into_owned())
+                        .or_insert_with(|| FileStat {
+                            last_index: commit_index,
+                            commit_count: 0,
+                            last_time: commit_time,
+                        })
+                        .commit_count += 1;
+                }
+            }
+        }
+
+        Ok(stats)
+    }
 }
 
 // #[cfg(test)]
diff --git a/crates/services/src/services/mod.rs b/crates/services/src/services/mod.rs
index ca77d572..54ed66f6 100644
--- a/crates/services/src/services/mod.rs
+++ b/crates/services/src/services/mod.rs
@@ -3,6 +3,7 @@ pub mod auth;
 pub mod config;
 pub mod container;
 pub mod events;
+pub mod file_ranker;
 pub mod filesystem;
 pub mod filesystem_watcher;
 pub mod git;