From 6e4e6f92cecccacbe9c10cd0afc2e05a22d7fa39 Mon Sep 17 00:00:00 2001 From: Solomon Date: Sat, 20 Sep 2025 12:26:08 +0100 Subject: [PATCH] Reduce diff induced UI craches (#794) --- crates/local-deployment/src/container.rs | 171 +++++++++++++++++++-- crates/services/src/services/git.rs | 182 +++++++++++++++++------ crates/utils/src/diff.rs | 5 + frontend/src/components/DiffCard.tsx | 28 ++-- frontend/src/hooks/useDiffSummary.ts | 5 + shared/types.ts | 10 +- 6 files changed, 336 insertions(+), 65 deletions(-) diff --git a/crates/local-deployment/src/container.rs b/crates/local-deployment/src/container.rs index 0b2572a1..7a1f0002 100644 --- a/crates/local-deployment/src/container.rs +++ b/crates/local-deployment/src/container.rs @@ -2,7 +2,10 @@ use std::{ collections::{HashMap, HashSet}, io, path::{Path, PathBuf}, - sync::Arc, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, time::Duration, }; @@ -50,6 +53,7 @@ use services::services::{ use tokio::{sync::RwLock, task::JoinHandle}; use tokio_util::io::ReaderStream; use utils::{ + diff::create_unified_diff_hunk, log_msg::LogMsg, msg_store::MsgStore, text::{git_branch_id, short_uuid}, @@ -70,6 +74,60 @@ pub struct LocalContainerService { } impl LocalContainerService { + // Max cumulative content bytes allowed per diff stream + const MAX_CUMULATIVE_DIFF_BYTES: usize = 50 * 1024; // 50KB + + // Apply stream-level omit policy based on cumulative bytes. + // If adding this diff's contents exceeds the cap, strip contents and set stats. + fn apply_stream_omit_policy( + &self, + diff: &mut utils::diff::Diff, + sent_bytes: &Arc, + ) { + // Compute size of current diff payload + let mut size = 0usize; + if let Some(ref s) = diff.old_content { + size += s.len(); + } + if let Some(ref s) = diff.new_content { + size += s.len(); + } + + if size == 0 { + return; // nothing to account + } + + let current = sent_bytes.load(Ordering::Relaxed); + if current.saturating_add(size) > Self::MAX_CUMULATIVE_DIFF_BYTES { + // We will omit content for this diff. If we still have both sides loaded + // (i.e., not already omitted by file-size guards), compute stats for UI. + if diff.additions.is_none() && diff.deletions.is_none() { + let old = diff.old_content.as_deref().unwrap_or(""); + let new = diff.new_content.as_deref().unwrap_or(""); + let hunk = create_unified_diff_hunk(old, new); + let mut add = 0usize; + let mut del = 0usize; + for line in hunk.lines() { + if let Some(first) = line.chars().next() { + if first == '+' { + add += 1; + } else if first == '-' { + del += 1; + } + } + } + diff.additions = Some(add); + diff.deletions = Some(del); + } + + diff.old_content = None; + diff.new_content = None; + diff.content_omitted = true; + } else { + // safe to include; account for it + let _ = sent_bytes.fetch_add(size, Ordering::Relaxed); + } + } pub fn new( db: DBService, msg_stores: Arc>>>, @@ -555,6 +613,15 @@ impl LocalContainerService { None, )?; + let cum = Arc::new(AtomicUsize::new(0)); + let diffs: Vec<_> = diffs + .into_iter() + .map(|mut d| { + self.apply_stream_omit_policy(&mut d, &cum); + d + }) + .collect(); + let stream = futures::stream::iter(diffs.into_iter().map(|diff| { let entry_index = GitService::diff_path(&diff); let patch = @@ -589,6 +656,29 @@ impl LocalContainerService { None, )?; + // cumulative counter for entire stream + let cumulative = Arc::new(AtomicUsize::new(0)); + // track which file paths have been emitted with full content already + let full_sent = Arc::new(std::sync::RwLock::new(HashSet::::new())); + let initial_diffs: Vec<_> = initial_diffs + .into_iter() + .map(|mut d| { + self.apply_stream_omit_policy(&mut d, &cumulative); + d + }) + .collect(); + + // Record which paths were sent with full content + { + let mut guard = full_sent.write().unwrap(); + for d in &initial_diffs { + if !d.content_omitted { + let p = GitService::diff_path(d); + guard.insert(p); + } + } + } + let initial_stream = futures::stream::iter(initial_diffs.into_iter().map(|diff| { let entry_index = GitService::diff_path(&diff); let patch = @@ -606,6 +696,8 @@ impl LocalContainerService { let live_stream = { let git_service = git_service.clone(); let worktree_path_for_spawn = worktree_path.clone(); + let cumulative = Arc::clone(&cumulative); + let full_sent = Arc::clone(&full_sent); try_stream! { // Move the expensive watcher setup to blocking thread to avoid blocking the async runtime let watcher_result = tokio::task::spawn_blocking(move || { @@ -629,6 +721,8 @@ impl LocalContainerService { &task_branch, &base_branch, &changed_paths, + &cumulative, + &full_sent, ).map_err(|e| { tracing::error!("Error processing file changes: {}", e); io::Error::other(e.to_string()) @@ -650,7 +744,9 @@ impl LocalContainerService { } }.boxed(); - let combined_stream = select(initial_stream, live_stream); + // Ensure all initial diffs are emitted before live updates, to avoid + // earlier files being abbreviated due to interleaving ordering. + let combined_stream = initial_stream.chain(live_stream); Ok(combined_stream.boxed()) } @@ -680,6 +776,8 @@ impl LocalContainerService { task_branch: &str, base_branch: &str, changed_paths: &[String], + cumulative_bytes: &Arc, + full_sent_paths: &Arc>>, ) -> Result, ContainerError> { let path_filter: Vec<&str> = changed_paths.iter().map(|s| s.as_str()).collect(); @@ -696,9 +794,66 @@ impl LocalContainerService { let mut files_with_diffs = HashSet::new(); // Add/update files that have diffs - for diff in current_diffs { + for mut diff in current_diffs { let file_path = GitService::diff_path(&diff); files_with_diffs.insert(file_path.clone()); + // Apply stream-level omit policy (affects contents and stats) + // Note: we can't call self methods from static fn; implement inline + { + // Compute size + let mut size = 0usize; + if let Some(ref s) = diff.old_content { + size += s.len(); + } + if let Some(ref s) = diff.new_content { + size += s.len(); + } + if size > 0 { + let current = cumulative_bytes.load(Ordering::Relaxed); + if current.saturating_add(size) + > LocalContainerService::MAX_CUMULATIVE_DIFF_BYTES + { + if diff.additions.is_none() && diff.deletions.is_none() { + let old = diff.old_content.as_deref().unwrap_or(""); + let new = diff.new_content.as_deref().unwrap_or(""); + let hunk = create_unified_diff_hunk(old, new); + let mut add = 0usize; + let mut del = 0usize; + for line in hunk.lines() { + if let Some(first) = line.chars().next() { + if first == '+' { + add += 1; + } else if first == '-' { + del += 1; + } + } + } + diff.additions = Some(add); + diff.deletions = Some(del); + } + diff.old_content = None; + diff.new_content = None; + diff.content_omitted = true; + } else { + let _ = cumulative_bytes.fetch_add(size, Ordering::Relaxed); + } + } + } + + // If this diff would be omitted and we already sent a full-content + // version of this path earlier in the stream, skip sending a + // degrading replacement. + if diff.content_omitted { + if full_sent_paths.read().unwrap().contains(&file_path) { + continue; + } + } else { + // Track that we have sent a full-content version + { + let mut guard = full_sent_paths.write().unwrap(); + guard.insert(file_path.clone()); + } + } let patch = ConversationPatch::add_diff(escape_json_pointer_segment(&file_path), diff); let event = LogMsg::JsonPatch(patch).to_sse_event(); @@ -1108,7 +1263,7 @@ impl ContainerService for LocalContainerService { && !parent.exists() { std::fs::create_dir_all(parent).map_err(|e| { - ContainerError::Other(anyhow!("Failed to create directory {:?}: {}", parent, e)) + ContainerError::Other(anyhow!("Failed to create directory {parent:?}: {e}")) })?; } @@ -1116,17 +1271,13 @@ impl ContainerService for LocalContainerService { if source_file.exists() { std::fs::copy(&source_file, &target_file).map_err(|e| { ContainerError::Other(anyhow!( - "Failed to copy file {:?} to {:?}: {}", - source_file, - target_file, - e + "Failed to copy file {source_file:?} to {target_file:?}: {e}" )) })?; tracing::info!("Copied file {:?} to worktree", file_path); } else { return Err(ContainerError::Other(anyhow!( - "File {:?} does not exist in the project directory", - source_file + "File {source_file:?} does not exist in the project directory" ))); } } diff --git a/crates/services/src/services/git.rs b/crates/services/src/services/git.rs index 42967dae..5582fb7e 100644 --- a/crates/services/src/services/git.rs +++ b/crates/services/src/services/git.rs @@ -42,6 +42,10 @@ pub enum GitServiceError { #[derive(Clone)] pub struct GitService {} +// Max inline diff size for UI (in bytes). Files larger than this will have +// their contents omitted from the diff stream to avoid UI crashes. +const MAX_INLINE_DIFF_BYTES: usize = 50 * 1024; // ~50KB + #[derive(Debug, Clone, Serialize, Deserialize, TS, PartialEq, Eq)] #[serde(rename_all = "snake_case")] #[ts(rename_all = "snake_case")] @@ -350,6 +354,7 @@ impl GitService { ) -> Result, GitServiceError> { let mut file_diffs = Vec::new(); + let mut delta_index: usize = 0; diff.foreach( &mut |delta, _| { if delta.status() == Delta::Unreadable { @@ -358,24 +363,72 @@ impl GitService { let status = delta.status(); - // Only build old_file for non-added entries - let old_file = if matches!(status, Delta::Added) { - None + // Decide if we should omit content due to size + let mut content_omitted = false; + // Check old blob size when applicable + if !matches!(status, Delta::Added) { + let oid = delta.old_file().id(); + if !oid.is_zero() + && let Ok(blob) = repo.find_blob(oid) + && !blob.is_binary() + && blob.size() > MAX_INLINE_DIFF_BYTES + { + content_omitted = true; + } + } + // Check new blob size when applicable + if !matches!(status, Delta::Deleted) { + let oid = delta.new_file().id(); + if !oid.is_zero() + && let Ok(blob) = repo.find_blob(oid) + && !blob.is_binary() + && blob.size() > MAX_INLINE_DIFF_BYTES + { + content_omitted = true; + } + } + + // Only build old/new content if not omitted + let (old_path, old_content) = if matches!(status, Delta::Added) { + (None, None) } else { - delta + let path_opt = delta .old_file() .path() - .map(|p| self.create_file_details(p, &delta.old_file().id(), repo)) + .map(|p| p.to_string_lossy().to_string()); + if content_omitted { + (path_opt, None) + } else { + let details = delta + .old_file() + .path() + .map(|p| self.create_file_details(p, &delta.old_file().id(), repo)); + ( + details.as_ref().and_then(|f| f.file_name.clone()), + details.and_then(|f| f.content), + ) + } }; - // Only build new_file for non-deleted entries - let new_file = if matches!(status, Delta::Deleted) { - None + let (new_path, new_content) = if matches!(status, Delta::Deleted) { + (None, None) } else { - delta + let path_opt = delta .new_file() .path() - .map(|p| self.create_file_details(p, &delta.new_file().id(), repo)) + .map(|p| p.to_string_lossy().to_string()); + if content_omitted { + (path_opt, None) + } else { + let details = delta + .new_file() + .path() + .map(|p| self.create_file_details(p, &delta.new_file().id(), repo)); + ( + details.as_ref().and_then(|f| f.file_name.clone()), + details.and_then(|f| f.content), + ) + } }; let mut change = match status { @@ -388,33 +441,40 @@ impl GitService { _ => DiffChangeKind::Modified, }; - let old_path = old_file.as_ref().and_then(|f| f.file_name.clone()); - let new_path = new_file.as_ref().and_then(|f| f.file_name.clone()); - let old_content = old_file.and_then(|f| f.content); - let new_content = new_file.and_then(|f| f.content); - // Detect pure mode changes (e.g., chmod +/-x) and classify as PermissionChange if matches!(status, Delta::Modified) && delta.old_file().mode() != delta.new_file().mode() { - // If content unchanged or unavailable, prefer PermissionChange label - if old_content - .as_ref() - .zip(new_content.as_ref()) - .is_none_or(|(o, n)| o == n) + // Only downgrade to PermissionChange if we KNOW content is unchanged + if old_content.is_some() && new_content.is_some() && old_content == new_content { change = DiffChangeKind::PermissionChange; } } + // If contents are omitted, try to compute line stats via libgit2 Patch + let mut additions: Option = None; + let mut deletions: Option = None; + if content_omitted + && let Ok(Some(patch)) = git2::Patch::from_diff(&diff, delta_index) + && let Ok((_ctx, adds, dels)) = patch.line_stats() + { + additions = Some(adds); + deletions = Some(dels); + } + file_diffs.push(Diff { change, old_path, new_path, old_content, new_content, + content_omitted, + additions, + deletions, }); + delta_index += 1; true }, None, @@ -458,11 +518,11 @@ impl GitService { } }; - // Size guard - skip files larger than 1MB - if bytes.len() > 1_048_576 { + // Size guard - skip files larger than UI inline threshold + if bytes.len() > MAX_INLINE_DIFF_BYTES { tracing::debug!( - "Skipping large file ({}MB): {:?}", - bytes.len() / 1_048_576, + "Skipping large file ({}KB): {:?}", + bytes.len() / 1024, abs_path ); return None; @@ -544,34 +604,65 @@ impl GitService { ChangeType::Unknown(_) => (e.old_path.clone(), Some(e.path.clone())), }; - // Load old content from base tree if possible - let old_content = if let Some(ref oldp) = old_path_opt { + // Decide if we should omit content by size (either side) + let mut content_omitted = false; + // Old side (from base tree) + if let Some(ref oldp) = old_path_opt { let rel = std::path::Path::new(oldp); - match base_tree.get_path(rel) { - Ok(entry) if entry.kind() == Some(git2::ObjectType::Blob) => repo - .find_blob(entry.id()) - .ok() - .and_then(|b| Self::blob_to_string(&b)), - _ => None, + if let Ok(entry) = base_tree.get_path(rel) + && entry.kind() == Some(git2::ObjectType::Blob) + && let Ok(blob) = repo.find_blob(entry.id()) + && !blob.is_binary() + && blob.size() > MAX_INLINE_DIFF_BYTES + { + content_omitted = true; } - } else { - None - }; + } + // New side (from filesystem) + if let Some(ref newp) = new_path_opt + && let Some(workdir) = repo.workdir() + { + let abs = workdir.join(newp); + if let Ok(md) = std::fs::metadata(&abs) + && (md.len() as usize) > MAX_INLINE_DIFF_BYTES + { + content_omitted = true; + } + } - // Load new content from filesystem (worktree) when available - let new_content = if let Some(ref newp) = new_path_opt { - let rel = std::path::Path::new(newp); - Self::read_file_to_string(repo, rel) + // Load contents only if not omitted + let (old_content, new_content) = if content_omitted { + (None, None) } else { - None + // Load old content from base tree if possible + let old_content = if let Some(ref oldp) = old_path_opt { + let rel = std::path::Path::new(oldp); + match base_tree.get_path(rel) { + Ok(entry) if entry.kind() == Some(git2::ObjectType::Blob) => repo + .find_blob(entry.id()) + .ok() + .and_then(|b| Self::blob_to_string(&b)), + _ => None, + } + } else { + None + }; + + // Load new content from filesystem (worktree) when available + let new_content = if let Some(ref newp) = new_path_opt { + let rel = std::path::Path::new(newp); + Self::read_file_to_string(repo, rel) + } else { + None + }; + (old_content, new_content) }; // If reported as Modified but content is identical, treat as a permission-only change if matches!(change, DiffChangeKind::Modified) - && old_content - .as_ref() - .zip(new_content.as_ref()) - .is_none_or(|(o, n)| o == n) + && old_content.is_some() + && new_content.is_some() + && old_content == new_content { change = DiffChangeKind::PermissionChange; } @@ -582,6 +673,9 @@ impl GitService { new_path: new_path_opt, old_content, new_content, + content_omitted, + additions: None, + deletions: None, } } diff --git a/crates/utils/src/diff.rs b/crates/utils/src/diff.rs index ce025934..90ef0ffd 100644 --- a/crates/utils/src/diff.rs +++ b/crates/utils/src/diff.rs @@ -20,6 +20,11 @@ pub struct Diff { pub new_path: Option, pub old_content: Option, pub new_content: Option, + /// True when file contents are intentionally omitted (e.g., too large) + pub content_omitted: bool, + /// Optional precomputed stats for omitted content + pub additions: Option, + pub deletions: Option, } #[derive(Debug, Clone, Serialize, Deserialize, TS)] diff --git a/frontend/src/components/DiffCard.tsx b/frontend/src/components/DiffCard.tsx index 31779f46..1116f507 100644 --- a/frontend/src/components/DiffCard.tsx +++ b/frontend/src/components/DiffCard.tsx @@ -85,6 +85,7 @@ export default function DiffCard({ const newLang = getHighLightLanguageFromPath(newName || oldName || '') || 'plaintext'; const { label, Icon } = labelAndIcon(diff); + const isOmitted = !!diff.contentOmitted; // Build a diff from raw contents so the viewer can expand beyond hunks const oldContentSafe = diff.oldContent || ''; @@ -92,7 +93,7 @@ export default function DiffCard({ const isContentEqual = oldContentSafe === newContentSafe; const diffFile = useMemo(() => { - if (isContentEqual) return null; + if (isContentEqual || isOmitted) return null; try { const oldFileName = oldName || newName || 'unknown'; const newFileName = newName || oldName || 'unknown'; @@ -112,6 +113,7 @@ export default function DiffCard({ } }, [ isContentEqual, + isOmitted, oldName, newName, oldLang, @@ -120,8 +122,12 @@ export default function DiffCard({ newContentSafe, ]); - const add = diffFile?.additionLength ?? 0; - const del = diffFile?.deletionLength ?? 0; + const add = isOmitted + ? (diff.additions ?? 0) + : (diffFile?.additionLength ?? 0); + const del = isOmitted + ? (diff.deletions ?? 0) + : (diffFile?.deletionLength ?? 0); // Review functionality const filePath = newName || oldName || 'unknown'; @@ -293,13 +299,15 @@ export default function DiffCard({ className="px-4 pb-4 text-xs font-mono" style={{ color: 'hsl(var(--muted-foreground) / 0.9)' }} > - {isContentEqual - ? diff.change === 'renamed' - ? 'File renamed with no content changes.' - : diff.change === 'permissionChange' - ? 'File permission changed.' - : 'No content changes to display.' - : 'Failed to render diff for this file.'} + {isOmitted + ? 'Content omitted due to file size. Open in editor to view.' + : isContentEqual + ? diff.change === 'renamed' + ? 'File renamed with no content changes.' + : diff.change === 'permissionChange' + ? 'File permission changed.' + : 'No content changes to display.' + : 'Failed to render diff for this file.'} )} diff --git a/frontend/src/hooks/useDiffSummary.ts b/frontend/src/hooks/useDiffSummary.ts index 45860e8b..41f90429 100644 --- a/frontend/src/hooks/useDiffSummary.ts +++ b/frontend/src/hooks/useDiffSummary.ts @@ -14,6 +14,11 @@ export function useDiffSummary(attemptId: string | null) { return diffs.reduce( (acc, d) => { try { + if (d.contentOmitted) { + acc.added += d.additions ?? 0; + acc.deleted += d.deletions ?? 0; + return acc; + } const oldName = d.oldPath || d.newPath || 'old'; const newName = d.newPath || d.oldPath || 'new'; const oldContent = d.oldContent || ''; diff --git a/shared/types.ts b/shared/types.ts index 1c053887..05092113 100644 --- a/shared/types.ts +++ b/shared/types.ts @@ -110,7 +110,15 @@ export enum CheckTokenResponse { VALID = "VALID", INVALID = "INVALID" } export type GitBranch = { name: string, is_current: boolean, is_remote: boolean, last_commit_date: Date, }; -export type Diff = { change: DiffChangeKind, oldPath: string | null, newPath: string | null, oldContent: string | null, newContent: string | null, }; +export type Diff = { change: DiffChangeKind, oldPath: string | null, newPath: string | null, oldContent: string | null, newContent: string | null, +/** + * True when file contents are intentionally omitted (e.g., too large) + */ +contentOmitted: boolean, +/** + * Optional precomputed stats for omitted content + */ +additions: number | null, deletions: number | null, }; export type DiffChangeKind = "added" | "deleted" | "modified" | "renamed" | "copied" | "permissionChange";