Reduce diff-induced UI crashes (#794)

This commit is contained in:
Solomon
2025-09-20 12:26:08 +01:00
committed by GitHub
parent 18c88324fc
commit 6e4e6f92ce
6 changed files with 336 additions and 65 deletions
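For orientation: the backend changes below cap the cumulative file content emitted per diff stream at roughly 50KB; any diff that would push past the cap is sent with its contents stripped, `content_omitted` set, and precomputed addition/deletion counts so the UI can still show stats. A minimal sketch of that policy, using a simplified `SimpleDiff` stand-in for the real `utils::diff::Diff` (this is not the commit's code, which follows below):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

const MAX_CUMULATIVE_DIFF_BYTES: usize = 50 * 1024; // 50KB cap, as in this commit

#[derive(Default)]
struct SimpleDiff {
    old_content: Option<String>,
    new_content: Option<String>,
    content_omitted: bool,
    additions: Option<usize>,
    deletions: Option<usize>,
}

/// Once the running total would exceed the cap, strip contents but keep
/// line counts so the UI can still render "+N -M" for the file.
fn apply_omit_policy(diff: &mut SimpleDiff, sent_bytes: &AtomicUsize) {
    let size = diff.old_content.as_deref().map_or(0, str::len)
        + diff.new_content.as_deref().map_or(0, str::len);
    if size == 0 {
        return;
    }
    if sent_bytes.load(Ordering::Relaxed).saturating_add(size) > MAX_CUMULATIVE_DIFF_BYTES {
        // Placeholder stats: the real code derives these from a unified diff
        // hunk (create_unified_diff_hunk); here we just count lines.
        diff.additions = Some(diff.new_content.as_deref().map_or(0, |s| s.lines().count()));
        diff.deletions = Some(diff.old_content.as_deref().map_or(0, |s| s.lines().count()));
        diff.old_content = None;
        diff.new_content = None;
        diff.content_omitted = true;
    } else {
        sent_bytes.fetch_add(size, Ordering::Relaxed);
    }
}

fn main() {
    let sent = AtomicUsize::new(0);
    let mut big = SimpleDiff {
        old_content: Some("old line\n".repeat(10_000)),
        new_content: Some("new line\n".repeat(10_000)),
        ..Default::default()
    };
    apply_omit_policy(&mut big, &sent);
    assert!(big.content_omitted && big.old_content.is_none());
    println!("+{:?} -{:?}", big.additions, big.deletions);
}
```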

View File

@@ -2,7 +2,10 @@ use std::{
    collections::{HashMap, HashSet},
    io,
    path::{Path, PathBuf},
-   sync::Arc,
+   sync::{
+       Arc,
+       atomic::{AtomicUsize, Ordering},
+   },
    time::Duration,
};
@@ -50,6 +53,7 @@ use services::services::{
use tokio::{sync::RwLock, task::JoinHandle};
use tokio_util::io::ReaderStream;
use utils::{
+   diff::create_unified_diff_hunk,
    log_msg::LogMsg,
    msg_store::MsgStore,
    text::{git_branch_id, short_uuid},
@@ -70,6 +74,60 @@ pub struct LocalContainerService {
}
impl LocalContainerService {
// Max cumulative content bytes allowed per diff stream
const MAX_CUMULATIVE_DIFF_BYTES: usize = 50 * 1024; // 50KB
// Apply stream-level omit policy based on cumulative bytes.
// If adding this diff's contents exceeds the cap, strip contents and set stats.
fn apply_stream_omit_policy(
&self,
diff: &mut utils::diff::Diff,
sent_bytes: &Arc<AtomicUsize>,
) {
// Compute size of current diff payload
let mut size = 0usize;
if let Some(ref s) = diff.old_content {
size += s.len();
}
if let Some(ref s) = diff.new_content {
size += s.len();
}
if size == 0 {
return; // nothing to account
}
let current = sent_bytes.load(Ordering::Relaxed);
if current.saturating_add(size) > Self::MAX_CUMULATIVE_DIFF_BYTES {
// We will omit content for this diff. If we still have both sides loaded
// (i.e., not already omitted by file-size guards), compute stats for UI.
if diff.additions.is_none() && diff.deletions.is_none() {
let old = diff.old_content.as_deref().unwrap_or("");
let new = diff.new_content.as_deref().unwrap_or("");
let hunk = create_unified_diff_hunk(old, new);
let mut add = 0usize;
let mut del = 0usize;
for line in hunk.lines() {
if let Some(first) = line.chars().next() {
if first == '+' {
add += 1;
} else if first == '-' {
del += 1;
}
}
}
diff.additions = Some(add);
diff.deletions = Some(del);
}
diff.old_content = None;
diff.new_content = None;
diff.content_omitted = true;
} else {
// safe to include; account for it
let _ = sent_bytes.fetch_add(size, Ordering::Relaxed);
}
}
    pub fn new(
        db: DBService,
        msg_stores: Arc<RwLock<HashMap<Uuid, Arc<MsgStore>>>>,
@@ -555,6 +613,15 @@ impl LocalContainerService {
            None,
        )?;
let cum = Arc::new(AtomicUsize::new(0));
let diffs: Vec<_> = diffs
.into_iter()
.map(|mut d| {
self.apply_stream_omit_policy(&mut d, &cum);
d
})
.collect();
        let stream = futures::stream::iter(diffs.into_iter().map(|diff| {
            let entry_index = GitService::diff_path(&diff);
            let patch =
@@ -589,6 +656,29 @@ impl LocalContainerService {
            None,
        )?;
// cumulative counter for entire stream
let cumulative = Arc::new(AtomicUsize::new(0));
// track which file paths have been emitted with full content already
let full_sent = Arc::new(std::sync::RwLock::new(HashSet::<String>::new()));
let initial_diffs: Vec<_> = initial_diffs
.into_iter()
.map(|mut d| {
self.apply_stream_omit_policy(&mut d, &cumulative);
d
})
.collect();
// Record which paths were sent with full content
{
let mut guard = full_sent.write().unwrap();
for d in &initial_diffs {
if !d.content_omitted {
let p = GitService::diff_path(d);
guard.insert(p);
}
}
}
        let initial_stream = futures::stream::iter(initial_diffs.into_iter().map(|diff| {
            let entry_index = GitService::diff_path(&diff);
            let patch =
@@ -606,6 +696,8 @@ impl LocalContainerService {
        let live_stream = {
            let git_service = git_service.clone();
            let worktree_path_for_spawn = worktree_path.clone();
let cumulative = Arc::clone(&cumulative);
let full_sent = Arc::clone(&full_sent);
            try_stream! {
                // Move the expensive watcher setup to blocking thread to avoid blocking the async runtime
                let watcher_result = tokio::task::spawn_blocking(move || {
@@ -629,6 +721,8 @@ impl LocalContainerService {
                        &task_branch,
                        &base_branch,
                        &changed_paths,
&cumulative,
&full_sent,
                    ).map_err(|e| {
                        tracing::error!("Error processing file changes: {}", e);
                        io::Error::other(e.to_string())
@@ -650,7 +744,9 @@ impl LocalContainerService {
            }
        }.boxed();
-       let combined_stream = select(initial_stream, live_stream);
+       // Ensure all initial diffs are emitted before live updates, to avoid
+       // earlier files being abbreviated due to interleaving ordering.
+       let combined_stream = initial_stream.chain(live_stream);
        Ok(combined_stream.boxed())
    }
@@ -680,6 +776,8 @@ impl LocalContainerService {
        task_branch: &str,
        base_branch: &str,
        changed_paths: &[String],
cumulative_bytes: &Arc<AtomicUsize>,
full_sent_paths: &Arc<std::sync::RwLock<HashSet<String>>>,
    ) -> Result<Vec<Event>, ContainerError> {
        let path_filter: Vec<&str> = changed_paths.iter().map(|s| s.as_str()).collect();
@@ -696,9 +794,66 @@ impl LocalContainerService {
        let mut files_with_diffs = HashSet::new();
        // Add/update files that have diffs
-       for diff in current_diffs {
+       for mut diff in current_diffs {
            let file_path = GitService::diff_path(&diff);
            files_with_diffs.insert(file_path.clone());
// Apply stream-level omit policy (affects contents and stats)
// Note: we can't call self methods from static fn; implement inline
{
// Compute size
let mut size = 0usize;
if let Some(ref s) = diff.old_content {
size += s.len();
}
if let Some(ref s) = diff.new_content {
size += s.len();
}
if size > 0 {
let current = cumulative_bytes.load(Ordering::Relaxed);
if current.saturating_add(size)
> LocalContainerService::MAX_CUMULATIVE_DIFF_BYTES
{
if diff.additions.is_none() && diff.deletions.is_none() {
let old = diff.old_content.as_deref().unwrap_or("");
let new = diff.new_content.as_deref().unwrap_or("");
let hunk = create_unified_diff_hunk(old, new);
let mut add = 0usize;
let mut del = 0usize;
for line in hunk.lines() {
if let Some(first) = line.chars().next() {
if first == '+' {
add += 1;
} else if first == '-' {
del += 1;
}
}
}
diff.additions = Some(add);
diff.deletions = Some(del);
}
diff.old_content = None;
diff.new_content = None;
diff.content_omitted = true;
} else {
let _ = cumulative_bytes.fetch_add(size, Ordering::Relaxed);
}
}
}
// If this diff would be omitted and we already sent a full-content
// version of this path earlier in the stream, skip sending a
// degrading replacement.
if diff.content_omitted {
if full_sent_paths.read().unwrap().contains(&file_path) {
continue;
}
} else {
// Track that we have sent a full-content version
{
let mut guard = full_sent_paths.write().unwrap();
guard.insert(file_path.clone());
}
}
            let patch = ConversationPatch::add_diff(escape_json_pointer_segment(&file_path), diff);
            let event = LogMsg::JsonPatch(patch).to_sse_event();
@@ -1108,7 +1263,7 @@ impl ContainerService for LocalContainerService {
            && !parent.exists()
        {
            std::fs::create_dir_all(parent).map_err(|e| {
-               ContainerError::Other(anyhow!("Failed to create directory {:?}: {}", parent, e))
+               ContainerError::Other(anyhow!("Failed to create directory {parent:?}: {e}"))
            })?;
        }
@@ -1116,17 +1271,13 @@ impl ContainerService for LocalContainerService {
            if source_file.exists() {
                std::fs::copy(&source_file, &target_file).map_err(|e| {
                    ContainerError::Other(anyhow!(
-                       "Failed to copy file {:?} to {:?}: {}",
-                       source_file,
-                       target_file,
-                       e
+                       "Failed to copy file {source_file:?} to {target_file:?}: {e}"
                    ))
                })?;
                tracing::info!("Copied file {:?} to worktree", file_path);
            } else {
                return Err(ContainerError::Other(anyhow!(
-                   "File {:?} does not exist in the project directory",
-                   source_file
+                   "File {source_file:?} does not exist in the project directory"
                )));
            }
        }
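A side note on the `select(initial_stream, live_stream)` → `initial_stream.chain(live_stream)` switch above: `select` interleaves the two streams as items become ready, so a live update could overtake (and abbreviate) a file whose full initial diff had not been emitted yet, while `chain` drains the initial stream first. A tiny illustrative sketch, assuming the `futures` and `tokio` crates (not part of this commit):

```rust
use futures::{StreamExt, stream};

#[tokio::main]
async fn main() {
    // Initial snapshot diffs and later live updates for the same stream.
    let initial = stream::iter(vec!["a.rs (full)", "b.rs (full)"]);
    let live = stream::iter(vec!["a.rs (update)"]);

    // `chain` guarantees every initial diff is emitted before any live update,
    // which keeps earlier files from being replaced by abbreviated versions.
    let ordered: Vec<_> = initial.chain(live).collect().await;
    assert_eq!(ordered, vec!["a.rs (full)", "b.rs (full)", "a.rs (update)"]);
}
```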

View File

@@ -42,6 +42,10 @@ pub enum GitServiceError {
#[derive(Clone)]
pub struct GitService {}
// Max inline diff size for UI (in bytes). Files larger than this will have
// their contents omitted from the diff stream to avoid UI crashes.
const MAX_INLINE_DIFF_BYTES: usize = 50 * 1024; // ~50KB
#[derive(Debug, Clone, Serialize, Deserialize, TS, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[ts(rename_all = "snake_case")]
@@ -350,6 +354,7 @@ impl GitService {
    ) -> Result<Vec<Diff>, GitServiceError> {
        let mut file_diffs = Vec::new();
let mut delta_index: usize = 0;
        diff.foreach(
            &mut |delta, _| {
                if delta.status() == Delta::Unreadable {
@@ -358,24 +363,72 @@ impl GitService {
                let status = delta.status();
-               // Only build old_file for non-added entries
-               let old_file = if matches!(status, Delta::Added) {
-                   None
+               // Decide if we should omit content due to size
+               let mut content_omitted = false;
+               // Check old blob size when applicable
+               if !matches!(status, Delta::Added) {
+                   let oid = delta.old_file().id();
+                   if !oid.is_zero()
+                       && let Ok(blob) = repo.find_blob(oid)
+                       && !blob.is_binary()
+                       && blob.size() > MAX_INLINE_DIFF_BYTES
+                   {
+                       content_omitted = true;
+                   }
+               }
+               // Check new blob size when applicable
+               if !matches!(status, Delta::Deleted) {
+                   let oid = delta.new_file().id();
+                   if !oid.is_zero()
+                       && let Ok(blob) = repo.find_blob(oid)
+                       && !blob.is_binary()
+                       && blob.size() > MAX_INLINE_DIFF_BYTES
+                   {
+                       content_omitted = true;
+                   }
+               }
+               // Only build old/new content if not omitted
+               let (old_path, old_content) = if matches!(status, Delta::Added) {
+                   (None, None)
                } else {
-                   delta
+                   let path_opt = delta
                        .old_file()
                        .path()
-                       .map(|p| self.create_file_details(p, &delta.old_file().id(), repo))
+                       .map(|p| p.to_string_lossy().to_string());
+                   if content_omitted {
+                       (path_opt, None)
+                   } else {
+                       let details = delta
+                           .old_file()
+                           .path()
+                           .map(|p| self.create_file_details(p, &delta.old_file().id(), repo));
+                       (
+                           details.as_ref().and_then(|f| f.file_name.clone()),
+                           details.and_then(|f| f.content),
+                       )
+                   }
                };
-               // Only build new_file for non-deleted entries
-               let new_file = if matches!(status, Delta::Deleted) {
-                   None
+               let (new_path, new_content) = if matches!(status, Delta::Deleted) {
+                   (None, None)
                } else {
-                   delta
+                   let path_opt = delta
                        .new_file()
                        .path()
-                       .map(|p| self.create_file_details(p, &delta.new_file().id(), repo))
+                       .map(|p| p.to_string_lossy().to_string());
+                   if content_omitted {
+                       (path_opt, None)
+                   } else {
+                       let details = delta
+                           .new_file()
+                           .path()
+                           .map(|p| self.create_file_details(p, &delta.new_file().id(), repo));
+                       (
+                           details.as_ref().and_then(|f| f.file_name.clone()),
+                           details.and_then(|f| f.content),
+                       )
+                   }
                };
                let mut change = match status {
@@ -388,33 +441,40 @@ impl GitService {
                    _ => DiffChangeKind::Modified,
                };
-               let old_path = old_file.as_ref().and_then(|f| f.file_name.clone());
-               let new_path = new_file.as_ref().and_then(|f| f.file_name.clone());
-               let old_content = old_file.and_then(|f| f.content);
-               let new_content = new_file.and_then(|f| f.content);
                // Detect pure mode changes (e.g., chmod +/-x) and classify as PermissionChange
                if matches!(status, Delta::Modified)
                    && delta.old_file().mode() != delta.new_file().mode()
                {
-                   // If content unchanged or unavailable, prefer PermissionChange label
-                   if old_content
-                       .as_ref()
-                       .zip(new_content.as_ref())
-                       .is_none_or(|(o, n)| o == n)
+                   // Only downgrade to PermissionChange if we KNOW content is unchanged
+                   if old_content.is_some() && new_content.is_some() && old_content == new_content
                    {
                        change = DiffChangeKind::PermissionChange;
                    }
                }
// If contents are omitted, try to compute line stats via libgit2 Patch
let mut additions: Option<usize> = None;
let mut deletions: Option<usize> = None;
if content_omitted
&& let Ok(Some(patch)) = git2::Patch::from_diff(&diff, delta_index)
&& let Ok((_ctx, adds, dels)) = patch.line_stats()
{
additions = Some(adds);
deletions = Some(dels);
}
                file_diffs.push(Diff {
                    change,
                    old_path,
                    new_path,
                    old_content,
                    new_content,
+                   content_omitted,
+                   additions,
+                   deletions,
                });
delta_index += 1;
                true
            },
            None,
@@ -458,11 +518,11 @@ impl GitService {
            }
        };
-       // Size guard - skip files larger than 1MB
-       if bytes.len() > 1_048_576 {
+       // Size guard - skip files larger than UI inline threshold
+       if bytes.len() > MAX_INLINE_DIFF_BYTES {
            tracing::debug!(
-               "Skipping large file ({}MB): {:?}",
-               bytes.len() / 1_048_576,
+               "Skipping large file ({}KB): {:?}",
+               bytes.len() / 1024,
                abs_path
            );
            return None;
@@ -544,34 +604,65 @@ impl GitService {
                ChangeType::Unknown(_) => (e.old_path.clone(), Some(e.path.clone())),
            };
-           // Load old content from base tree if possible
-           let old_content = if let Some(ref oldp) = old_path_opt {
-               let rel = std::path::Path::new(oldp);
-               match base_tree.get_path(rel) {
-                   Ok(entry) if entry.kind() == Some(git2::ObjectType::Blob) => repo
-                       .find_blob(entry.id())
-                       .ok()
-                       .and_then(|b| Self::blob_to_string(&b)),
-                   _ => None,
-               }
-           } else {
-               None
-           };
-           // Load new content from filesystem (worktree) when available
-           let new_content = if let Some(ref newp) = new_path_opt {
-               let rel = std::path::Path::new(newp);
-               Self::read_file_to_string(repo, rel)
-           } else {
-               None
-           };
+           // Decide if we should omit content by size (either side)
+           let mut content_omitted = false;
+           // Old side (from base tree)
+           if let Some(ref oldp) = old_path_opt {
+               let rel = std::path::Path::new(oldp);
+               if let Ok(entry) = base_tree.get_path(rel)
+                   && entry.kind() == Some(git2::ObjectType::Blob)
+                   && let Ok(blob) = repo.find_blob(entry.id())
+                   && !blob.is_binary()
+                   && blob.size() > MAX_INLINE_DIFF_BYTES
+               {
+                   content_omitted = true;
+               }
+           }
+           // New side (from filesystem)
+           if let Some(ref newp) = new_path_opt
+               && let Some(workdir) = repo.workdir()
+           {
+               let abs = workdir.join(newp);
+               if let Ok(md) = std::fs::metadata(&abs)
+                   && (md.len() as usize) > MAX_INLINE_DIFF_BYTES
+               {
+                   content_omitted = true;
+               }
+           }
+           // Load contents only if not omitted
+           let (old_content, new_content) = if content_omitted {
+               (None, None)
+           } else {
+               // Load old content from base tree if possible
+               let old_content = if let Some(ref oldp) = old_path_opt {
+                   let rel = std::path::Path::new(oldp);
+                   match base_tree.get_path(rel) {
+                       Ok(entry) if entry.kind() == Some(git2::ObjectType::Blob) => repo
+                           .find_blob(entry.id())
+                           .ok()
+                           .and_then(|b| Self::blob_to_string(&b)),
+                       _ => None,
+                   }
+               } else {
+                   None
+               };
+               // Load new content from filesystem (worktree) when available
+               let new_content = if let Some(ref newp) = new_path_opt {
+                   let rel = std::path::Path::new(newp);
+                   Self::read_file_to_string(repo, rel)
+               } else {
+                   None
+               };
+               (old_content, new_content)
+           };
            // If reported as Modified but content is identical, treat as a permission-only change
            if matches!(change, DiffChangeKind::Modified)
-               && old_content
-                   .as_ref()
-                   .zip(new_content.as_ref())
-                   .is_none_or(|(o, n)| o == n)
+               && old_content.is_some()
+               && new_content.is_some()
+               && old_content == new_content
            {
                change = DiffChangeKind::PermissionChange;
            }
@@ -582,6 +673,9 @@ impl GitService {
            new_path: new_path_opt,
            old_content,
            new_content,
content_omitted,
additions: None,
deletions: None,
        }
    }
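When contents are omitted, the GitService changes above fall back to `git2::Patch::from_diff` and `Patch::line_stats()` for the +/- counts. A small stand-alone sketch of that API (assumes the current directory is a git repository; error handling kept minimal; not part of this commit):

```rust
use git2::{Patch, Repository};

fn main() -> Result<(), git2::Error> {
    let repo = Repository::open(".")?; // assumption: cwd is a git repo
    let base_tree = repo.head()?.peel_to_tree()?;
    let diff = repo.diff_tree_to_workdir(Some(&base_tree), None)?;

    for (idx, _delta) in diff.deltas().enumerate() {
        // line_stats() reports (context, added, removed) line counts per delta,
        // which lets the backend send stats even when file contents are omitted.
        if let Some(patch) = Patch::from_diff(&diff, idx)? {
            let (_context, additions, deletions) = patch.line_stats()?;
            println!("delta {idx}: +{additions} -{deletions}");
        }
    }
    Ok(())
}
```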

View File

@@ -20,6 +20,11 @@ pub struct Diff {
    pub new_path: Option<String>,
    pub old_content: Option<String>,
    pub new_content: Option<String>,
/// True when file contents are intentionally omitted (e.g., too large)
pub content_omitted: bool,
/// Optional precomputed stats for omitted content
pub additions: Option<usize>,
pub deletions: Option<usize>,
}
#[derive(Debug, Clone, Serialize, Deserialize, TS)]
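A consumer-side illustration (not from this commit) of the contract these new fields imply: when `content_omitted` is true, both content fields are `None` and `additions`/`deletions` carry the stats computed before the contents were stripped, so a UI can still render counts. `DiffLike` and `displayed_stats` are hypothetical stand-ins:

```rust
// Simplified stand-in for the Diff struct above, used only to illustrate the
// expected invariant around content_omitted / additions / deletions.
struct DiffLike {
    old_content: Option<String>,
    new_content: Option<String>,
    content_omitted: bool,
    additions: Option<usize>,
    deletions: Option<usize>,
}

// Precomputed stats when content was omitted; a real consumer would diff the
// contents otherwise (here we just count lines as a placeholder).
fn displayed_stats(d: &DiffLike) -> (usize, usize) {
    if d.content_omitted {
        (d.additions.unwrap_or(0), d.deletions.unwrap_or(0))
    } else {
        let added = d.new_content.as_deref().map_or(0, |s| s.lines().count());
        let removed = d.old_content.as_deref().map_or(0, |s| s.lines().count());
        (added, removed)
    }
}

fn main() {
    let omitted = DiffLike {
        old_content: None,
        new_content: None,
        content_omitted: true,
        additions: Some(120),
        deletions: Some(45),
    };
    assert_eq!(displayed_stats(&omitted), (120, 45));
}
```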

View File

@@ -85,6 +85,7 @@ export default function DiffCard({
const newLang = const newLang =
    getHighLightLanguageFromPath(newName || oldName || '') || 'plaintext';
  const { label, Icon } = labelAndIcon(diff);
const isOmitted = !!diff.contentOmitted;
  // Build a diff from raw contents so the viewer can expand beyond hunks
  const oldContentSafe = diff.oldContent || '';
@@ -92,7 +93,7 @@ export default function DiffCard({
  const isContentEqual = oldContentSafe === newContentSafe;
  const diffFile = useMemo(() => {
-   if (isContentEqual) return null;
+   if (isContentEqual || isOmitted) return null;
    try {
      const oldFileName = oldName || newName || 'unknown';
      const newFileName = newName || oldName || 'unknown';
@@ -112,6 +113,7 @@ export default function DiffCard({
    }
  }, [
    isContentEqual,
isOmitted,
    oldName,
    newName,
    oldLang,
@@ -120,8 +122,12 @@ export default function DiffCard({
    newContentSafe,
  ]);
- const add = diffFile?.additionLength ?? 0;
- const del = diffFile?.deletionLength ?? 0;
+ const add = isOmitted
+   ? (diff.additions ?? 0)
+   : (diffFile?.additionLength ?? 0);
+ const del = isOmitted
+   ? (diff.deletions ?? 0)
+   : (diffFile?.deletionLength ?? 0);
  // Review functionality
  const filePath = newName || oldName || 'unknown';
@@ -293,13 +299,15 @@ export default function DiffCard({
          className="px-4 pb-4 text-xs font-mono"
          style={{ color: 'hsl(var(--muted-foreground) / 0.9)' }}
        >
-         {isContentEqual
-           ? diff.change === 'renamed'
-             ? 'File renamed with no content changes.'
-             : diff.change === 'permissionChange'
-               ? 'File permission changed.'
-               : 'No content changes to display.'
-           : 'Failed to render diff for this file.'}
+         {isOmitted
+           ? 'Content omitted due to file size. Open in editor to view.'
+           : isContentEqual
+             ? diff.change === 'renamed'
+               ? 'File renamed with no content changes.'
+               : diff.change === 'permissionChange'
+                 ? 'File permission changed.'
+                 : 'No content changes to display.'
+             : 'Failed to render diff for this file.'}
        </div>
      )}
    </div>

View File

@@ -14,6 +14,11 @@ export function useDiffSummary(attemptId: string | null) {
    return diffs.reduce(
      (acc, d) => {
        try {
if (d.contentOmitted) {
acc.added += d.additions ?? 0;
acc.deleted += d.deletions ?? 0;
return acc;
}
          const oldName = d.oldPath || d.newPath || 'old';
          const newName = d.newPath || d.oldPath || 'new';
          const oldContent = d.oldContent || '';

View File

@@ -110,7 +110,15 @@ export enum CheckTokenResponse { VALID = "VALID", INVALID = "INVALID" }
export type GitBranch = { name: string, is_current: boolean, is_remote: boolean, last_commit_date: Date, };
-export type Diff = { change: DiffChangeKind, oldPath: string | null, newPath: string | null, oldContent: string | null, newContent: string | null, };
+export type Diff = { change: DiffChangeKind, oldPath: string | null, newPath: string | null, oldContent: string | null, newContent: string | null,
+/**
+ * True when file contents are intentionally omitted (e.g., too large)
+ */
+contentOmitted: boolean,
+/**
+ * Optional precomputed stats for omitted content
+ */
+additions: number | null, deletions: number | null, };
export type DiffChangeKind = "added" | "deleted" | "modified" | "renamed" | "copied" | "permissionChange";