Deep dive into Git rebase and merge strategies, when to use each, and how to maintain a clean, readable commit history in collaborative projects.
The eternal debate: rebase or merge? After years of managing complex Git repositories and cleaning up countless messy histories, I've learned there's no one-size-fits-all answer. The key is understanding when and how to use each strategy. Here's your complete guide to mastering both approaches and maintaining a pristine Git history.
Understanding the Fundamentals
Rebase vs Merge Visualization
# git_history_visualizer.py
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.patches import FancyBboxPatch
import numpy as np
class GitHistoryVisualizer:
    """Render schematic commit graphs contrasting merge and rebase."""

    def visualize_merge_vs_rebase(self):
        """Show the merge diagram and the rebase diagram side by side."""
        _, axes = plt.subplots(1, 2, figsize=(14, 6))
        panels = (
            ("Merge Strategy", self.draw_merge_history),
            ("Rebase Strategy", self.draw_rebase_history),
        )
        # Left panel is the merge picture, right panel the rebase picture.
        for axis, (title, draw) in zip(axes, panels):
            axis.set_title(title, fontsize=14, fontweight='bold')
            draw(axis)
        plt.tight_layout()
        plt.show()

    def draw_merge_history(self, ax):
        """Plot a main line, a feature branch and the merge commit on *ax*."""
        self._draw_main_line(ax)
        # Feature branch: leaves main at x=1 and carries two commits.
        ax.plot([1, 2, 3], [0, 1, 1], 'g-', linewidth=2, label='feature')
        ax.plot([2, 3], [1, 1], 'go', markersize=8)
        # Merge commit: dashed link back to main plus a highlighted marker.
        ax.plot([3, 3], [0, 1], 'r--', linewidth=1)
        ax.plot([3], [0], 'ro', markersize=10, label='merge commit')
        self._finish_axes(ax)

    def draw_rebase_history(self, ax):
        """Plot a single straight line with the rebased commits on *ax*."""
        self._draw_main_line(ax)
        # Rebased commits sit directly on the main line.
        ax.plot([3, 4], [0, 0], 'go', markersize=8, label='rebased commits')
        self._finish_axes(ax)

    def _draw_main_line(self, ax):
        """Draw the main branch as a line with six commit markers."""
        commit_xs = list(range(6))
        ax.plot([0, 5], [0, 0], 'b-', linewidth=2, label='main')
        ax.plot(commit_xs, [0] * len(commit_xs), 'bo', markersize=8)

    def _finish_axes(self, ax):
        """Apply the limits, legend and labels shared by both diagrams."""
        ax.set_xlim(-0.5, 5.5)
        ax.set_ylim(-0.5, 1.5)
        ax.legend()
        ax.set_xlabel('Time →')
        ax.set_yticks([])
Interactive Rebase Mastery
Advanced Interactive Rebase
#!/bin/bash
# interactive_rebase_helper.sh
# Start interactive rebase with custom editor
# Run `git rebase -i` over the last N commits with an automated todo editor.
#
# Arguments:
#   $1 - number of commits back from HEAD to include (positive integer)
#
# The generated sequence editor rewrites todo lines whose subject starts with
# "fixup! ", "squash! ", "reword! " or "drop! " so that they use the matching
# rebase action instead of "pick". Lines are not reordered.
#
# Returns the exit status of `git rebase`, or 1 on bad usage / setup failure.
interactive_rebase_with_automation() {
    local commits_back=$1
    local editor_script quoted_editor status

    if ! [[ $commits_back =~ ^[1-9][0-9]*$ ]]; then
        echo "Usage: interactive_rebase_with_automation <commits-back>" >&2
        return 1
    fi

    # mktemp gives a private, unpredictable path; a fixed /tmp name can be
    # pre-created or clobbered by another user or a concurrent run.
    editor_script=$(mktemp "${TMPDIR:-/tmp}/rebase_script.XXXXXX") || return 1

    # Create rebase script (quoted delimiter: nothing below is expanded here)
    cat > "$editor_script" << 'EOF'
#!/bin/bash
# Automated rebase script
# Read the rebase todo
TODO_FILE=$1
ORIGINAL=$(cat "$TODO_FILE")
# Apply transformations
MODIFIED=$(printf '%s\n' "$ORIGINAL" | sed -e '
# Squash all "fixup!" commits
s/^pick \([a-f0-9]*\) fixup! /fixup \1 fixup! /g
# Squash all "squash!" commits
s/^pick \([a-f0-9]*\) squash! /squash \1 squash! /g
# Reword all "reword!" commits
s/^pick \([a-f0-9]*\) reword! /reword \1 reword! /g
# Drop all "drop!" commits
s/^pick \([a-f0-9]*\) drop! /drop \1 drop! /g
')
# Write modified todo
printf '%s\n' "$MODIFIED" > "$TODO_FILE"
EOF
    chmod +x "$editor_script"

    # Git runs the editor through the shell, so the path must be shell-quoted.
    printf -v quoted_editor '%q' "$editor_script"

    # Run interactive rebase with custom script
    GIT_SEQUENCE_EDITOR=$quoted_editor git rebase -i "HEAD~$commits_back"
    status=$?

    rm -f "$editor_script"
    return $status
}
# Smart commit squashing
# Fold commits that share a "[TICKET-123]" tag into the oldest commit carrying
# that tag, then run an autosquash rebase.
#
# Arguments:
#   $1 - how many recent commits to consider (optional, default 20)
#
# The previous implementation called `git commit --fixup` in a loop with
# nothing staged, which creates no commits and therefore squashed nothing; it
# also targeted the newest commit of each group. This version rewrites the
# rebase todo instead: later commits of a ticket are moved directly after the
# first one and marked "fixup" (their messages are discarded).
#
# NOTE: moving commits can produce conflicts; git then stops as in any
# interactive rebase and the user resolves them.
smart_squash() {
    local max_commits=${1:-20}
    local total range commits commit hash msg ticket
    local editor_script quoted_editor status
    local needs_squash=0
    local -a hashes
    local -A commit_groups

    if ! [[ $max_commits =~ ^[1-9][0-9]*$ ]]; then
        echo "Usage: smart_squash [max-commits]" >&2
        return 1
    fi

    total=$(git rev-list --count HEAD) || return 1
    # HEAD~N only resolves when the history holds more than N commits.
    range=$max_commits
    if (( range >= total )); then
        range=$(( total - 1 ))
    fi
    if (( range < 2 )); then
        echo "Not enough commits to squash"
        return 0
    fi

    # Analyze commits for automatic squashing
    commits=$(git log -n "$range" --pretty=format:"%h %s")
    echo "Analyzing commits for squashing..."

    # Group related commits by ticket number
    while IFS= read -r commit; do
        hash=${commit%% *}
        msg=${commit#* }
        if [[ $msg =~ \[([A-Z]+-[0-9]+)\] ]]; then
            ticket="${BASH_REMATCH[1]}"
            commit_groups[$ticket]+="$hash "
        fi
    done <<< "$commits"

    # Report what will be squashed
    for ticket in "${!commit_groups[@]}"; do
        # Intentional word splitting: the value is a space-separated hash list.
        hashes=(${commit_groups[$ticket]})
        if [ ${#hashes[@]} -gt 1 ]; then
            echo "Squashing ${#hashes[@]} commits for $ticket"
            needs_squash=1
        fi
    done

    if [ "$needs_squash" -eq 0 ]; then
        echo "No related commits found to squash"
        return 0
    fi

    editor_script=$(mktemp "${TMPDIR:-/tmp}/smart_squash.XXXXXX") || return 1
    cat > "$editor_script" << 'EOF'
#!/bin/sh
# Sequence editor: the todo lists commits oldest first. The first "pick" line
# of each ticket stays in place; every later line with the same ticket is
# re-labelled "fixup" and emitted right after it.
awk '
/^pick / && match($0, /\[[A-Z]+-[0-9]+\]/) {
    ticket = substr($0, RSTART, RLENGTH)
    if (ticket in anchor) {
        sub(/^pick/, "fixup")
        followers[anchor[ticket]] = followers[anchor[ticket]] $0 "\n"
        next
    }
    anchor[ticket] = NR
}
{ line[NR] = $0 }
END {
    for (i = 1; i <= NR; i++) {
        if (i in line) {
            print line[i]
            printf "%s", followers[i]
        }
    }
}
' "$1" > "$1.squashed" && mv "$1.squashed" "$1"
EOF
    chmod +x "$editor_script"
    printf -v quoted_editor '%q' "$editor_script"

    # Run autosquash (also honours any existing fixup!/squash! commits)
    GIT_SEQUENCE_EDITOR=$quoted_editor git rebase -i --autosquash "HEAD~$range"
    status=$?

    rm -f "$editor_script"
    return $status
}
Rebase Workflow Automation
# rebase_workflow.py
import subprocess
import re
from typing import List, Dict
from dataclasses import dataclass
@dataclass
class Commit:
    """One commit as parsed from ``git log --name-only`` output."""

    hash: str  # full commit hash (%H)
    message: str  # subject line (%s)
    author: str  # author name (%an)
    files: List[str]  # paths listed for the commit by --name-only


class RebaseWorkflow:
    """Group a branch's commits and build a rebase todo that squashes them.

    NOTE(review): ``run_git`` and ``execute_interactive_rebase`` are called
    below but are not defined in the class as shown here; they must be
    provided elsewhere - confirm. ``run_git`` is presumably expected to take
    a list of git arguments and return the command's stdout as ``str``.
    """

    # ASCII record / unit separators. Unlike '|' they cannot appear in a
    # subject line or a path by accident, so parsing stays unambiguous.
    _RECORD_SEP = '\x1e'
    _FIELD_SEP = '\x1f'

    def __init__(self, repo_path: str):
        """Remember the repository the workflow operates on."""
        self.repo_path = repo_path

    def smart_rebase(self, target_branch: str = 'main'):
        """Intelligently rebase current branch onto target.

        Fetches ``target_branch`` from ``origin``, groups the commits that
        are not yet on it, and hands the generated todo to
        ``execute_interactive_rebase``.
        """
        # Fetch latest changes
        self.run_git(['fetch', 'origin', target_branch])
        # Analyze commits
        commits = self.get_commits_to_rebase(target_branch)
        # Group and organize commits
        organized = self.organize_commits(commits)
        # Generate rebase todo
        todo = self.generate_rebase_todo(organized)
        # Execute rebase
        self.execute_interactive_rebase(todo, target_branch)

    def get_commits_to_rebase(self, target: str) -> List[Commit]:
        """Return the commits in ``target..HEAD``, oldest first.

        The order matters: a rebase todo is applied top to bottom, so the
        list must be chronological. ``git log`` defaults to newest first,
        hence ``--reverse``.
        """
        output = self.run_git([
            'log', '--reverse', f'{target}..HEAD',
            '--pretty=format:%x1e%H%x1f%s%x1f%an',
            '--name-only',
        ])
        commits: List[Commit] = []
        # split('\n') rather than splitlines(): the latter would also break
        # lines at the \x1e record separator.
        for line in output.split('\n'):
            if line.startswith(self._RECORD_SEP):
                commit_hash, message, author = line[1:].split(self._FIELD_SEP, 2)
                commits.append(Commit(
                    hash=commit_hash,
                    message=message,
                    author=author,
                    files=[],
                ))
            elif line and commits:
                # Any non-empty line after a header is one of its file names.
                commits[-1].files.append(line)
        return commits

    def organize_commits(self, commits: List[Commit]) -> List[List[Commit]]:
        """Split *commits* into runs of consecutive, related commits."""
        groups: List[List[Commit]] = []
        for commit in commits:
            if not groups or self.should_start_new_group(commit, groups[-1]):
                groups.append([commit])
            else:
                groups[-1].append(commit)
        return groups

    def should_start_new_group(self, commit: Commit, group: List[Commit]) -> bool:
        """Return True when *commit* does not belong to *group*.

        A commit joins the group only if it touches at least one file the
        group already touched and carries the same feature identifier as the
        group's first commit.
        """
        if not group:
            return True
        # Check if files overlap
        group_files = {path for member in group for path in member.files}
        if group_files.isdisjoint(commit.files):
            return True
        # Check if it's a different feature
        return self.extract_feature(commit.message) != self.extract_feature(group[0].message)

    def extract_feature(self, message: str) -> str:
        """Return the ticket (``[ABC-123]``) or conventional-commit scope.

        Returns an empty string when the message contains neither.
        """
        # Look for ticket numbers
        match = re.search(r'\[([A-Z]+-\d+)\]', message)
        if match:
            return match.group(1)
        # Look for feature prefixes
        match = re.search(r'^(feat|fix|refactor)\(([^)]+)\)', message)
        if match:
            return match.group(2)
        return ""

    def generate_rebase_todo(self, groups: List[List[Commit]]) -> str:
        """Build the todo text: pick each group's first commit, fold the rest.

        Follow-up commits whose message mentions "fixup" become ``fixup``
        lines; all others become ``squash`` lines.
        """
        todo = []
        for group in groups:
            first, *rest = group
            todo.append(f"pick {first.hash[:7]} {first.message}")
            for commit in rest:
                action = 'fixup' if 'fixup' in commit.message.lower() else 'squash'
                todo.append(f"{action} {commit.hash[:7]} {commit.message}")
        return '\n'.join(todo)
Merge Strategies
Advanced Merge Techniques
#!/bin/bash
# advanced_merge.sh
# Three-way merge with custom strategy
# Merge SOURCE into TARGET, trying progressively more aggressive conflict
# handling when a preview shows textual conflicts.
#
# Arguments:
#   $1 - source branch (the one being merged)
#   $2 - target branch (the one receiving the merge)
#
# Attempts, in order: "-s recursive", "-s resolve", "-X ours", "-X theirs".
# "ours"/"theirs" are applied as strategy OPTIONS: "-s theirs" does not
# exist, and "-s ours" would silently discard every change from SOURCE.
# "octopus" is not tried because it refuses merges that need manual
# resolution and targets more than two heads.
#
# Each attempt runs on a throw-away branch; on success TARGET is
# fast-forwarded to the result and the throw-away branch is deleted.
# Returns non-zero when no attempt yields a conflict-free merge.
custom_three_way_merge() {
    local source=$1
    local target=$2
    local merge_base preview strategy temp_branch conflicts
    local -a merge_args

    if [ -z "$source" ] || [ -z "$target" ]; then
        echo "Usage: custom_three_way_merge <source> <target>" >&2
        return 1
    fi

    echo "Performing custom three-way merge..."

    # Find merge base
    if ! merge_base=$(git merge-base "$source" "$target"); then
        echo "No common ancestor between $source and $target" >&2
        return 1
    fi
    echo "Merge base: $merge_base"

    # Check for conflicts beforehand
    preview=$(mktemp "${TMPDIR:-/tmp}/merge_preview.XXXXXX") || return 1
    git merge-tree "$merge_base" "$target" "$source" > "$preview"
    if ! grep -q "<<<<<<< " "$preview"; then
        rm -f "$preview"
        echo "No conflicts detected. Proceeding with standard merge."
        git merge "$source"
        return
    fi
    rm -f "$preview"

    echo "Conflicts detected. Preparing resolution strategy..."

    # Try different merge strategies
    for strategy in recursive resolve ours theirs; do
        echo "Trying strategy: $strategy"
        case $strategy in
            ours|theirs) merge_args=(-X "$strategy") ;;
            *)           merge_args=(-s "$strategy") ;;
        esac

        # Create temporary branch
        temp_branch="temp_merge_$strategy"
        git checkout -b "$temp_branch" "$target" || return 1

        if git merge "${merge_args[@]}" "$source" --no-commit 2>/dev/null; then
            echo "Strategy $strategy succeeded!"
            # Check the result
            conflicts=$(git diff --name-only --diff-filter=U | wc -l)
            if [ "$conflicts" -eq 0 ]; then
                echo "No conflicts with strategy: $strategy"
                git commit -m "Merge $source into $target using $strategy strategy" || return 1
                # Publish the result on TARGET and drop the scratch branch.
                git checkout "$target" &&
                    git merge --ff-only "$temp_branch" &&
                    git branch -D "$temp_branch"
                return
            fi
        fi

        # Clean up
        git merge --abort 2>/dev/null
        git checkout "$target"
        git branch -D "$temp_branch"
    done

    echo "No strategy produced a conflict-free merge; resolve manually" >&2
    return 1
}
# Octopus merge for multiple branches
# Merge several branches into TARGET with a single octopus merge commit.
#
# Arguments:
#   $1    - target branch to check out and merge into
#   $2... - branches to merge
#
# Before merging, each branch is previewed against TARGET with
# `git merge-tree` and a warning is printed when conflict markers show up.
# (The earlier check tested whether the merge base is an ancestor of the
# branch, which is true by definition and so never warned.)
octopus_merge() {
    local target=$1
    shift
    local -a branches=("$@")
    local branch base joined

    if [ -z "$target" ] || [ ${#branches[@]} -eq 0 ]; then
        echo "Usage: octopus_merge <target> <branch>..." >&2
        return 1
    fi

    echo "Performing octopus merge of branches: ${branches[*]}"
    git checkout "$target" || return 1

    # Verify all branches can be merged
    for branch in "${branches[@]}"; do
        if ! base=$(git merge-base "$target" "$branch"); then
            echo "Warning: $branch may cause conflicts"
            continue
        fi
        if git merge-tree "$base" "$target" "$branch" | grep -q "<<<<<<< "; then
            echo "Warning: $branch may cause conflicts"
        fi
    done

    # Join with ", " (tr can only map one character to one character).
    joined=$(printf '%s, ' "${branches[@]}")

    # Perform octopus merge
    git merge "${branches[@]}" -m "Octopus merge: ${joined%, }"
}
# Subtree merge for vendor code
# Import a branch of another repository under LOCAL_PATH as a subtree merge.
#
# Arguments:
#   $1 - URL of the repository to import
#   $2 - branch of that repository
#   $3 - directory (relative to the repo root) that receives the files
#
# Uses a remote named "vendor"; when it already exists its URL is updated
# instead of failing on `git remote add`. Stops at the first failing step so
# a half-prepared merge is never committed.
subtree_merge() {
    local remote_url=$1
    local remote_branch=$2
    local local_path=$3

    if [ -z "$remote_url" ] || [ -z "$remote_branch" ] || [ -z "$local_path" ]; then
        echo "Usage: subtree_merge <remote-url> <remote-branch> <local-path>" >&2
        return 1
    fi

    # Add remote (or refresh the existing one)
    if git remote get-url vendor &> /dev/null; then
        git remote set-url vendor "$remote_url" || return 1
        git fetch vendor || return 1
    else
        git remote add -f vendor "$remote_url" || return 1
    fi

    # Merge as subtree: record the merge with our tree, then read the
    # vendor tree into the prefix.
    git merge -s ours --no-commit --allow-unrelated-histories "vendor/$remote_branch" || return 1
    if ! git read-tree --prefix="$local_path/" -u "vendor/$remote_branch"; then
        git merge --abort
        return 1
    fi
    git commit -m "Subtree merge: $remote_url at $local_path"
}
Conflict Resolution
Intelligent Conflict Resolution
# conflict_resolution.py
import difflib
import re
from typing import List, Tuple, Optional
class ConflictResolver:
    """Resolve merge-conflict blocks with per-file-type strategies.

    NOTE(review): ``generic_resolve`` is called for files without a dedicated
    handler (and as the package.json fallback) but is not defined in the
    class as shown here; it must be provided elsewhere - confirm.
    """

    # One two-way conflict block. Labels are confined to their own line,
    # either side may be empty, and the closing marker may be the last line
    # of the file. The match stops before the newline that ends the closing
    # marker so that replacing it keeps the following line separate.
    _CONFLICT_RE = re.compile(
        r'^<<<<<<< [^\n]*\n'
        r'(.*?)'
        r'^=======\n'
        r'(.*?)'
        r'^>>>>>>> [^\n]*',
        re.DOTALL | re.MULTILINE,
    )

    def __init__(self):
        """Register the file names that have a dedicated handler."""
        self.resolution_strategies = {
            'package.json': self.resolve_package_json,
            'yarn.lock': self.resolve_lockfile,
            'Gemfile.lock': self.resolve_lockfile,
            '.gitignore': self.resolve_gitignore,
            'CHANGELOG.md': self.resolve_changelog
        }

    def resolve_conflict(self, filepath: str, content: str) -> str:
        """Return *content* with its conflict blocks resolved.

        Content without conflict markers is returned unchanged. The handler
        is chosen by the file name (last ``/``-separated component).
        """
        # Extract conflict sections
        conflicts = self.extract_conflicts(content)
        if not conflicts:
            return content
        # Check for specific file handlers
        filename = filepath.split('/')[-1]
        handler = self.resolution_strategies.get(filename)
        if handler is not None:
            return handler(content, conflicts)
        # Generic resolution
        return self.generic_resolve(content, conflicts)

    def extract_conflicts(self, content: str) -> List[Tuple[str, str, str]]:
        """Return ``(ours, theirs, full_block)`` for every conflict block.

        ``ours`` and ``theirs`` carry no trailing newline; ``full_block`` is
        the exact text from the opening marker through the closing marker
        line (without that line's newline).
        """
        conflicts = []
        for match in self._CONFLICT_RE.finditer(content):
            ours, theirs = (
                side[:-1] if side.endswith('\n') else side
                for side in match.groups()
            )
            conflicts.append((ours, theirs, match.group(0)))
        return conflicts

    def resolve_package_json(self, content: str, conflicts: List) -> str:
        """Merge dependency maps of both sides of each conflict.

        Each side is parsed as the inside of a JSON object. Only
        ``dependencies`` and ``devDependencies`` are carried over; when both
        sides pin the same package, ``compare_versions`` picks the winner.
        Sides that are not valid JSON fall back to ``generic_resolve``.
        """
        import json
        for ours, theirs, full_conflict in conflicts:
            try:
                ours_json = json.loads('{' + ours + '}')
                theirs_json = json.loads('{' + theirs + '}')
            except json.JSONDecodeError:
                # Fall back to generic resolution
                content = self.generic_resolve(content, [(ours, theirs, full_conflict)])
                continue
            merged = {}
            for dep_type in ('dependencies', 'devDependencies'):
                if dep_type not in ours_json and dep_type not in theirs_json:
                    continue
                ours_deps = ours_json.get(dep_type, {})
                theirs_deps = theirs_json.get(dep_type, {})
                combined = {**ours_deps, **theirs_deps}
                # Take the newer version where both sides name the package.
                for package in ours_deps.keys() & theirs_deps.keys():
                    combined[package] = self.compare_versions(
                        ours_deps[package], theirs_deps[package]
                    )
                merged[dep_type] = combined
            # Drop the outer braces; the newline that followed the conflict
            # block is still in place, so the trailing one is not repeated.
            merged_str = json.dumps(merged, indent=2)[1:-1].rstrip('\n')
            content = content.replace(full_conflict, merged_str)
        return content

    def resolve_lockfile(self, content: str, conflicts: List) -> str:
        """Strip conflict blocks and flag the lockfile for regeneration."""
        # For lockfiles, we typically want to regenerate
        print("Lockfile conflict detected. Regenerating...")
        # Remove all conflict blocks together with their line break
        for _, _, full_conflict in conflicts:
            content = content.replace(full_conflict + '\n', '')
            content = content.replace(full_conflict, '')
        # Mark for regeneration
        content = "# REGENERATE_LOCKFILE\n" + content
        return content

    def resolve_gitignore(self, content: str, conflicts: List) -> str:
        """Keep the lines of both sides, ours first, without duplicates.

        Line order is preserved instead of sorted: in a .gitignore a later
        pattern overrides an earlier one, so sorting could move a negation
        such as ``!keep.log`` in front of the ``*.log`` it is meant to undo.
        """
        for ours, theirs, full_conflict in conflicts:
            lines = []
            for side in (ours, theirs):
                stripped = side.strip()
                if stripped:
                    lines.extend(stripped.split('\n'))
            # dict.fromkeys removes duplicates while keeping first-seen order.
            merged = '\n'.join(dict.fromkeys(lines))
            content = content.replace(full_conflict, merged)
        return content

    def resolve_changelog(self, content: str, conflicts: List) -> str:
        """Keep both sides of each conflict, theirs above ours."""
        for ours, theirs, full_conflict in conflicts:
            # Keep both versions, latest first
            merged = f"{theirs}\n\n{ours}"
            content = content.replace(full_conflict, merged)
        return content

    def compare_versions(self, v1: str, v2: str) -> str:
        """Return whichever of *v1* / *v2* is the newer version string.

        Leading ``^`` / ``~`` range markers are ignored for the comparison
        but kept in the returned string. When either value cannot be parsed
        as a version, *v2* (theirs) is returned.
        """
        import packaging.version
        try:
            ver1 = packaging.version.parse(v1.replace('^', '').replace('~', ''))
            ver2 = packaging.version.parse(v2.replace('^', '').replace('~', ''))
            return v1 if ver1 >= ver2 else v2
        except (packaging.version.InvalidVersion, AttributeError, TypeError):
            # If parsing fails, return the second (theirs)
            return v2
History Cleanup Strategies
Git History Cleanup
#!/bin/bash
# history_cleanup.sh
# Clean up messy history before merging to main
# Tidy a feature branch's history before it is merged.
#
# Arguments:
#   $1 - feature branch to clean up
#   $2 - base branch to rebase onto (optional, default "main")
#
# Steps:
#   1. interactive autosquash rebase (folds fixup!/squash! commits)
#   2. reorder: "pick" lines mentioning "test" or "fix" are moved after all
#      other commits, keeping their relative order
#   3. report which conventional-commit scopes have several commits
#
# Step 2 used to run `sed ... /d` on the todo, which DELETES the matching
# lines and therefore dropped those commits from the branch; the commits are
# now kept and only moved. Reordering can cause conflicts, which git reports
# as in any interactive rebase.
cleanup_feature_branch() {
    local feature_branch=$1
    local base_branch=${2:-main}
    local reorder_script quoted_editor status
    local commits commit hash msg component
    local -a hashes
    local -A groups

    if [ -z "$feature_branch" ]; then
        echo "Usage: cleanup_feature_branch <feature-branch> [base-branch]" >&2
        return 1
    fi

    echo "Cleaning up $feature_branch history..."
    git checkout "$feature_branch" || return 1

    # Step 1: Squash fixup commits
    echo "Step 1: Squashing fixup commits..."
    git rebase -i --autosquash "$base_branch" || return 1

    # Step 2: Reorder commits logically
    echo "Step 2: Reordering commits..."
    reorder_script=$(mktemp "${TMPDIR:-/tmp}/reorder_todo.XXXXXX") || return 1
    cat > "$reorder_script" << 'EOF'
#!/bin/sh
# Sequence editor: keep every todo line; emit test/fix picks last.
awk '
/^pick.*(test|fix)/ { late[++n] = $0; next }
{ print }
END { for (i = 1; i <= n; i++) print late[i] }
' "$1" > "$1.reordered" && mv "$1.reordered" "$1"
EOF
    chmod +x "$reorder_script"
    printf -v quoted_editor '%q' "$reorder_script"
    GIT_SEQUENCE_EDITOR=$quoted_editor git rebase -i "$base_branch"
    status=$?
    rm -f "$reorder_script"
    if [ "$status" -ne 0 ]; then
        return "$status"
    fi

    # Step 3: Combine related commits
    echo "Step 3: Combining related commits..."
    # Get commit list
    commits=$(git log "$base_branch..$feature_branch" --pretty=format:"%h %s")

    # Group by feature/component
    while IFS= read -r commit; do
        hash=${commit%% *}
        msg=${commit#* }
        # Extract component from conventional commit
        if [[ $msg =~ ^[a-z]+\(([^\)]+)\): ]]; then
            component="${BASH_REMATCH[1]}"
            groups[$component]+="$hash "
        fi
    done <<< "$commits"

    # Report squash candidates (nothing is rewritten in this step)
    for component in "${!groups[@]}"; do
        # Intentional word splitting: the value is a space-separated hash list.
        hashes=(${groups[$component]})
        if [ ${#hashes[@]} -gt 1 ]; then
            echo "Squashing ${#hashes[@]} commits for component: $component"
        fi
    done
}
# Remove sensitive data from history
# Purge a file from every commit, then force-push the rewritten history.
#
# Arguments:
#   $1 - file to remove (a file name for BFG, a path for git filter-branch)
#
# DESTRUCTIVE: rewrites all branches and tags and overwrites them on
# "origin". The push is skipped when the rewrite step fails, so a broken or
# partial rewrite is never published.
remove_sensitive_data() {
    local file_to_remove=$1
    local quoted_path

    if [ -z "$file_to_remove" ]; then
        echo "Usage: remove_sensitive_data <file>" >&2
        return 1
    fi

    echo "Removing $file_to_remove from entire history..."

    # Use BFG Repo Cleaner or git filter-branch
    if command -v bfg &> /dev/null; then
        bfg --delete-files "$file_to_remove" || return 1
    else
        # The index filter is evaluated by a shell for every commit, so the
        # path has to be shell-quoted to survive spaces and metacharacters.
        printf -v quoted_path '%q' "$file_to_remove"
        git filter-branch --force --index-filter \
            "git rm --cached --ignore-unmatch -- $quoted_path" \
            --prune-empty --tag-name-filter cat -- --all || return 1
    fi

    # Force push
    echo "Force pushing cleaned history..."
    git push origin --force --all || return 1
    git push origin --force --tags
}
# Linearize history
# Rebuild BRANCH as a straight line of commits on top of BASE.
#
# Arguments:
#   $1 - branch to linearize
#   $2 - base branch (optional, default "main")
#
# Every non-merge commit in BASE..BRANCH is cherry-picked, oldest first, onto
# a scratch branch "<BRANCH>_linear", which then replaces BRANCH. Merge
# commits are skipped: they cannot be cherry-picked without choosing a
# mainline parent, and the commits they brought in are picked individually.
#
# Returns 1 when a cherry-pick stops on a conflict; the scratch branch is
# left checked out so the conflict can be resolved.
linearize_history() {
    local branch=$1
    local base=${2:-main}
    local commits commit

    if [ -z "$branch" ]; then
        echo "Usage: linearize_history <branch> [base]" >&2
        return 1
    fi

    echo "Linearizing history of $branch..."

    # Get all commits (merges excluded, see above)
    commits=$(git rev-list --reverse --no-merges "$base..$branch") || return 1

    # Create new branch
    git checkout -b "${branch}_linear" "$base" || return 1

    # Cherry-pick each commit
    for commit in $commits; do
        echo "Cherry-picking $commit"
        git cherry-pick "$commit" || {
            echo "Conflict in $commit, please resolve and continue"
            return 1
        }
    done

    # Replace original branch
    git branch -D "$branch" || return 1
    git branch -m "${branch}_linear" "$branch"
}
Rebase Best Practices
Safe Rebase Practices
# safe_rebase.py
import subprocess
import os
from datetime import datetime
from typing import Optional
class SafeRebase:
    """Run ``git rebase`` behind pre-checks and a backup tag.

    NOTE(review): ``interactive_rebase``, ``standard_rebase`` and
    ``get_current_branch`` are called below but are not defined in the class
    as shown here; they must be provided elsewhere - confirm. The rebase
    helpers are presumably expected to return a truthy value on success.
    """

    def __init__(self, repo_path: str):
        """Store the repository path and create the backup directory."""
        self.repo_path = repo_path
        self.backup_dir = os.path.join(repo_path, '.git', 'rebase-backups')
        os.makedirs(self.backup_dir, exist_ok=True)

    def safe_rebase(self, target: str, interactive: bool = False) -> bool:
        """Rebase onto *target*, restoring the backup if anything fails.

        Returns the rebase helper's result on completion and ``False`` when
        the pre-checks fail, the backup cannot be created, or an exception
        is raised during the rebase.
        """
        # Check if we can rebase
        if not self.can_rebase():
            print("Cannot rebase: uncommitted changes detected")
            return False
        # Create backup; never start rewriting history without one.
        try:
            backup_ref = self.create_backup()
        except subprocess.CalledProcessError as exc:
            print(f"Could not create backup tag: {exc}")
            return False
        print(f"Created backup at: {backup_ref}")
        try:
            # Perform rebase
            if interactive:
                result = self.interactive_rebase(target)
            else:
                result = self.standard_rebase(target)
            if result:
                print("Rebase completed successfully")
                self.verify_rebase(backup_ref)
            else:
                print("Rebase failed, restoring from backup...")
                self.restore_backup(backup_ref)
            return result
        except Exception as e:
            print(f"Error during rebase: {e}")
            self.restore_backup(backup_ref)
            return False

    def can_rebase(self) -> bool:
        """Return True when no rebase is in progress and the tree is clean."""
        # Check if we're already rebasing. Git keeps its state in either
        # directory depending on the rebase backend.
        git_dir = os.path.join(self.repo_path, '.git')
        for state_dir in ('rebase-merge', 'rebase-apply'):
            if os.path.exists(os.path.join(git_dir, state_dir)):
                return False
        # Check for uncommitted changes
        status = subprocess.run(
            ['git', 'status', '--porcelain'],
            cwd=self.repo_path,
            capture_output=True,
            text=True
        )
        # A failing status (e.g. not a repository) also prints nothing on
        # stdout, so the exit code has to be checked explicitly.
        if status.returncode != 0:
            return False
        return not status.stdout.strip()

    def create_backup(self) -> str:
        """Tag the current commit and return the tag name.

        Raises:
            subprocess.CalledProcessError: if ``git tag`` fails.
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        current_branch = self.get_current_branch()
        backup_name = f"backup_{current_branch}_{timestamp}"
        # Create backup tag
        subprocess.run(
            ['git', 'tag', backup_name],
            cwd=self.repo_path,
            check=True
        )
        return backup_name

    def restore_backup(self, backup_ref: str):
        """Abort any running rebase and hard-reset to *backup_ref*."""
        # Abort any ongoing rebase (fails harmlessly when none is running)
        subprocess.run(
            ['git', 'rebase', '--abort'],
            cwd=self.repo_path,
            stderr=subprocess.DEVNULL
        )
        # Reset to backup
        subprocess.run(
            ['git', 'reset', '--hard', backup_ref],
            cwd=self.repo_path
        )
        print(f"Restored from backup: {backup_ref}")

    def verify_rebase(self, backup_ref: str):
        """Warn when changes from *backup_ref* have no counterpart in HEAD.

        A rebase gives every replayed commit a new hash, so comparing hash
        lists would flag all of them. ``git cherry`` compares by patch
        instead: it marks with ``+`` the commits of *backup_ref* whose change
        is not found in HEAD. Commits that were squashed or edited are still
        reported, hence the cautious wording of the warning.
        """
        cherry = subprocess.run(
            ['git', 'cherry', 'HEAD', backup_ref],
            cwd=self.repo_path,
            capture_output=True,
            text=True
        )
        if cherry.returncode != 0:
            print("Warning: could not verify rebase result")
            print("Run 'git reflog' to investigate")
            return
        # Check for lost commits
        lost_commits = [
            line[2:] for line in cherry.stdout.splitlines()
            if line.startswith('+ ')
        ]
        if lost_commits:
            print(f"Warning: {len(lost_commits)} commits may have been lost!")
            print("Run 'git reflog' to investigate")
Merge vs Rebase Decision Matrix
Decision Framework
# merge_rebase_decision.py
from dataclasses import dataclass
from typing import List, Dict
@dataclass
class BranchContext:
    """Facts about a branch that feed the merge-vs-rebase decision."""

    name: str  # branch name
    is_public: bool  # True when the branch is public
    collaborators: int  # number of collaborators on the branch
    commits_ahead: int  # number of commits the branch is ahead
    days_old: int  # age of the branch in days
    has_conflicts: bool  # True when the branch has conflicts


class MergeRebaseDecision:
    """Score a branch's circumstances and recommend merge or rebase."""

    def decide_strategy(self, context: BranchContext) -> Dict:
        """Weigh *context* and return the recommendation with its rationale.

        The result holds ``strategy`` ("merge" or "rebase"), both scores,
        the list of ``reasons`` in evaluation order, and a suggested
        ``command``. Merge wins only with a strictly higher score.
        """
        tally = {'merge': 0, 'rebase': 0}
        reasons = []

        def vote(side: str, points: int, why: str) -> None:
            tally[side] += points
            reasons.append(why)

        # Public history should not be rewritten.
        if context.is_public:
            vote('merge', 5, "Public branch - history should be preserved")
        else:
            vote('rebase', 2, "Private branch - safe to rewrite history")

        # Shared work favours merging.
        if context.collaborators > 1:
            vote('merge', 3,
                 f"{context.collaborators} collaborators - avoid rewriting shared history")
        else:
            vote('rebase', 2, "Single developer - rebase is safe")

        # Long commit series are squash candidates.
        if context.commits_ahead > 10:
            vote('rebase', 2, f"{context.commits_ahead} commits - consider squashing")

        # Stale branches benefit from being replayed on current code.
        if context.days_old > 7:
            vote('rebase', 2,
                 f"Branch is {context.days_old} days old - rebase to get latest changes")

        # Conflicts nudge towards merge.
        if context.has_conflicts:
            vote('merge', 1, "Has conflicts - merge provides better conflict tracking")

        if tally['merge'] > tally['rebase']:
            strategy = "merge"
        else:
            strategy = "rebase"

        return {
            'strategy': strategy,
            'merge_score': tally['merge'],
            'rebase_score': tally['rebase'],
            'reasons': reasons,
            'command': self.get_command(strategy, context),
        }

    def get_command(self, strategy: str, context: BranchContext) -> str:
        """Return the git command line matching *strategy* and *context*."""
        if strategy == "merge":
            # --no-ff preserves branch context on public branches.
            return "git merge --no-ff" if context.is_public else "git merge"
        # Interactive mode for squashing long series, plain rebase otherwise.
        return "git rebase -i" if context.commits_ahead > 10 else "git rebase"
Recovery Strategies
Rebase and Merge Recovery
#!/bin/bash
# recovery.sh
# Recover from bad rebase
# Create "recovery_branch" at the commit HEAD pointed to before the most
# recent rebase started.
#
# The reflog entry written when a rebase begins ("rebase: checkout ...",
# "rebase (start): checkout ...", "rebase -i (start): checkout ...") records
# HEAD AFTER moving to the upstream commit, so its hash is the upstream, not
# the lost branch tip. The tip is the next older entry, which is what this
# function picks.
#
# NOTE(review): assumes the rebased branch was checked out when the rebase
# started - confirm for `git rebase <upstream> <branch>` invocations.
#
# Returns 1 when no rebase start is found in the reflog.
recover_from_bad_rebase() {
    local original_tip

    echo "Recovering from bad rebase..."

    # Find the original branch tip: reflog is newest first, so the line
    # following the first rebase-start entry is the state just before it.
    original_tip=$(git reflog --format='%H %gs' | awk '
        found { print $1; exit }
        /rebase[^:]*: checkout/ { found = 1 }
    ')

    if [ -z "$original_tip" ]; then
        echo "Could not find original branch tip"
        echo "Manual recovery needed. Check git reflog"
        return 1
    fi

    echo "Found original tip at: $original_tip"

    # Create recovery branch
    git checkout -b recovery_branch "$original_tip" || return 1
    echo "Recovery branch created. Please verify and rename if correct"
}
# Undo a merge
# Revert a merge commit, keeping its first parent as the mainline.
#
# Arguments:
#   $1 - merge commit to undo (optional, default HEAD)
#
# Any commit with two or more parents is accepted, so octopus merges can be
# undone as well. Returns 1 when the commit is not a merge.
undo_merge() {
    local merge_commit=${1:-HEAD}
    local parent_count

    echo "Undoing merge at $merge_commit..."

    # Check if it's actually a merge commit
    parent_count=$(git show --no-patch --format=%P "$merge_commit" | wc -w) || return 1
    if [ "$parent_count" -lt 2 ]; then
        echo "Error: $merge_commit is not a merge commit"
        return 1
    fi

    # Revert the merge relative to its first parent
    git revert -m 1 "$merge_commit" || return 1
    echo "Merge reverted. You may want to rebase instead."
}
# Recover lost commits
# List dangling commits reported by `git fsck --lost-found`, one summary
# line per commit, followed by a hint on how to bring one back.
recover_lost_commits() {
    local sha

    echo "Searching for lost commits..."

    # Third field of each "dangling commit <hash>" line is the hash.
    git fsck --lost-found | grep "dangling commit" | cut -d' ' -f3 |
        while IFS= read -r sha; do
            echo "Found: $(git log --oneline -1 "$sha")"
        done

    echo "To recover a commit, run: git cherry-pick <commit-hash>"
}
Best Practices Checklist
Conclusion
The merge vs rebase debate isn't about choosing one over the other—it's about using the right tool for the right situation. Master both techniques, understand their implications, and apply them strategically. A clean Git history isn't just about aesthetics; it's about maintainability, debuggability, and team productivity.