tools/tree-diff: Use hash for content diffs
We need to know the exact difference between modified files in the two trees. Writing the full file contents into the diff could make the diff file huge, so only the files' hashes are written instead.
This commit is contained in:
parent
9fd9270c53
commit
fd2a20d247
1 changed file with 19 additions and 13 deletions
32
tree-diff
32
tree-diff
|
|
@ -1,10 +1,22 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def hash_file(fd):
|
||||||
|
BLOCK_SIZE = 4096
|
||||||
|
hasher = hashlib.sha256()
|
||||||
|
buf = os.read(fd, BLOCK_SIZE)
|
||||||
|
while len(buf) > 0:
|
||||||
|
hasher.update(buf)
|
||||||
|
buf = os.read(fd, BLOCK_SIZE)
|
||||||
|
|
||||||
|
return f"sha256:{hasher.hexdigest()}"
|
||||||
|
|
||||||
|
|
||||||
def stat_diff(stat1, stat2, path, differences):
|
def stat_diff(stat1, stat2, path, differences):
|
||||||
if stat1.st_mode != stat2.st_mode:
|
if stat1.st_mode != stat2.st_mode:
|
||||||
props = differences.setdefault(path, {})
|
props = differences.setdefault(path, {})
|
||||||
|
|
@ -32,11 +44,7 @@ def selinux_diff(path1, path2, path, differences):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def content_diff(name, dir_fd1, dir_fd2, size1, size2, path, differences):
|
def content_diff(name, dir_fd1, dir_fd2, path, differences):
|
||||||
if size1 != size2:
|
|
||||||
props = differences.setdefault(path, {})
|
|
||||||
props["content"] = "different"
|
|
||||||
return
|
|
||||||
try:
|
try:
|
||||||
fd1 = os.open(name, flags=os.O_RDONLY, dir_fd=dir_fd1)
|
fd1 = os.open(name, flags=os.O_RDONLY, dir_fd=dir_fd1)
|
||||||
except OSError:
|
except OSError:
|
||||||
|
|
@ -47,12 +55,12 @@ def content_diff(name, dir_fd1, dir_fd2, size1, size2, path, differences):
|
||||||
os.close(fd1)
|
os.close(fd1)
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
for (byte_block1, byte_block2) in zip(iter(lambda f=fd1: os.read(f, 4096), b""),
|
hash1 = hash_file(fd1)
|
||||||
iter(lambda f=fd2: os.read(f, 4096), b"")):
|
hash2 = hash_file(fd2)
|
||||||
if byte_block1 != byte_block2:
|
|
||||||
props = differences.setdefault(path, {})
|
if hash1 != hash2:
|
||||||
props["content"] = "different"
|
props = differences.setdefault(path, {})
|
||||||
break
|
props["content"] = [hash1, hash2]
|
||||||
finally:
|
finally:
|
||||||
os.close(fd1)
|
os.close(fd1)
|
||||||
os.close(fd2)
|
os.close(fd2)
|
||||||
|
|
@ -106,8 +114,6 @@ def diff_aux(dir_fd1, dir_fd2, path, report):
|
||||||
content_diff(dirent.name,
|
content_diff(dirent.name,
|
||||||
dir_fd1,
|
dir_fd1,
|
||||||
dir_fd2,
|
dir_fd2,
|
||||||
stat1.st_size,
|
|
||||||
stat2.st_size,
|
|
||||||
os.path.join(path, dirent.name),
|
os.path.join(path, dirent.name),
|
||||||
report["differences"])
|
report["differences"])
|
||||||
elif dirent.is_dir(follow_symlinks=False):
|
elif dirent.is_dir(follow_symlinks=False):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue