blob: 13ab78211ed13d37e5b627fd75d1efb135f8637c [file] [log] [blame]
Carlos Eduardo Seo6e6a95b2023-02-28 02:55:22 +00001#!/bin/bash
2
3# Find duplicate shared libraries by md5 checksum and possible duplicates by size.
4# Results will be available in the out directory of the build.
5# Usage:
6# ./dupcheck.sh <out_dir> <image>
7
# Check arguments before deriving any path from them, so that no path is ever
# built from a missing <out_dir> or <image>. The usage text is a diagnostic and
# therefore goes to stderr.
if [ "$#" -ne 2 ]; then
  echo "Usage: ./dupcheck.sh <out_dir> <image>" >&2
  exit 1
fi

# Positional arguments: the build output directory and the image name
# (a sub-directory of <out_dir> that is scanned for shared libraries).
OUT_DIR="$1"
IMG="$2"

# Scratch files. They live inside OUT_DIR and are removed by the sections
# that create them.
TMP_MD5="${OUT_DIR}/_dup_md5"
TMP_SIZE="${OUT_DIR}/_dup_size"
TMP_CHECK="${OUT_DIR}/_dup_tmp_check"
TMP_SIZE_REAL="${OUT_DIR}/_dup_size_real"
TMP_FILE1="${OUT_DIR}/_dup_f1"
TMP_FILE2="${OUT_DIR}/_dup_f2"

# Final reports, one per detection strategy.
MD5_DUPLICATES="${OUT_DIR}/duplicate-libs-md5-${IMG}.txt"
SIZE_DUPLICATES="${OUT_DIR}/duplicate-libs-size-${IMG}.txt"
24
# Pick the prebuilt LLVM binutils directory that matches the host OS.
# Anything that does not report itself as "Linux" is treated as Darwin.
CHECK_HOST=$(uname)
case "${CHECK_HOST}" in
  Linux)
    ARCH="linux-x86"
    ;;
  *)
    ARCH="darwin-x86"
    ;;
esac
BINUTILS_PATH="./prebuilts/clang/host/${ARCH}/llvm-binutils-stable"
33
# Start from a clean slate: drop report files left over from a previous run.
# Both reports are only ever appended to below, so stale content would
# otherwise be mixed with the new results.
for stale_report in "${MD5_DUPLICATES}" "${SIZE_DUPLICATES}"; do
  if [ -f "${stale_report}" ]; then
    rm "${stale_report}"
  fi
done
42
# Find all lib*.so files and calculate their md5.
# Each md5sum line is reduced to "<md5> <basename>", identical pairs are
# counted with uniq -c, and only pairs seen more than once are kept.
# The count is compared numerically: a textual filter such as "/1 /d" would
# also discard real duplicates whose checksum (or count, e.g. 11) ends in 1,
# and stripping the count with a one-digit pattern fails for counts >= 10.
# TMP_MD5 ends up with one "<md5> <basename>" line per duplicated library.
find ./"${OUT_DIR}"/"${IMG}"/ -name "lib*.so" -type f -print0 \
  | xargs -0 md5sum \
  | sed -e "s# .*/# #" \
  | sort \
  | uniq -c \
  | sort -g \
  | awk '$1 > 1 { print $2, $3 }' > "${TMP_MD5}"

if [ -s "${TMP_MD5}" ]; then
  while read -r checksum filename; do
    # For each md5, list the file paths that match.
    {
      echo "MD5: ${checksum}"
      # Re-hash every file with this basename and keep the ones whose
      # checksum column is exactly the duplicated md5; print only the path.
      find ./"${OUT_DIR}"/"${IMG}"/ -name "${filename}" -type f -print0 \
        | xargs -0 md5sum \
        | grep "^${checksum} " \
        | sed 's/^.* //'
      echo ""
    } >> "${MD5_DUPLICATES}"
  done < "${TMP_MD5}"
else
  echo "No duplicate files by md5 found." >> "${MD5_DUPLICATES}"
fi

# Cleanup
rm "${TMP_MD5}"
63
# Find possible duplicate .so files by size.
# Every *.so is reduced to "<size> <basename>"; uniq -c prefixes the number of
# occurrences and only groups seen more than once are kept, so each line of
# TMP_SIZE reads "<count> <size> <basename>".
# The count is compared numerically: a textual filter such as "/1 /d" would
# also discard every group whose size (or count) merely ends in the digit 1.
# NOTE: "stat --format" is GNU coreutils syntax; stat errors are silenced.
find ./"${OUT_DIR}"/"${IMG}"/ -name "*.so" -type f -print0 \
  | xargs -0 stat --format="%s %n" 2>/dev/null \
  | sed -e "s# .*/# #" \
  | sort \
  | uniq -c \
  | sort -g \
  | awk '$1 > 1' > "${TMP_SIZE}"

if [ -s "${TMP_SIZE}" ]; then
  while read -r _ size filename; do
    # Each group starts with an empty candidate list, even if a previous,
    # interrupted run left the scratch file behind.
    rm -f "${TMP_SIZE_REAL}"

    # Collect the paths of all files with this basename whose size is exactly
    # ${size}. The size column is compared as a whole field so that, e.g.,
    # size 123 does not also select a 1234-byte file or a path containing 123.
    find ./"${OUT_DIR}"/"${IMG}"/ -name "${filename}" -type f -print0 \
      | xargs -0 stat --format="%s %n" 2>/dev/null \
      | awk -v size="${size}" '$1 == size' \
      | sed "s/^.* //" \
      | sort > "${TMP_CHECK}"

    # Check if the files are not in the md5sum list and do nothing if that is
    # the case. The path must match a whole line of the md5 report.
    while read -r filepath; do
      if ! grep -qxF -- "${filepath}" "${MD5_DUPLICATES}"; then
        echo "${filepath}" >> "${TMP_SIZE_REAL}"
      fi
    done < "${TMP_CHECK}"

    # For every duplication found, diff the .note and .text sections.
    if [ -s "${TMP_SIZE_REAL}" ]; then
      {
        echo "File: ${filename}, Size: ${size}"
        cat "${TMP_SIZE_REAL}"
        echo ""
      } >> "${SIZE_DUPLICATES}"

      # Read the count from stdin so that wc prints no file name and any
      # padding it emits cannot break the numeric comparison below.
      count=$(wc -l < "${TMP_SIZE_REAL}")

      # Limitation: this only works for file pairs. If more than two possible
      # duplications are found, the user needs to check manually all the
      # possible combinations using the llvm-readelf and llvm-objdump commands
      # below.
      if [ "${count}" -eq 2 ]; then
        file1=$(head -n 1 "${TMP_SIZE_REAL}")
        file2=$(tail -n 1 "${TMP_SIZE_REAL}")

        # Check .note section
        "${BINUTILS_PATH}"/llvm-readelf --wide --notes "${file1}" > "${TMP_FILE1}" 2>&1
        "${BINUTILS_PATH}"/llvm-readelf --wide --notes "${file2}" > "${TMP_FILE2}" 2>&1
        {
          # Drop the first three lines of the unified diff (the two file
          # header lines and the first hunk header).
          diff -u "${TMP_FILE1}" "${TMP_FILE2}" | sed "1d;2d;3d"
          echo ""
        } >> "${SIZE_DUPLICATES}"

        # Check .text section
        # The first two lines of each dump are removed before diffing.
        "${BINUTILS_PATH}"/llvm-objdump --line-numbers --disassemble --demangle --reloc --no-show-raw-insn --section=.text "${file1}" \
          | sed "1d;2d" > "${TMP_FILE1}"
        "${BINUTILS_PATH}"/llvm-objdump --line-numbers --disassemble --demangle --reloc --no-show-raw-insn --section=.text "${file2}" \
          | sed "1d;2d" > "${TMP_FILE2}"
        {
          diff -u "${TMP_FILE1}" "${TMP_FILE2}" | sed "1d;2d;3d"
          echo ""
        } >> "${SIZE_DUPLICATES}"

        # Cleanup
        rm "${TMP_FILE1}" "${TMP_FILE2}"
      else
        echo "*Note: more than one duplicate. Manually verify all possible combinations." >> "${SIZE_DUPLICATES}"
      fi
      rm "${TMP_SIZE_REAL}"
      echo "" >> "${SIZE_DUPLICATES}"
    fi
  done < "${TMP_SIZE}"
else
  echo "No duplicate files by size found." >> "${SIZE_DUPLICATES}"
fi

# Cleanup. TMP_SIZE is created on every run (possibly empty), so it is removed
# on both branches; TMP_CHECK only exists when at least one group was examined.
rm -f "${TMP_SIZE}" "${TMP_CHECK}"