#!/bin/bash

# Find duplicate shared libraries by md5 checksum and possible duplicates by size.
# Results will be available in the out directory of the build.
#
# Usage:
#   ./dupcheck.sh <out_dir> <image>
#
# Outputs:
#   <out_dir>/duplicate-libs-md5-<image>.txt   files sharing name and md5
#   <out_dir>/duplicate-libs-size-<image>.txt  files sharing name and size that
#                                              are not already in the md5 report
#
# Notes:
#   - llvm-readelf / llvm-objdump are looked up under ./prebuilts, i.e. relative
#     to the current working directory.
#   - Uses md5sum and `stat --format`, which is the GNU coreutils spelling.

set -u

# Check arguments before deriving anything from them.
if [[ "$#" -ne 2 ]]; then
  echo "Usage: ./dupcheck.sh <out_dir> <image>" >&2
  exit 1
fi

readonly OUT_DIR="$1"
readonly IMG="$2"
readonly MD5_DUPLICATES="${OUT_DIR}/duplicate-libs-md5-${IMG}.txt"
readonly SIZE_DUPLICATES="${OUT_DIR}/duplicate-libs-size-${IMG}.txt"

# Relative out dirs keep the historical "./" prefix so that the paths written to
# the reports are unchanged; absolute out dirs are used as given.
case "${OUT_DIR}" in
  /*) SEARCH_ROOT="${OUT_DIR}/${IMG}/" ;;
  *) SEARCH_ROOT="./${OUT_DIR}/${IMG}/" ;;
esac
readonly SEARCH_ROOT

if [[ ! -d "${SEARCH_ROOT}" ]]; then
  echo "dupcheck: directory not found: ${SEARCH_ROOT}" >&2
  exit 1
fi

# Check host and toolchain version. Anything that is not Linux is treated as
# Darwin.
if [[ "$(uname)" == "Linux" ]]; then
  ARCH="linux-x86"
else
  ARCH="darwin-x86"
fi
readonly ARCH
readonly BINUTILS_PATH="./prebuilts/clang/host/${ARCH}/llvm-binutils-stable"

for tool in llvm-readelf llvm-objdump; do
  if [[ ! -x "${BINUTILS_PATH}/${tool}" ]]; then
    echo "dupcheck: warning: ${BINUTILS_PATH}/${tool} not found;" \
      "section diffs in the size report will not be meaningful" >&2
  fi
done

# All scratch files live in one private directory that is removed on any exit.
TMP_DIR=$(mktemp -d "${OUT_DIR}/_dup.XXXXXX") || exit 1
readonly TMP_DIR
trap 'rm -rf -- "${TMP_DIR}"' EXIT

#######################################
# Reduce a "<key> <path>" listing to its duplicated (key, basename) pairs.
# Inputs:  stdin - lines of "<key> <path>"; every path contains a "/".
# Outputs: stdout - "<key> <basename>" for each pair seen more than once,
#          ordered by ascending occurrence count.
#######################################
group_duplicates() {
  awk '{
    name = $0
    sub(/.*\//, "", name)   # everything up to the last "/" is key + directory
    print $1 " " name
  }' \
    | sort | uniq -c | sort -g \
    | awk '$1 > 1 { sub(/^ *[0-9]+ /, ""); print }'
}

#######################################
# Print the paths of a listing that match a key and a basename exactly.
# Arguments: $1 - key (md5 or size), $2 - basename, $3 - listing file with
#            lines of "<key> <path>" (md5sum or "stat %s %n" format).
# Outputs:   stdout - one matching path per line, in listing order.
#######################################
select_paths() {
  # Values travel through the environment so awk does not process escapes in
  # them; appending "" forces string (not numeric) comparison.
  DUP_KEY="$1" DUP_NAME="$2" awk '
    ($1 "") == (ENVIRON["DUP_KEY"] "") {
      path = $0
      sub(/^[^ ]+ [ *]?/, "", path)   # drop key and the md5sum mode character
      base = path
      sub(/.*\//, "", base)
      if (base == ENVIRON["DUP_NAME"]) print path
    }' "$3"
}

#######################################
# Append a unified diff of two files, minus its first three lines, followed by
# an empty line, to the size report.
# Arguments: $1, $2 - files to compare.
#######################################
append_diff() {
  {
    diff -u "$1" "$2" | sed "1d;2d;3d"
    echo ""
  } >> "${SIZE_DUPLICATES}"
}

# Remove any old files if they exist.
rm -f -- "${MD5_DUPLICATES}" "${SIZE_DUPLICATES}"

# Find all lib*.so files and calculate their md5 once; the listing is reused
# for both grouping and path lookup.
md5_list="${TMP_DIR}/md5_all"
md5_dups="${TMP_DIR}/md5_dups"
find "${SEARCH_ROOT}" -name "lib*.so" -type f -exec md5sum {} + > "${md5_list}"
group_duplicates < "${md5_list}" > "${md5_dups}"

if [[ -s "${md5_dups}" ]]; then
  while IFS= read -r entry; do
    checksum="${entry%% *}"
    filename="${entry#* }"
    # For each md5, list the file paths that match.
    {
      echo "MD5: ${checksum}"
      select_paths "${checksum}" "${filename}" "${md5_list}"
      echo ""
    } >> "${MD5_DUPLICATES}"
  done < "${md5_dups}"
else
  echo "No duplicate files by md5 found." >> "${MD5_DUPLICATES}"
fi

# Find possible duplicate .so files by size.
size_list="${TMP_DIR}/size_all"
size_dups="${TMP_DIR}/size_dups"
candidates="${TMP_DIR}/candidates"
dump1="${TMP_DIR}/f1"
dump2="${TMP_DIR}/f2"
find "${SEARCH_ROOT}" -name "*.so" -type f \
  -exec stat --format="%s %n" {} + > "${size_list}"
group_duplicates < "${size_list}" > "${size_dups}"

while IFS= read -r entry; do
  size="${entry%% *}"
  filename="${entry#* }"

  # Keep only the paths that are not already listed (as a whole line) in the
  # md5 report; those are exact duplicates and need no further inspection.
  select_paths "${size}" "${filename}" "${size_list}" | sort \
    | while IFS= read -r filepath; do
        if ! grep -Fxq -- "${filepath}" "${MD5_DUPLICATES}"; then
          printf '%s\n' "${filepath}"
        fi
      done > "${candidates}"

  [[ -s "${candidates}" ]] || continue

  {
    echo "File: ${filename}, Size: ${size}"
    cat "${candidates}"
    echo ""
  } >> "${SIZE_DUPLICATES}"

  # Arithmetic expansion tolerates the padding some wc implementations emit.
  count=$(( $(wc -l < "${candidates}") ))

  # Limitation: this only works for file pairs. If more than two possible
  # duplications are found, the user needs to check manually all the possible
  # combinations using the llvm-readelf and llvm-objdump commands below.
  if [[ "${count}" -eq 2 ]]; then
    file1=$(head -n 1 "${candidates}")
    file2=$(tail -n 1 "${candidates}")

    # Check .note section
    "${BINUTILS_PATH}/llvm-readelf" --wide --notes "${file1}" > "${dump1}" 2>&1
    "${BINUTILS_PATH}/llvm-readelf" --wide --notes "${file2}" > "${dump2}" 2>&1
    append_diff "${dump1}" "${dump2}"

    # Check .text section. The first two lines of each dump are dropped before
    # comparing (presumably the per-file header, which would always differ).
    "${BINUTILS_PATH}/llvm-objdump" --line-numbers --disassemble --demangle \
      --reloc --no-show-raw-insn --section=.text "${file1}" \
      | sed "1d;2d" > "${dump1}"
    "${BINUTILS_PATH}/llvm-objdump" --line-numbers --disassemble --demangle \
      --reloc --no-show-raw-insn --section=.text "${file2}" \
      | sed "1d;2d" > "${dump2}"
    append_diff "${dump1}" "${dump2}"
  else
    # NOTE(review): this branch is also taken when only one candidate is left
    # after removing md5 duplicates; the message is kept as-is for now.
    echo "*Note: more than one duplicate. Manually verify all possible combinations." >> "${SIZE_DUPLICATES}"
  fi
  echo "" >> "${SIZE_DUPLICATES}"
done < "${size_dups}"

# Covers both "no size groups at all" and "every group was already reported by
# md5", so the size report always exists after a run.
if [[ ! -s "${SIZE_DUPLICATES}" ]]; then
  echo "No duplicate files by size found." >> "${SIZE_DUPLICATES}"
fi