Makoto Onuki | 620e6ad | 2017-03-16 09:40:08 -0700 | [diff] [blame] | 1 | // Copyright 2017 Google Inc. All rights reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
// fileslist.py replacement written in Go, which utilizes multiple cores.
| 16 | |
| 17 | package main |
| 18 | |
| 19 | import ( |
| 20 | "crypto/sha256" |
| 21 | "encoding/json" |
| 22 | "flag" |
| 23 | "fmt" |
| 24 | "io" |
| 25 | "os" |
| 26 | "path/filepath" |
| 27 | "runtime" |
| 28 | "sort" |
| 29 | "strings" |
| 30 | "sync" |
| 31 | ) |
| 32 | |
// MAX_DEFAULT_PARA is the ceiling applied by defaultPara when it derives the
// default goroutine count from the number of CPUs.
const MAX_DEFAULT_PARA = 24
| 36 | |
| 37 | func defaultPara() int { |
| 38 | ret := runtime.NumCPU() |
| 39 | if ret > MAX_DEFAULT_PARA { |
| 40 | return MAX_DEFAULT_PARA |
| 41 | } |
| 42 | return ret |
| 43 | } |
| 44 | |
| 45 | var ( |
| 46 | para = flag.Int("para", defaultPara(), "Number of goroutines") |
| 47 | ) |
| 48 | |
// Node describes one scanned file. The exported fields are what ends up in
// the JSON output; the unexported fields are host-side bookkeeping used while
// scanning.
type Node struct {
	// Reported fields, serialized in declaration order.
	SHA256 string // hex digest, filled in by scan
	Name   string // device side path
	Size   int64  // size taken from stat, filled in by scan

	// Host-only state.
	path string      // host side path
	stat os.FileInfo // file info supplied when the node was created
}
| 57 | |
| 58 | func newNode(hostPath string, devicePath string, stat os.FileInfo) Node { |
| 59 | return Node{Name: devicePath, path: hostPath, stat: stat} |
| 60 | } |
| 61 | |
| 62 | // Scan a Node and returns true if it should be added to the result. |
| 63 | func (n *Node) scan() bool { |
| 64 | n.Size = n.stat.Size() |
| 65 | |
| 66 | // Calculate SHA256. |
Makoto Onuki | 620e6ad | 2017-03-16 09:40:08 -0700 | [diff] [blame] | 67 | h := sha256.New() |
Sen Jiang | 1b4141f | 2017-03-28 15:54:05 -0700 | [diff] [blame] | 68 | if n.stat.Mode()&os.ModeSymlink == 0 { |
| 69 | f, err := os.Open(n.path) |
| 70 | if err != nil { |
| 71 | panic(err) |
| 72 | } |
| 73 | defer f.Close() |
| 74 | |
| 75 | if _, err := io.Copy(h, f); err != nil { |
| 76 | panic(err) |
| 77 | } |
| 78 | } else { |
| 79 | // Hash the content of symlink, not the file it points to. |
| 80 | s, err := os.Readlink(n.path) |
| 81 | if err != nil { |
| 82 | panic(err) |
| 83 | } |
| 84 | if _, err := io.WriteString(h, s); err != nil { |
| 85 | panic(err) |
| 86 | } |
Makoto Onuki | 620e6ad | 2017-03-16 09:40:08 -0700 | [diff] [blame] | 87 | } |
| 88 | n.SHA256 = fmt.Sprintf("%x", h.Sum(nil)) |
| 89 | return true |
| 90 | } |
| 91 | |
| 92 | func main() { |
| 93 | flag.Parse() |
| 94 | |
| 95 | allOutput := make([]Node, 0, 1024) // Store all outputs. |
| 96 | mutex := &sync.Mutex{} // Guard allOutput |
| 97 | |
| 98 | ch := make(chan Node) // Pass nodes to goroutines. |
| 99 | |
| 100 | var wg sync.WaitGroup // To wait for all goroutines. |
| 101 | wg.Add(*para) |
| 102 | |
| 103 | // Scan files in multiple goroutines. |
| 104 | for i := 0; i < *para; i++ { |
| 105 | go func() { |
| 106 | defer wg.Done() |
| 107 | |
| 108 | output := make([]Node, 0, 1024) // Local output list. |
| 109 | for node := range ch { |
| 110 | if node.scan() { |
| 111 | output = append(output, node) |
| 112 | } |
| 113 | } |
| 114 | // Add to the global output list. |
| 115 | mutex.Lock() |
| 116 | allOutput = append(allOutput, output...) |
| 117 | mutex.Unlock() |
| 118 | }() |
| 119 | } |
| 120 | |
| 121 | // Walk the directories and find files to scan. |
| 122 | for _, dir := range flag.Args() { |
| 123 | absDir, err := filepath.Abs(dir) |
| 124 | if err != nil { |
| 125 | panic(err) |
| 126 | } |
| 127 | deviceRoot := filepath.Clean(absDir + "/..") |
| 128 | err = filepath.Walk(dir, func(path string, stat os.FileInfo, err error) error { |
| 129 | if err != nil { |
| 130 | panic(err) |
| 131 | } |
| 132 | if stat.IsDir() { |
| 133 | return nil |
| 134 | } |
| 135 | absPath, err := filepath.Abs(path) |
| 136 | if err != nil { |
| 137 | panic(err) |
| 138 | } |
| 139 | devicePath, err := filepath.Rel(deviceRoot, absPath) |
| 140 | if err != nil { |
| 141 | panic(err) |
| 142 | } |
| 143 | devicePath = "/" + devicePath |
| 144 | ch <- newNode(absPath, devicePath, stat) |
| 145 | return nil |
| 146 | }) |
| 147 | if err != nil { |
| 148 | panic(err) |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | // Wait until all the goroutines finish. |
| 153 | close(ch) |
| 154 | wg.Wait() |
| 155 | |
| 156 | // Sort the entries and dump as json. |
| 157 | sort.Slice(allOutput, func(i, j int) bool { |
| 158 | if allOutput[i].Size > allOutput[j].Size { |
| 159 | return true |
| 160 | } |
| 161 | if allOutput[i].Size == allOutput[j].Size && strings.Compare(allOutput[i].Name, allOutput[j].Name) > 0 { |
| 162 | return true |
| 163 | } |
| 164 | return false |
| 165 | }) |
| 166 | |
| 167 | j, err := json.MarshalIndent(allOutput, "", " ") |
| 168 | if err != nil { |
| 169 | panic(nil) |
| 170 | } |
| 171 | |
| 172 | fmt.Printf("%s\n", j) |
| 173 | } |