Merge "Reimplement ioutil.ReadDir with a version that avoids calling lstat"
diff --git a/finder/fs/Android.bp b/finder/fs/Android.bp
index fe0a0d3..27e3c7d 100644
--- a/finder/fs/Android.bp
+++ b/finder/fs/Android.bp
@@ -21,6 +21,10 @@
     pkgPath: "android/soong/finder/fs",
     srcs: [
         "fs.go",
+        "readdir.go",
+    ],
+    testSrcs: [
+        "readdir_test.go",
     ],
     darwin: {
         srcs: [
diff --git a/finder/fs/fs.go b/finder/fs/fs.go
index 3de5486..9c138cd 100644
--- a/finder/fs/fs.go
+++ b/finder/fs/fs.go
@@ -75,8 +75,19 @@
 	IsDir() bool
 }
 
+type dirEntryInfo struct { // a directory entry as produced by parseDirent; implements DirEntryInfo
+	name       string      // file name of the entry, without the directory path
+	mode       os.FileMode // file type bits; only meaningful when modeExists is true
+	modeExists bool        // false if the entry's d_type did not identify the file type
+}
+
 var _ DirEntryInfo = os.FileInfo(nil)
 
+func (d *dirEntryInfo) Name() string      { return d.name }                          // the entry's file name
+func (d *dirEntryInfo) Mode() os.FileMode { return d.mode }                          // the entry's file type bits
+func (d *dirEntryInfo) IsDir() bool       { return d.mode.IsDir() }                  // whether mode has os.ModeDir set
+func (d *dirEntryInfo) String() string    { return d.name + ": " + d.mode.String() } // formatted as "name: mode"
+
 // osFs implements FileSystem using the local disk.
 type osFs struct{}
 
@@ -89,7 +100,7 @@
 }
 
 func (osFs) ReadDir(path string) (contents []DirEntryInfo, err error) {
-	entries, err := ioutil.ReadDir(path)
+	entries, err := readdir(path)
 	if err != nil {
 		return nil, err
 	}
diff --git a/finder/fs/readdir.go b/finder/fs/readdir.go
new file mode 100644
index 0000000..f6d7813
--- /dev/null
+++ b/finder/fs/readdir.go
@@ -0,0 +1,219 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fs
+
+// This is based on the readdir implementation from Go 1.9:
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+import (
+	"os"
+	"syscall"
+	"unsafe"
+)
+
+const (
+	blockSize = 4096 // size in bytes of the buffer handed to each syscall.ReadDirent call
+)
+
+// readdir lists the entries of the directory at path, excluding "." and "..", using the d_type
+// reported by the kernel and calling lstat only for entries whose type is unknown.  If reading
+// the directory fails part way through, the entries read so far are returned with the error.
+func readdir(path string) ([]DirEntryInfo, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	// Only defer the Close once Open is known to have succeeded.
+	defer f.Close()
+
+	// This implicitly switches the fd to blocking mode, which is less efficient than what
+	// file.ReadDir does since it will keep a thread blocked and not just a goroutine.
+	fd := int(f.Fd())
+	buf := make([]byte, blockSize)
+	entries := make([]*dirEntryInfo, 0, 100)
+	for {
+		n, errno := syscall.ReadDirent(fd, buf)
+		if errno != nil {
+			err = os.NewSyscallError("readdirent", errno)
+			break
+		}
+		if n <= 0 {
+			break // EOF
+		}
+		entries = parseDirent(buf[:n], entries)
+	}
+
+	ret := make([]DirEntryInfo, 0, len(entries))
+	for _, entry := range entries {
+		if !entry.modeExists {
+			mode, lerr := lstatFileMode(path + "/" + entry.name)
+			if os.IsNotExist(lerr) {
+				// File disappeared between readdir + stat.
+				// Just treat it as if it didn't exist.
+				continue
+			}
+			if lerr != nil {
+				return ret, lerr
+			}
+			entry.mode = mode
+			entry.modeExists = true
+		}
+		ret = append(ret, entry)
+	}
+
+	return ret, err
+}
+
+// parseDirent decodes the raw directory records in buf, as filled in by syscall.ReadDirent,
+// and appends one dirEntryInfo per record to entries.  Records with inode 0 and the "." and
+// ".." names are skipped; decoding stops at the first truncated or malformed record.
+func parseDirent(buf []byte, entries []*dirEntryInfo) []*dirEntryInfo {
+	const nameOffset = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
+	for remaining := buf; len(remaining) > 0; {
+		size, ok := direntReclen(remaining)
+		if !ok || size > uint64(len(remaining)) {
+			break
+		}
+		record := remaining[:size]
+		remaining = remaining[size:]
+		inode, ok := direntIno(record)
+		if !ok {
+			break
+		}
+		if inode == 0 {
+			continue // File absent in directory.
+		}
+		dtype, ok := direntType(record)
+		if !ok {
+			break
+		}
+		nameLen, ok := direntNamlen(record)
+		if !ok || nameOffset+nameLen > uint64(len(record)) {
+			break
+		}
+		// The name ends at the first NUL byte, or at the end of the record if there is none.
+		raw := record[nameOffset : nameOffset+nameLen]
+		end := 0
+		for end < len(raw) && raw[end] != 0 {
+			end++
+		}
+		raw = raw[:end]
+		switch string(raw) {
+		case ".", "..":
+			continue
+		}
+		mode, known := direntTypeToFileMode(dtype)
+		entries = append(entries, &dirEntryInfo{name: string(raw), mode: mode, modeExists: known})
+	}
+	return entries
+}
+
+func direntIno(buf []byte) (uint64, bool) { // Ino field of the dirent at the start of buf; false if buf is too short
+	return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Ino), unsafe.Sizeof(syscall.Dirent{}.Ino))
+}
+
+func direntType(buf []byte) (uint64, bool) { // Type field of the dirent at the start of buf; false if buf is too short
+	return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Type), unsafe.Sizeof(syscall.Dirent{}.Type))
+}
+
+func direntReclen(buf []byte) (uint64, bool) { // Reclen field of the dirent at the start of buf; false if buf is too short
+	return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen))
+}
+
+// direntNamlen returns the space the record in buf has for its name: Reclen minus the Name offset.
+func direntNamlen(buf []byte) (uint64, bool) {
+	if reclen, ok := direntReclen(buf); ok {
+		return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true
+	}
+	return 0, false
+}
+
+// readInt decodes the size-byte little-endian integer at offset off of b; ok is false if b is too short.
+func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
+	if end := int(off + size); end <= len(b) {
+		return readIntLE(b[off:], size), true
+	}
+	return 0, false
+}
+
+// readIntLE decodes the first size bytes of b as a little-endian unsigned integer.  size must
+// be 1, 2, 4 or 8; any other value panics, as does a b that is shorter than size.
+func readIntLE(b []byte, size uintptr) uint64 {
+	switch size {
+	case 1, 2, 4, 8:
+		// Supported widths; decoded below.
+	default:
+		panic("syscall: readInt with unsupported size")
+	}
+
+	_ = b[size-1] // single up-front bounds check; see golang.org/issue/14808
+	var value uint64
+	for i := uintptr(0); i < size; i++ {
+		// Byte i is the i-th least significant byte.
+		value |= uint64(b[i]) << (8 * i)
+	}
+	return value
+}
+
+// lstatFileMode returns the file type bits of name's mode as reported by os.Lstat.  It is the
+// fallback for directory entries whose d_type does not identify the file type.
+func lstatFileMode(name string) (os.FileMode, error) {
+	info, err := os.Lstat(name)
+	if err != nil {
+		return 0, err
+	}
+	return info.Mode() & (os.ModeType | os.ModeCharDevice), nil
+}
+
+// d_type values from Linux and Darwin dirent.h, as decoded by direntTypeToFileMode.
+const (
+	DT_UNKNOWN = 0  // type not reported; direntTypeToFileMode returns exists == false
+	DT_FIFO    = 1  // named pipe (os.ModeNamedPipe)
+	DT_CHR     = 2  // character device (os.ModeDevice | os.ModeCharDevice)
+	DT_DIR     = 4  // directory (os.ModeDir)
+	DT_BLK     = 6  // block device (os.ModeDevice)
+	DT_REG     = 8  // regular file (mode 0)
+	DT_LNK     = 10 // symbolic link (os.ModeSymlink)
+	DT_SOCK    = 12 // socket (os.ModeSocket)
+)
+
+// direntTypeToFileMode converts a dirent d_type value into the matching os.FileMode type bits.
+// A regular file (DT_REG) maps to a zero mode.  The boolean result reports whether the type was
+// recognized; it is false for DT_UNKNOWN and for any value not listed in the DT_* constants, in
+// which case the returned mode is zero and the caller has to find the file type some other way
+// (readdir falls back to lstat).
+func direntTypeToFileMode(typ uint64) (os.FileMode, bool) {
+	switch typ {
+	case DT_FIFO:
+		return os.ModeNamedPipe, true
+	case DT_CHR:
+		return os.ModeDevice | os.ModeCharDevice, true
+	case DT_DIR:
+		return os.ModeDir, true
+	case DT_BLK:
+		return os.ModeDevice, true
+	case DT_REG:
+		return 0, true
+	case DT_LNK:
+		return os.ModeSymlink, true
+	case DT_SOCK:
+		return os.ModeSocket, true
+	default:
+		// DT_UNKNOWN and every value that is not one of the DT_* constants.
+		return 0, false
+	}
+}
diff --git a/finder/fs/readdir_test.go b/finder/fs/readdir_test.go
new file mode 100644
index 0000000..24a6d18
--- /dev/null
+++ b/finder/fs/readdir_test.go
@@ -0,0 +1,312 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fs
+
+import (
+	"os"
+	"reflect"
+	"runtime"
+	"testing"
+)
+
+func TestParseDirent(t *testing.T) {
+	testCases := []struct {
+		name string
+		in   []byte
+		out  []*dirEntryInfo
+	}{
+		{
+			// Test that type DT_DIR is translated to os.ModeDir
+			name: "dir",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_paths", os.ModeDir, true},
+			},
+		},
+		{
+			// Test that type DT_REG is translated to a regular file
+			name: "file",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x08,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_paths", 0, true},
+			},
+		},
+		{
+			// Test that type DT_LNK is translated to os.ModeSymlink
+			name: "symlink",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x0a,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_paths", os.ModeSymlink, true},
+			},
+		},
+		{
+			// Test that type DT_UNKNOWN sets modeExists: false
+			name: "unknown",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x00,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_paths", 0, false},
+			},
+		},
+		{
+			// Test a name with no padding after the null terminator
+			name: "no padding",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x20, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_path", os.ModeDir, true},
+			},
+		},
+		{
+			// Test two sequential entries
+			name: "two entries",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x74,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_paths", os.ModeDir, true},
+				{".module_patht", os.ModeDir, true},
+			},
+		},
+		{
+			// Test two sequential entries with no padding between them
+			name: "two entries no padding",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x20, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
+
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_path", os.ModeDir, true},
+				{".module_paths", os.ModeDir, true},
+			},
+		},
+		{
+			// Test an empty buffer.  This shouldn't happen in practice because
+			// readdir doesn't call parseDirent if no bytes were returned.
+			name: "empty",
+			in:   []byte{},
+			out:  nil,
+		},
+		{
+			name: "missing null terminator",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x20, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+			},
+			out: []*dirEntryInfo{
+				{".module_paths", os.ModeDir, true},
+			},
+		},
+		{
+			// Test two sequential entries where the first has an incorrect d_reclen.
+			// Should return with no entries.
+			name: "two entries first malformed",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x10, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
+
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x28, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: nil,
+		},
+		{
+			// Test two well-delimited entries where only the second has an incorrect
+			// d_reclen (too short to hold d_type).  Should return the first entry.
+			name: "two entries second malformed",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen; (32: the exact size of this record)
+				0x20, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
+
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen; (16: malformed, ends before d_type)
+				0x10, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
+				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			},
+			out: []*dirEntryInfo{
+				{".module_path", os.ModeDir, true},
+			},
+		},
+		{
+			// Test a reclen that goes past the end of the buffer.
+			name: "overrun",
+			in: []byte{
+				// __ino64_t d_ino;
+				0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
+				// __off64_t d_off;
+				0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
+				// unsigned short int d_reclen;
+				0x30, 0x00,
+				// unsigned char d_type;
+				0x04,
+				// char d_name[];
+				0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
+			},
+			out: nil,
+		},
+	}
+
+	if runtime.GOOS != "linux" {
+		t.Skip("depends on Linux definitions of syscall.Dirent")
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			entries := parseDirent(testCase.in, nil)
+			if !reflect.DeepEqual(testCase.out, entries) {
+				t.Fatalf("expected:\n %v\ngot:\n %v\n", testCase.out, entries)
+			}
+		})
+	}
+}