transcoding: fixes for binder died handling

Change client id type to uintptr_t counter, instead of
casting binder to int64_t.

Save all shared_ptrs of clients in global registry.

bug: 154734285
bug: 145233472

test: transcoding unit tests; manually plant crash
in test client and check binderDied handling in log.
Change-Id: If6523a1c6b7ce797a2201462399174e9cf0a3c71

Change-Id: I6b312c0f28a345285e27b738c37baee2bccae755
diff --git a/media/libmediatranscoding/TranscodingClientManager.cpp b/media/libmediatranscoding/TranscodingClientManager.cpp
index fd87cb4..1060791 100644
--- a/media/libmediatranscoding/TranscodingClientManager.cpp
+++ b/media/libmediatranscoding/TranscodingClientManager.cpp
@@ -32,6 +32,13 @@
 using Status = ::ndk::ScopedAStatus;
 using ::ndk::SpAIBinder;
 
+//static
+std::atomic<ClientIdType> TranscodingClientManager::sCookieCounter = 0;
+//static
+std::mutex TranscodingClientManager::sCookie2ClientLock;
+//static
+std::map<ClientIdType, std::shared_ptr<TranscodingClientManager::ClientImpl>>
+        TranscodingClientManager::sCookie2Client;
 ///////////////////////////////////////////////////////////////////////////////
 
 /**
@@ -56,7 +63,7 @@
     std::string mClientOpPackageName;
 
     // Next jobId to assign
-    std::atomic<std::int32_t> mNextJobId;
+    std::atomic<int32_t> mNextJobId;
     // Pointer to the client manager for this client
     TranscodingClientManager* mOwner;
 
@@ -81,7 +88,7 @@
         TranscodingClientManager* owner)
       : mClientBinder((callback != nullptr) ? callback->asBinder() : nullptr),
         mClientCallback(callback),
-        mClientId((int64_t)mClientBinder.get()),
+        mClientId(sCookieCounter.fetch_add(1, std::memory_order_relaxed)),
         mClientPid(pid),
         mClientUid(uid),
         mClientName(clientName),
@@ -142,9 +149,24 @@
 
 // static
 void TranscodingClientManager::BinderDiedCallback(void* cookie) {
-    ClientImpl* client = static_cast<ClientImpl*>(cookie);
-    ALOGD("Client %lld is dead", (long long)client->mClientId);
-    client->unregister();
+    ClientIdType clientId = reinterpret_cast<ClientIdType>(cookie);
+
+    ALOGD("Client %lld is dead", (long long)clientId);
+
+    std::shared_ptr<ClientImpl> client;
+
+    {
+        std::scoped_lock lock{sCookie2ClientLock};
+
+        auto it = sCookie2Client.find(clientId);
+        if (it != sCookie2Client.end()) {
+            client = it->second;
+        }
+    }
+
+    if (client != nullptr) {
+        client->unregister();
+    }
 }
 
 TranscodingClientManager::TranscodingClientManager(
@@ -191,25 +213,33 @@
         return BAD_VALUE;
     }
 
-    // Creates the client and uses its process id as client id.
-    std::shared_ptr<ClientImpl> client = ::ndk::SharedRefBase::make<ClientImpl>(
-            callback, pid, uid, clientName, opPackageName, this);
+    SpAIBinder binder = callback->asBinder();
 
     std::scoped_lock lock{mLock};
 
     // Checks if the client already registers.
-    if (mClientIdToClientMap.find(client->mClientId) != mClientIdToClientMap.end()) {
+    if (mRegisteredCallbacks.count((uintptr_t)binder.get()) > 0) {
         return ALREADY_EXISTS;
     }
 
+    // Creates the client and uses its process id as client id.
+    std::shared_ptr<ClientImpl> client = ::ndk::SharedRefBase::make<ClientImpl>(
+            callback, pid, uid, clientName, opPackageName, this);
+
     ALOGD("Adding client id %lld, pid %d, uid %d, name %s, package %s",
           (long long)client->mClientId, client->mClientPid, client->mClientUid,
           client->mClientName.c_str(), client->mClientOpPackageName.c_str());
 
-    AIBinder_linkToDeath(client->mClientBinder.get(), mDeathRecipient.get(),
-                         reinterpret_cast<void*>(client.get()));
+    {
+        std::scoped_lock lock{sCookie2ClientLock};
+        sCookie2Client.emplace(std::make_pair(client->mClientId, client));
+    }
+
+    AIBinder_linkToDeath(binder.get(), mDeathRecipient.get(),
+                         reinterpret_cast<void*>(client->mClientId));
 
     // Adds the new client to the map.
+    mRegisteredCallbacks.insert((uintptr_t)binder.get());
     mClientIdToClientMap[client->mClientId] = client;
 
     *outClient = client;
@@ -228,16 +258,22 @@
         return INVALID_OPERATION;
     }
 
-    SpAIBinder callback = it->second->mClientBinder;
+    SpAIBinder binder = it->second->mClientBinder;
 
     // Check if the client still live. If alive, unlink the death.
-    if (callback.get() != nullptr) {
-        AIBinder_unlinkToDeath(callback.get(), mDeathRecipient.get(),
-                               reinterpret_cast<void*>(it->second.get()));
+    if (binder.get() != nullptr) {
+        AIBinder_unlinkToDeath(binder.get(), mDeathRecipient.get(),
+                               reinterpret_cast<void*>(it->second->mClientId));
+    }
+
+    {
+        std::scoped_lock lock{sCookie2ClientLock};
+        sCookie2Client.erase(it->second->mClientId);
     }
 
     // Erase the entry.
     mClientIdToClientMap.erase(it);
+    mRegisteredCallbacks.erase((uintptr_t)binder.get());
 
     return OK;
 }
diff --git a/media/libmediatranscoding/TranscodingJobScheduler.cpp b/media/libmediatranscoding/TranscodingJobScheduler.cpp
index 83d7a82..cf9d3c8 100644
--- a/media/libmediatranscoding/TranscodingJobScheduler.cpp
+++ b/media/libmediatranscoding/TranscodingJobScheduler.cpp
@@ -174,7 +174,7 @@
     }
 }
 
-bool TranscodingJobScheduler::submit(ClientIdType clientId, int32_t jobId, uid_t uid,
+bool TranscodingJobScheduler::submit(ClientIdType clientId, JobIdType jobId, uid_t uid,
                                      const TranscodingRequestParcel& request,
                                      const std::weak_ptr<ITranscodingClientCallback>& callback) {
     JobKeyType jobKey = std::make_pair(clientId, jobId);
@@ -231,7 +231,7 @@
     return true;
 }
 
-bool TranscodingJobScheduler::cancel(ClientIdType clientId, int32_t jobId) {
+bool TranscodingJobScheduler::cancel(ClientIdType clientId, JobIdType jobId) {
     JobKeyType jobKey = std::make_pair(clientId, jobId);
 
     ALOGV("%s: job %s", __FUNCTION__, jobToString(jobKey).c_str());
@@ -257,7 +257,7 @@
     return true;
 }
 
-bool TranscodingJobScheduler::getJob(ClientIdType clientId, int32_t jobId,
+bool TranscodingJobScheduler::getJob(ClientIdType clientId, JobIdType jobId,
                                      TranscodingRequestParcel* request) {
     JobKeyType jobKey = std::make_pair(clientId, jobId);
 
@@ -272,7 +272,7 @@
     return true;
 }
 
-void TranscodingJobScheduler::onFinish(ClientIdType clientId, int32_t jobId) {
+void TranscodingJobScheduler::onFinish(ClientIdType clientId, JobIdType jobId) {
     JobKeyType jobKey = std::make_pair(clientId, jobId);
 
     ALOGV("%s: job %s", __FUNCTION__, jobToString(jobKey).c_str());
@@ -308,7 +308,8 @@
     validateState_l();
 }
 
-void TranscodingJobScheduler::onError(int64_t clientId, int32_t jobId, TranscodingErrorCode err) {
+void TranscodingJobScheduler::onError(ClientIdType clientId, JobIdType jobId,
+                                      TranscodingErrorCode err) {
     JobKeyType jobKey = std::make_pair(clientId, jobId);
 
     ALOGV("%s: job %s, err %d", __FUNCTION__, jobToString(jobKey).c_str(), (int32_t)err);
@@ -344,7 +345,8 @@
     validateState_l();
 }
 
-void TranscodingJobScheduler::onProgressUpdate(int64_t clientId, int32_t jobId, int32_t progress) {
+void TranscodingJobScheduler::onProgressUpdate(ClientIdType clientId, JobIdType jobId,
+                                               int32_t progress) {
     JobKeyType jobKey = std::make_pair(clientId, jobId);
 
     ALOGV("%s: job %s, progress %d", __FUNCTION__, jobToString(jobKey).c_str(), progress);
diff --git a/media/libmediatranscoding/include/media/SchedulerClientInterface.h b/media/libmediatranscoding/include/media/SchedulerClientInterface.h
index 6ccf117..dc7648c 100644
--- a/media/libmediatranscoding/include/media/SchedulerClientInterface.h
+++ b/media/libmediatranscoding/include/media/SchedulerClientInterface.h
@@ -19,25 +19,24 @@
 
 #include <aidl/android/media/ITranscodingClientCallback.h>
 #include <aidl/android/media/TranscodingRequestParcel.h>
+#include <media/TranscodingDefs.h>
 
 namespace android {
 
 using ::aidl::android::media::ITranscodingClientCallback;
 using ::aidl::android::media::TranscodingRequestParcel;
 
-using ClientIdType = int64_t;
-
 // Interface for a client to call the scheduler to schedule or retrieve
 // the status of a job.
 class SchedulerClientInterface {
 public:
-    virtual bool submit(ClientIdType clientId, int32_t jobId, uid_t uid,
+    virtual bool submit(ClientIdType clientId, JobIdType jobId, uid_t uid,
                         const TranscodingRequestParcel& request,
                         const std::weak_ptr<ITranscodingClientCallback>& clientCallback) = 0;
 
-    virtual bool cancel(ClientIdType clientId, int32_t jobId) = 0;
+    virtual bool cancel(ClientIdType clientId, JobIdType jobId) = 0;
 
-    virtual bool getJob(ClientIdType clientId, int32_t jobId,
+    virtual bool getJob(ClientIdType clientId, JobIdType jobId,
                         TranscodingRequestParcel* request) = 0;
 
 protected:
diff --git a/media/libmediatranscoding/include/media/TranscoderInterface.h b/media/libmediatranscoding/include/media/TranscoderInterface.h
index 3c72c17..52b357d 100644
--- a/media/libmediatranscoding/include/media/TranscoderInterface.h
+++ b/media/libmediatranscoding/include/media/TranscoderInterface.h
@@ -18,6 +18,7 @@
 #define ANDROID_MEDIA_TRANSCODER_INTERFACE_H
 
 #include <aidl/android/media/TranscodingErrorCode.h>
+#include <media/TranscodingDefs.h>
 
 namespace android {
 
@@ -30,9 +31,9 @@
     // TODO(chz): determine what parameters are needed here.
     // For now, always pass in clientId&jobId.
     virtual void setCallback(const std::shared_ptr<TranscoderCallbackInterface>& cb) = 0;
-    virtual void start(int64_t clientId, int32_t jobId) = 0;
-    virtual void pause(int64_t clientId, int32_t jobId) = 0;
-    virtual void resume(int64_t clientId, int32_t jobId) = 0;
+    virtual void start(ClientIdType clientId, JobIdType jobId) = 0;
+    virtual void pause(ClientIdType clientId, JobIdType jobId) = 0;
+    virtual void resume(ClientIdType clientId, JobIdType jobId) = 0;
 
 protected:
     virtual ~TranscoderInterface() = default;
@@ -43,9 +44,9 @@
 class TranscoderCallbackInterface {
 public:
     // TODO(chz): determine what parameters are needed here.
-    virtual void onFinish(int64_t clientId, int32_t jobId) = 0;
-    virtual void onError(int64_t clientId, int32_t jobId, TranscodingErrorCode err) = 0;
-    virtual void onProgressUpdate(int64_t clientId, int32_t jobId, int32_t progress) = 0;
+    virtual void onFinish(ClientIdType clientId, JobIdType jobId) = 0;
+    virtual void onError(ClientIdType clientId, JobIdType jobId, TranscodingErrorCode err) = 0;
+    virtual void onProgressUpdate(ClientIdType clientId, JobIdType jobId, int32_t progress) = 0;
 
     // Called when transcoding becomes temporarily inaccessible due to loss of resource.
     // If there is any job currently running, it will be paused. When resource contention
diff --git a/media/libmediatranscoding/include/media/TranscodingClientManager.h b/media/libmediatranscoding/include/media/TranscodingClientManager.h
index dba669a..00b2e7f 100644
--- a/media/libmediatranscoding/include/media/TranscodingClientManager.h
+++ b/media/libmediatranscoding/include/media/TranscodingClientManager.h
@@ -24,8 +24,10 @@
 #include <utils/String8.h>
 #include <utils/Vector.h>
 
+#include <map>
 #include <mutex>
 #include <unordered_map>
+#include <unordered_set>
 
 #include "SchedulerClientInterface.h"
 
@@ -37,9 +39,8 @@
 /*
  * TranscodingClientManager manages all the transcoding clients across different processes.
  *
- * TranscodingClientManager is a global singleton that could only acquired by
- * MediaTranscodingService. It manages all the clients's registration/unregistration and clients'
- * information. It also bookkeeps all the clients' information. It also monitors to the death of the
+ * TranscodingClientManager manages all the clients's registration/unregistration and clients'
+ * information. It also bookkeeps all the clients' information. It also monitors the death of the
  * clients. Upon client's death, it will remove the client from it.
  *
  * TODO(hkuang): Hook up with ResourceManager for resource management.
@@ -102,10 +103,16 @@
     mutable std::mutex mLock;
     std::unordered_map<ClientIdType, std::shared_ptr<ClientImpl>> mClientIdToClientMap
             GUARDED_BY(mLock);
+    std::unordered_set<uintptr_t> mRegisteredCallbacks GUARDED_BY(mLock);
 
     ::ndk::ScopedAIBinder_DeathRecipient mDeathRecipient;
 
     std::shared_ptr<SchedulerClientInterface> mJobScheduler;
+
+    static std::atomic<ClientIdType> sCookieCounter;
+    static std::mutex sCookie2ClientLock;
+    static std::map<ClientIdType, std::shared_ptr<ClientImpl>> sCookie2Client
+            GUARDED_BY(sCookie2ClientLock);
 };
 
 }  // namespace android
diff --git a/media/libmediatranscoding/include/media/TranscodingDefs.h b/media/libmediatranscoding/include/media/TranscodingDefs.h
new file mode 100644
index 0000000..31d83ac
--- /dev/null
+++ b/media/libmediatranscoding/include/media/TranscodingDefs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_MEDIA_TRANSCODING_DEFS_H
+#define ANDROID_MEDIA_TRANSCODING_DEFS_H
+
+#include <aidl/android/media/ITranscodingClientCallback.h>
+#include <aidl/android/media/TranscodingRequestParcel.h>
+
+namespace android {
+
+using ClientIdType = uintptr_t;
+using JobIdType = int32_t;
+
+}  // namespace android
+#endif  // ANDROID_MEDIA_TRANSCODING_DEFS_H
diff --git a/media/libmediatranscoding/include/media/TranscodingJobScheduler.h b/media/libmediatranscoding/include/media/TranscodingJobScheduler.h
index 0838977..63001c3 100644
--- a/media/libmediatranscoding/include/media/TranscodingJobScheduler.h
+++ b/media/libmediatranscoding/include/media/TranscodingJobScheduler.h
@@ -39,17 +39,17 @@
     virtual ~TranscodingJobScheduler();
 
     // SchedulerClientInterface
-    bool submit(ClientIdType clientId, int32_t jobId, uid_t uid,
+    bool submit(ClientIdType clientId, JobIdType jobId, uid_t uid,
                 const TranscodingRequestParcel& request,
                 const std::weak_ptr<ITranscodingClientCallback>& clientCallback) override;
-    bool cancel(ClientIdType clientId, int32_t jobId) override;
-    bool getJob(ClientIdType clientId, int32_t jobId, TranscodingRequestParcel* request) override;
+    bool cancel(ClientIdType clientId, JobIdType jobId) override;
+    bool getJob(ClientIdType clientId, JobIdType jobId, TranscodingRequestParcel* request) override;
     // ~SchedulerClientInterface
 
     // TranscoderCallbackInterface
-    void onFinish(ClientIdType clientId, int32_t jobId) override;
-    void onError(int64_t clientId, int32_t jobId, TranscodingErrorCode err) override;
-    void onProgressUpdate(int64_t clientId, int32_t jobId, int32_t progress) override;
+    void onFinish(ClientIdType clientId, JobIdType jobId) override;
+    void onError(ClientIdType clientId, JobIdType jobId, TranscodingErrorCode err) override;
+    void onProgressUpdate(ClientIdType clientId, JobIdType jobId, int32_t progress) override;
     void onResourceLost() override;
     // ~TranscoderCallbackInterface
 
@@ -62,7 +62,7 @@
     friend class MediaTranscodingService;
     friend class TranscodingJobSchedulerTest;
 
-    using JobKeyType = std::pair<ClientIdType, int32_t /*jobId*/>;
+    using JobKeyType = std::pair<ClientIdType, JobIdType>;
     using JobQueueType = std::list<JobKeyType>;
 
     struct Job {
diff --git a/media/libmediatranscoding/tests/TranscodingClientManager_tests.cpp b/media/libmediatranscoding/tests/TranscodingClientManager_tests.cpp
index 7e5ae61..6204f47 100644
--- a/media/libmediatranscoding/tests/TranscodingClientManager_tests.cpp
+++ b/media/libmediatranscoding/tests/TranscodingClientManager_tests.cpp
@@ -83,12 +83,12 @@
             Finished,
             Failed,
         } type;
-        int32_t jobId;
+        JobIdType jobId;
     };
 
     static constexpr Event NoEvent = {Event::NoEvent, 0};
 #define DECLARE_EVENT(action) \
-    static Event action(int32_t jobId) { return {Event::action, jobId}; }
+    static Event action(JobIdType jobId) { return {Event::action, jobId}; }
 
     DECLARE_EVENT(Finished);
     DECLARE_EVENT(Failed);
@@ -120,7 +120,7 @@
 
     virtual ~TestScheduler() { ALOGI("TestScheduler Destroyed"); }
 
-    bool submit(int64_t clientId, int32_t jobId, uid_t /*uid*/,
+    bool submit(ClientIdType clientId, JobIdType jobId, uid_t /*uid*/,
                 const TranscodingRequestParcel& request,
                 const std::weak_ptr<ITranscodingClientCallback>& clientCallback) override {
         JobKeyType jobKey = std::make_pair(clientId, jobId);
@@ -141,7 +141,7 @@
         return true;
     }
 
-    bool cancel(int64_t clientId, int32_t jobId) override {
+    bool cancel(ClientIdType clientId, JobIdType jobId) override {
         JobKeyType jobKey = std::make_pair(clientId, jobId);
 
         if (mJobs.count(jobKey) == 0) {
@@ -151,7 +151,8 @@
         return true;
     }
 
-    bool getJob(int64_t clientId, int32_t jobId, TranscodingRequestParcel* request) override {
+    bool getJob(ClientIdType clientId, JobIdType jobId,
+                TranscodingRequestParcel* request) override {
         JobKeyType jobKey = std::make_pair(clientId, jobId);
         if (mJobs.count(jobKey) == 0) {
             return false;
@@ -196,7 +197,7 @@
         std::weak_ptr<ITranscodingClientCallback> callback;
     };
 
-    typedef std::pair<int64_t, int32_t> JobKeyType;
+    typedef std::pair<ClientIdType, JobIdType> JobKeyType;
     std::map<JobKeyType, Job> mJobs;
     JobKeyType mLastJob;
 };
diff --git a/media/libmediatranscoding/tests/TranscodingJobScheduler_tests.cpp b/media/libmediatranscoding/tests/TranscodingJobScheduler_tests.cpp
index adb16a2..6931490 100644
--- a/media/libmediatranscoding/tests/TranscodingJobScheduler_tests.cpp
+++ b/media/libmediatranscoding/tests/TranscodingJobScheduler_tests.cpp
@@ -40,8 +40,8 @@
 using aidl::android::media::IMediaTranscodingService;
 using aidl::android::media::ITranscodingClient;
 
-constexpr int64_t kClientId = 1000;
-constexpr int32_t kClientJobId = 0;
+constexpr ClientIdType kClientId = 1000;
+constexpr JobIdType kClientJobId = 0;
 constexpr uid_t kClientUid = 5000;
 constexpr uid_t kInvalidUid = (uid_t)-1;
 
@@ -86,21 +86,21 @@
     // TranscoderInterface
     void setCallback(const std::shared_ptr<TranscoderCallbackInterface>& /*cb*/) override {}
 
-    void start(int64_t clientId, int32_t jobId) override {
+    void start(ClientIdType clientId, JobIdType jobId) override {
         mEventQueue.push_back(Start(clientId, jobId));
     }
-    void pause(int64_t clientId, int32_t jobId) override {
+    void pause(ClientIdType clientId, JobIdType jobId) override {
         mEventQueue.push_back(Pause(clientId, jobId));
     }
-    void resume(int64_t clientId, int32_t jobId) override {
+    void resume(ClientIdType clientId, JobIdType jobId) override {
         mEventQueue.push_back(Resume(clientId, jobId));
     }
 
-    void onFinished(int64_t clientId, int32_t jobId) {
+    void onFinished(ClientIdType clientId, JobIdType jobId) {
         mEventQueue.push_back(Finished(clientId, jobId));
     }
 
-    void onFailed(int64_t clientId, int32_t jobId, TranscodingErrorCode err) {
+    void onFailed(ClientIdType clientId, JobIdType jobId, TranscodingErrorCode err) {
         mLastError = err;
         mEventQueue.push_back(Failed(clientId, jobId));
     }
@@ -113,15 +113,15 @@
 
     struct Event {
         enum { NoEvent, Start, Pause, Resume, Finished, Failed } type;
-        int64_t clientId;
-        int32_t jobId;
+        ClientIdType clientId;
+        JobIdType jobId;
     };
 
     static constexpr Event NoEvent = {Event::NoEvent, 0, 0};
 
-#define DECLARE_EVENT(action)                              \
-    static Event action(int64_t clientId, int32_t jobId) { \
-        return {Event::action, clientId, jobId};           \
+#define DECLARE_EVENT(action)                                     \
+    static Event action(ClientIdType clientId, JobIdType jobId) { \
+        return {Event::action, clientId, jobId};                  \
     }
 
     DECLARE_EVENT(Start);