Merge "MPEG4Extractor:media timescale for audio delay/pad"
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index eeac1d7..21ccf44 100644
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -459,6 +459,7 @@
                 track->meta.findInt64(kKeyDuration, &duration) &&
                 track->meta.findInt32(kKeySampleRate, &samplerate)) {
 
+            // elst has to be processed only the first time this function is called
             track->has_elst = false;
 
             if (track->elst_segment_duration > INT64_MAX) {
@@ -466,16 +467,19 @@
             }
             int64_t segment_duration = track->elst_segment_duration;
             int64_t media_time = track->elst_media_time;
-            int64_t halfscale = mHeaderTimescale / 2;
+            int64_t halfscale = track->timescale / 2;
+
             ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
-                  ", halfscale = %" PRId64 ", timescale = %d",
-                  segment_duration,
-                  media_time,
-                  halfscale,
-                  mHeaderTimescale);
+                  ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
+                  segment_duration, media_time,
+                  halfscale, mHeaderTimescale, track->timescale);
+
+            if ((uint32_t)samplerate != track->timescale){
+                ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!", samplerate);
+            }
 
             int64_t delay;
-            // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
+            // delay = ((media_time * samplerate) + halfscale) / track->timescale;
             if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
                     __builtin_add_overflow(delay, halfscale, &delay) ||
                     (delay /= mHeaderTimescale, false) ||
@@ -502,33 +506,43 @@
 
             int64_t segment_end;
             int64_t padding;
-            // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
-            if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
-                    __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
-                    __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
+            int64_t segment_duration_e6;
+            int64_t media_time_scaled_e6;
+            int64_t media_time_scaled;
+            // padding = scaled_duration - ((segment_duration * 1000000) +
+            //                  ((media_time * mHeaderTimescale * 1000000) / track->timescale))
+            // segment_duration is expressed in the movie header box (mvhd) timescale;
+            // media_time is expressed in the media header box (mdhd) timescale of the track.
+            if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
+                __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
+                __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
+                return;
+            }
+            media_time_scaled_e6 /= track->timescale;
+            if(__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end) ||
+                __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
                 return;
             }
             ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
-
+            int64_t paddingsamples = 0;
             if (padding < 0) {
                 // track duration from media header (which is what kKeyDuration is) might
                 // be slightly shorter than the segment duration, which would make the
                 // padding negative. Clamp to zero.
                 padding = 0;
-            }
-
-            int64_t paddingsamples;
-            int64_t halfscale_e6;
-            int64_t timescale_e6;
-            // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
-            //                / (mHeaderTimescale * 1000000);
-            if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
-                    __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
-                    __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
-                    __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
-                    (paddingsamples /= timescale_e6, false) ||
-                    paddingsamples > INT32_MAX) {
-                return;
+            } else {
+                int64_t halfscale_e6;
+                int64_t timescale_e6;
+                // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
+                //                / (mHeaderTimescale * 1000000);
+                if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
+                        __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
+                        __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
+                        __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
+                        (paddingsamples /= timescale_e6, false) ||
+                        paddingsamples > INT32_MAX) {
+                    return;
+                }
             }
             ALOGV("paddingsamples = %" PRId64, paddingsamples);
             track->meta.setInt32(kKeyEncoderPadding, paddingsamples);