BpfRingbuf: Clean up memory access

Consumer pos is only written to by userspace (except for
initialization), so reading from it does not require a memory barrier.

This change also moves producer pos to be acquired before anything else
and marks start_ptr as volatile, so it is only read once.

Test: atest BpfRingbufTest
Change-Id: I228578ef4b42c1732646f0ae928b9a0aa2445304
diff --git a/staticlibs/native/bpf_headers/include/bpf/BpfRingbuf.h b/staticlibs/native/bpf_headers/include/bpf/BpfRingbuf.h
index e320c77..dd1504c 100644
--- a/staticlibs/native/bpf_headers/include/bpf/BpfRingbuf.h
+++ b/staticlibs/native/bpf_headers/include/bpf/BpfRingbuf.h
@@ -200,8 +200,9 @@
 inline base::Result<int> BpfRingbufBase::ConsumeAll(
     const std::function<void(const void*)>& callback) {
   int64_t count = 0;
-  uint64_t cons_pos = mConsumerPos->load(std::memory_order_acquire);
   uint32_t prod_pos = mProducerPos->load(std::memory_order_acquire);
+  // Only userspace writes to mConsumerPos, so no need to use std::memory_order_acquire
+  uint64_t cons_pos = mConsumerPos->load(std::memory_order_relaxed);
   while ((cons_pos & 0xFFFFFFFF) != prod_pos) {
     // Find the start of the entry for this read (wrapping is done here).
     void* start_ptr = pointerAddBytes<void*>(mDataPos, cons_pos & mPosMask);
@@ -211,7 +212,7 @@
     //   u32 len;
     //   u32 pg_off;
     // };
-    uint32_t length = *reinterpret_cast<uint32_t*>(start_ptr);
+    uint32_t length = *reinterpret_cast<volatile uint32_t*>(start_ptr);
 
     // If the sample isn't committed, we're caught up with the producer.
     if (length & BPF_RINGBUF_BUSY_BIT) return count;