tRP (~18 cycles)
tRCD (~18 cycles)
CAS (~16 cycles)
Bits 63 .. 6+S | Bits 6+S-1 .. 6 | Bits 5 .. 0 |
---|---|---|
Tag | Set | Offset |
64 - 6 - S bits | S bits | 6 bits |
The S set bits choose a set.
// Cache config parameters:
constexpr auto NumLineBits = 6u; // 6 bits on Intel CPUs, 64-byte lines
constexpr auto NumSetBits = 6u; // 6 bits on Intel L1 caches
constexpr auto NumWays = 8u; // 8 ways on Intel L1 cache
constexpr auto LineSize = 1u << NumLineBits; // bytes per cache line
constexpr auto NumSets = 1u << NumSetBits; // number of sets in the cache

// The data bytes of a single cache line.
struct Line {
  char data[LineSize];
};

// One way of a set: a line plus the tag that readByte compares against the
// tag bits of the requested address.
struct Way {
  uint64_t tag;
  Line line;
};

// A set groups NumWays ways; a lookup scans all of them.
struct Set {
  Way ways[NumWays];
};

// The whole cache: NumSets sets, indexed by the set bits of an address.
struct Cache {
  Set sets[NumSets];

  // Declared in-class so that the out-of-class definition of
  // Cache::readByte is well-formed. Returns the cached byte at addr, or -1
  // when no way in the selected set carries a matching tag.
  int readByte(uint64_t addr) const;
};
// Looks up the byte stored for address `addr`.
//
// The address is split into three fields: the low NumLineBits select the byte
// within a line, the next NumSetBits select the set, and all remaining high
// bits form the tag. The tag is kept in place (masked, not shifted), so it
// must be compared against way tags stored in the same form.
//
// Returns the matching line's byte converted to int, or -1 if no way in the
// selected set has a matching tag.
// NOTE(review): where `char` is signed, a stored byte of value -1 is
// indistinguishable from a miss - confirm whether callers care.
int Cache::readByte(uint64_t addr) const {
  // All masks are built in uint64_t. Building them from `1u` yields 32-bit
  // values; for the tag mask the bitwise NOT would then be zero-extended when
  // combined with `addr`, silently dropping address bits 32..63 from the tag
  // and making addresses that differ only in their upper half alias.
  constexpr uint64_t LineMask = (uint64_t{1} << NumLineBits) - 1u;
  constexpr uint64_t SetMask = (uint64_t{1} << NumSetBits) - 1u;
  constexpr uint64_t TagMask =
      ~((uint64_t{1} << (NumLineBits + NumSetBits)) - 1u);

  const uint64_t lineOffset = addr & LineMask;
  const Set &set = sets[(addr >> NumLineBits) & SetMask];
  const uint64_t tag = addr & TagMask;

  // Every way of the set is a candidate; the first tag match wins.
  for (const auto &way : set.ways)
    if (way.tag == tag) return way.line.data[lineOffset];
  return -1;
}
Level | Size | Access time |
---|---|---|
L1 | 32KB | ~3 cycles |
L2 | 256KB | ~8 cycles |
L3 | 10-20MB | ~30-40 cycles |
(RAM) | - | ~250 cycles |
// A linked node: a pointer to the next Element followed by filler bytes.
// The filler is sized so that the pointer plus the filler add up to
// ElementSize bytes.
// NOTE(review): presumably used to control how many nodes share a cache
// line when the `next` chain is traversed - confirm against the benchmark.
struct Element {
  Element *next;
  // Filler only; sized from the pointer member declared just above.
  uint8_t padding[ElementSize - sizeof(next)];
};

// Storage for NumElements nodes.
Element elements[NumElements];
prefetch
is available
Maybe...