From a2c9de8fcc63a954b6486846b80c402a85d956ca Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Fri, 11 Nov 2022 16:29:22 -0800 Subject: Multi-word fetch bundles and icache with realistic dram latency --- uarch/core.cpp | 55 +++++++++++++++++++++++++++++++++++++++---------------- uarch/core.h | 27 ++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 19 deletions(-) (limited to 'uarch') diff --git a/uarch/core.cpp b/uarch/core.cpp index 34f422a..8b2db9b 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp @@ -25,21 +25,38 @@ void fetch_stage::clock() { gen = r.new_gen; pc = r.new_pc; didrestart = true; + outstandingfill = false; c.fetch_restarto.reset(); } + if (c.fetch_mem_responsep.can_read()) { + auto r = c.fetch_mem_responsep.read(); + cache.handle_response(r); + } + if (c.fetch_bundlep.can_write()) { fetch_bundle b; - b.tr = infra::pt::toplevel(); - b.gen = gen; - b.pc = pc; - b.word = c.mem.fetch(pc); - if (didrestart) - infra::pt::event(b.tr, ">", now-1, ""); - pte(b.tr, "F"); - c.fetch_bundlep.write(std::move(b)); - pc = (pc & 070000) | ((pc + 1) & 007777); - didrestart = false; + if (auto t = cache.fetchline(b.data, pc); t.has_value()) { + b.tr = infra::pt::toplevel(); + b.gen = gen; + b.pc = pc; + if (didrestart) + infra::pt::event(b.tr, ">", now-1, ""); + pte(b.tr, "F", fmt::format("pc={:05o}", b.pc)); + c.fetch_bundlep.write(std::move(b)); + pc = (pc & 070000) | (((pc & ~memory::LINE_BYTE_OFFSET_MASK) + memory::LINE_BYTES) & 007777); + didrestart = false; + outstandingfill = false; + } + } + + if (!outstandingfill && c./*fetch_*/mem_commandp.can_write() && !cache.probe(pc)) { + memory::dram::command fr; + fr.transaction = infra::pt::toplevel(); + fr.line_address = pc >> memory::LINE_BYTES_LOG2; + fr.responsep = &c.fetch_mem_responsep; + c./*fetch_*/mem_commandp.write(std::move(fr)); + outstandingfill = true; } } @@ -47,28 +64,31 @@ void decode_stage::clock() { bool progress = ctlregs[HALTED]; if (!ctlregs[HALTED] && c.fetch_bundlep.can_read()) { - auto b = c.fetch_bundlep.read(); + auto b = c.fetch_bundlep.peek(); if (b.gen != gen) goto bail_out; - if (b.pc != pc) { + if ((b.pc >> memory::LINE_BYTES_LOG2) != (pc >> memory::LINE_BYTES_LOG2)) { pte(b.tr, "~"); fetch_restart r; r.tr = b.tr; r.new_gen = ++gen; r.new_pc = pc; c.fetch_restarto = std::move(r); + c.fetch_bundlep.discard(); goto bail_out; } progress = true; - pte(b.tr, "E"); + auto tr = infra::pt::child(b.tr); + + pte(tr, "E"); inst = decode(ctlregs[FLAGS], pc, - c.mem.fetch(pc), + b.data[pc & memory::LINE_BYTE_OFFSET_MASK], interrupt); auto next_pc = inst.next_pc; @@ -84,7 +104,7 @@ void decode_stage::clock() { assert(!inst.need_autoinc_store); } - pte(b.tr, "", inst.disasm()); + pte(tr, "", inst.disasm()); if (inst.need_exec_load) inst.data = c.mem.fetch(inst.final_address.value()); @@ -114,6 +134,9 @@ void decode_stage::clock() { assert(inst.next_pc == next_pc || inst.possibly_redirects); pc = inst.next_pc; + + if ((b.pc >> memory::LINE_BYTES_LOG2) != (pc >> memory::LINE_BYTES_LOG2)) + c.fetch_bundlep.discard(); } bail_out: @@ -125,7 +148,7 @@ bail_out: c.checker.execute(); assert(c.checker.icount == icount); - std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", icount, pc, c.checker.pc); +// std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", icount, pc, c.checker.pc); assert(pc == c.checker.pc); assert(acc == c.checker.acc); assert(link == c.checker.link); diff --git a/uarch/core.h b/uarch/core.h index 0f9be74..a6772f3 100644 --- a/uarch/core.h +++ b/uarch/core.h @@ -2,11 +2,14 @@ #include +#include "infra/arbiter.h" #include "infra/pipetrace.h" #include "infra/port.h" #include "io/model.h" #include "isa/checker.h" #include "isa/isa.h" +#include "memory/cache.h" +#include "memory/dram.h" struct core; @@ -14,7 +17,7 @@ struct fetch_bundle { infra::transaction tr; unsigned int gen; unsigned int pc; - unsigned int word; + memory::line data; }; struct fetch_restart { @@ -26,9 +29,12 @@ struct fetch_restart { struct fetch_stage : public infra::sim { core &c; + memory::inline_cache<8, 2> cache; + unsigned int gen = 0; unsigned int pc; bool didrestart = false; + bool outstandingfill = false; fetch_stage(core &c); @@ -49,6 +55,7 @@ struct decode_stage : public infra::sim { std::array ctlregs; std::uint64_t icount; instruction_context inst; + bool outstandingfill = false; decode_stage(core &c); @@ -58,11 +65,20 @@ struct decode_stage : public infra::sim { struct core { iomodel &system; funcchecker checker; - funcmem mem; + memory::dram mem{12}; + infra::port mem_commandp; + + infra::priority_arbiter mem_command_arb; + + infra::port fetch_mem_commandp; + infra::port fetch_mem_responsep; infra::port fetch_bundlep; std::optional fetch_restarto; + infra::port decode_mem_commandp; + infra::port decode_mem_responsep; + // Construction order is execution order within a cycle, so this list should be back-to-front (for zero-cycle restarts) decode_stage decode{*this}; fetch_stage fetch{*this}; @@ -70,5 +86,10 @@ struct core { core(iomodel &model) : system(model) , checker(model) - { } + { + mem.commandp = &mem_commandp; + mem_command_arb.outp = &mem_commandp; + mem_command_arb.peerp[0] = &decode_mem_commandp; + mem_command_arb.peerp[1] = &fetch_mem_commandp; + } }; -- cgit v1.2.3