From 58da72b83d4f6ef1a729ef5fafc9cb64331af601 Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Sun, 22 Jan 2023 14:27:27 -0800 Subject: Fix focal 69. (by accident, by adding a d-side cache with write-through stores, and propagating writes to the i-side cache) --- uarch/core.cpp | 99 +++++++++++++++++++++++++++++++++++++++++++--------------- uarch/core.h | 19 +++++------ 2 files changed, 83 insertions(+), 35 deletions(-) (limited to 'uarch') diff --git a/uarch/core.cpp b/uarch/core.cpp index ce58df2..e2f35e7 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp @@ -38,12 +38,12 @@ void fetch_stage::clock() { if (c.fetch_mem_responsep.can_read()) { auto r = c.fetch_mem_responsep.read(); - cache.handle_response(r); + c.icache.handle_response(r); } if (c.fetch_bundlep.can_write()) { fetch_bundle b; - if (auto t = cache.fetchline(b.data, pc); t.has_value()) { + if (auto t = c.icache.fetchline(b.data, pc); t.has_value()) { b.tr = infra::pt::toplevel(); b.gen = c.gen; b.pc = pc; @@ -57,7 +57,7 @@ void fetch_stage::clock() { } } - if (!outstandingfill && c.fetch_mem_commandp.can_write() && !cache.probe(pc)) { + if (!outstandingfill && c.fetch_mem_commandp.can_write() && !c.icache.probe(pc)) { memory::dram::command fr; fr.transaction = infra::pt::toplevel(); pte(fr.transaction, "p", fmt::format("fpc={:05o}", pc)); @@ -77,7 +77,7 @@ void decode_stage::clock() { speculative_stores_sent = r.stores_sent; } - if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { + if (c.fetch_bundlep.can_read() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { auto b = c.fetch_bundlep.peek(); if (b.gen != c.gen) { @@ -129,14 +129,17 @@ void decode_stage::clock() { speculative_stores_sent += i.inst.need_exec_store; if (i.inst.need_indirect_load) { +#if 0 memory::dram::command fr; fr.transaction = i.tr; fr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; fr.responsep = &c.indir_mem_responsep; pte(i.tr, "", fmt::format("iload={:05o}", *i.inst.init_address)); c.decode_mem_commandp.write(std::move(fr)); +#endif c.indir_instp.write(std::move(i)); } else { +#if 0 if (i.inst.need_exec_load) { memory::dram::command fr; fr.transaction = i.tr; @@ -145,6 +148,7 @@ void decode_stage::clock() { pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); c.decode_mem_commandp.write(std::move(fr)); } +#endif c.decode_to_exec_instp.write(std::move(i)); } @@ -160,7 +164,12 @@ void indir_stage::clock() { gen = c.gen; } - if (c.indir_instp.can_read() && c.indir_mem_load_commandp.can_write() && c.indir_mem_store_commandp.can_write() && c.indir_to_exec_instp.can_write()) { + if (c.indir_mem_responsep.can_read()) { + auto r = c.indir_mem_responsep.read(); + c.dcache.handle_response(r); + } + + if (c.indir_instp.can_read() && c.indir_to_exec_instp.can_write()) { auto &i = c.indir_instp.peek(); if (i.gen != gen && i.gen != c.gen) { @@ -172,15 +181,23 @@ void indir_stage::clock() { assert(i.gen == gen); } - if (i.inst.need_indirect_load && !c.indir_mem_responsep.can_read()) - return; - if (i.inst.need_indirect_load) { - auto l = c.indir_mem_responsep.read(); - if (l.line_address != i.inst.init_address.value() >> memory::LINE_BYTES_LOG2) + unsigned int addr; + auto t = c.dcache.fetch(addr, i.inst.init_address.value()); + if (!t.has_value()) { + if (c.indir_mem_load_commandp.can_write()) { + memory::dram::command fr; + fr.transaction = i.tr; + pte(fr.transaction, "1", fmt::format("indir load fill {:05o}", i.inst.init_address.value())); + fr.line_address = i.inst.init_address.value() >> memory::LINE_BYTES_LOG2; + fr.responsep = &c.indir_mem_responsep; + c.indir_mem_load_commandp.write(std::move(fr)); + } return; - auto addr = l.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK]; + } if (i.inst.need_autoinc_store) { + if (!c.indir_mem_store_commandp.can_write()) + return; addr = (addr + 1) & 07777; pte(i.tr, "+", fmt::format("istore={:05o} istoredata={:04o}", *i.inst.init_address, addr)); memory::dram::command sr; @@ -191,6 +208,8 @@ void indir_stage::clock() { sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; c.indir_mem_store_commandp.write(std::move(sr)); + c.dcache.opportunistic_store(*i.inst.init_address, addr); + c.icache.opportunistic_store(*i.inst.init_address, addr); } else { pte(i.tr, "I"); } @@ -198,6 +217,7 @@ void indir_stage::clock() { i.inst.final_address = (unsigned int)((df << 12) | addr); } +#if 0 if (i.inst.need_exec_load) { memory::dram::command fr; fr.transaction = i.tr; @@ -206,6 +226,7 @@ void indir_stage::clock() { pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); c.indir_mem_load_commandp.write(std::move(fr)); } +#endif c.indir_to_exec_instp.write(std::move(i)); @@ -216,18 +237,24 @@ void indir_stage::clock() { void exec_stage::clock() { c.restarto.reset(); + if (c.exec_mem_responsep.can_read()) { + auto r = c.exec_mem_responsep.read(); + c.dcache.handle_response(r); + } + std::optional restarttr; std::optional insto; bool progress = ctlregs[HALTED]; - if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read()) && c.exec_mem_commandp.can_write()) { + if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read())) { infra::port *instp = nullptr; if (c.decode_to_exec_instp.can_read()) { auto &i = c.decode_to_exec_instp.peek(); if (i.gen != gen && i.gen != c.gen) { - pte(i.tr, "~"); if (i.inst.need_autoinc_store) { + if (!c.exec_mem_store_commandp.can_write()) + return; auto addr = (*i.inst.final_address - 1) & 07777; pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); memory::dram::command sr; @@ -237,10 +264,13 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - c.exec_mem_commandp.write(std::move(sr)); + c.exec_mem_store_commandp.write(std::move(sr)); stores_sent += 2; // Original store sent by Indir stage plus unstore here c.stores_done += 2; + c.dcache.opportunistic_store(*i.inst.init_address, addr); + c.icache.opportunistic_store(*i.inst.init_address, addr); } + pte(i.tr, "~"); c.decode_to_exec_instp.discard(); } else if (i.icount == c.icount) { instp = &c.decode_to_exec_instp; @@ -249,8 +279,9 @@ void exec_stage::clock() { if (c.indir_to_exec_instp.can_read()) { auto &i = c.indir_to_exec_instp.peek(); if (i.gen != gen && i.gen != c.gen) { - pte(i.tr, "~"); if (i.inst.need_autoinc_store) { + if (!c.exec_mem_store_commandp.can_write()) + return; auto addr = (*i.inst.final_address - 1) & 07777; pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); memory::dram::command sr; @@ -260,10 +291,13 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - c.exec_mem_commandp.write(std::move(sr)); + c.exec_mem_store_commandp.write(std::move(sr)); stores_sent += 2; // Original store sent by Indir stage plus unstore here c.stores_done += 2; + c.dcache.opportunistic_store(*i.inst.init_address, addr); + c.icache.opportunistic_store(*i.inst.init_address, addr); } + pte(i.tr, "~"); c.indir_to_exec_instp.discard(); } else if (i.icount == c.icount) { instp = &c.indir_to_exec_instp; @@ -271,8 +305,6 @@ void exec_stage::clock() { } if (!instp) goto bail_out; - if (!c.exec_mem_commandp.can_write()) - goto bail_out; auto &i = instp->peek(); assert(i.gen == gen || i.gen == c.gen); @@ -281,17 +313,27 @@ void exec_stage::clock() { assert(i.gen == gen); } - if (i.inst.need_exec_load && !c.exec_mem_responsep.can_read()) - return; - if (i.inst.need_exec_load) { - auto l = c.exec_mem_responsep.read(); - if (l.line_address != i.inst.final_address.value() >> memory::LINE_BYTES_LOG2) + unsigned int data; + auto t = c.dcache.fetch(data, i.inst.final_address.value()); + if (t.has_value()) { + i.inst.data = data; + } else { + if (c.exec_mem_load_commandp.can_write()) { + memory::dram::command fr; + fr.transaction = i.tr; + pte(fr.transaction, "2", fmt::format("exec load fill {:05o}", i.inst.final_address.value())); + fr.line_address = i.inst.final_address.value() >> memory::LINE_BYTES_LOG2; + fr.responsep = &c.exec_mem_responsep; + c.exec_mem_load_commandp.write(std::move(fr)); + } return; - i.inst.data = l.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK]; - pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); + } } + if (i.inst.need_exec_store && !c.exec_mem_store_commandp.can_write()) + return; + // We are committed to complete execution of this instruction this cycle. insto = std::move(i.inst); @@ -304,6 +346,9 @@ void exec_stage::clock() { auto next_pc = inst.next_pc; + if (inst.need_exec_load) + pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); + if (inst.need_autoinc_store) { ++stores_sent; // It was sent by Indir stage ++c.stores_done; @@ -339,9 +384,11 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - c.exec_mem_commandp.write(std::move(sr)); + c.exec_mem_store_commandp.write(std::move(sr)); ++stores_sent; ++c.stores_done; + c.dcache.opportunistic_store(*inst.final_address, *inst.data); + c.icache.opportunistic_store(*inst.final_address, *inst.data); } assert(inst.next_pc == next_pc || inst.possibly_redirects); diff --git a/uarch/core.h b/uarch/core.h index ebfd388..a4ad0fe 100644 --- a/uarch/core.h +++ b/uarch/core.h @@ -38,8 +38,6 @@ struct inst_bundle { struct fetch_stage : public infra::sim { core &c; - memory::inline_cache<8, 2> cache; - unsigned int pc; bool didrestart = false; @@ -99,6 +97,10 @@ struct core { iomodel &system; funcchecker checker; + // + memory::inline_cache<8, 2> icache; + memory::inline_cache<8, 2> dcache; + std::optional restarto; unsigned int gen = 0; @@ -113,8 +115,6 @@ struct core { infra::port fetch_mem_responsep; infra::port fetch_bundlep; - infra::port decode_mem_commandp; - infra::port decode_mem_responsep; infra::port decode_to_exec_instp; infra::port indir_instp; @@ -123,7 +123,8 @@ struct core { infra::port indir_mem_responsep; infra::port indir_to_exec_instp; - infra::port exec_mem_commandp; + infra::port exec_mem_load_commandp; + infra::port exec_mem_store_commandp; infra::port exec_mem_responsep; // Global counters (should be Gray code in FPGA implementation, only do == comparisons) @@ -142,10 +143,10 @@ struct core { { mem.commandp = &mem_commandp; mem_command_arb.outp = &mem_commandp; - mem_command_arb.peerp[0] = &exec_mem_commandp; - mem_command_arb.peerp[1] = &indir_mem_store_commandp; - mem_command_arb.peerp[2] = &indir_mem_load_commandp; - mem_command_arb.peerp[3] = &decode_mem_commandp; + mem_command_arb.peerp[0] = &exec_mem_store_commandp; + mem_command_arb.peerp[1] = &exec_mem_load_commandp; + mem_command_arb.peerp[2] = &indir_mem_store_commandp; + mem_command_arb.peerp[3] = &indir_mem_load_commandp; mem_command_arb.peerp[4] = &fetch_mem_commandp; } }; -- cgit v1.2.3