From 27a58c86ce494588a12023a12b027b7f44bb35fc Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Sun, 15 Jan 2023 14:02:54 -0800 Subject: Stall decode after an instruction with stores until the stores are done. --- Plan | 1 + isa/decode.cpp | 6 ++-- isa/isa.h | 27 +++++++++++++++++- uarch/core.cpp | 90 +++++++++++++++++++++++++++++++++++++++------------------- uarch/core.h | 7 +++++ 5 files changed, 98 insertions(+), 33 deletions(-) diff --git a/Plan b/Plan index c7e833c..55dc73e 100644 --- a/Plan +++ b/Plan @@ -10,6 +10,7 @@ vim: set sw=8 noet : of depending on the caller * Make it complete * Make it pretty + * Deadman timer * D-side cache * Store forwarding * Cache consistency between I and D side diff --git a/isa/decode.cpp b/isa/decode.cpp index f0cdca8..abcb3e3 100644 --- a/isa/decode.cpp +++ b/isa/decode.cpp @@ -88,15 +88,15 @@ instruction_context decode(std::uint_fast32_t flags, unsigned int pc, unsigned i #pragma GCC diagnostic pop instruction_context inst; - inst.bits = bits; - inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777); - if (interrupt) { inst.bits = bits = 04000; assert(df == 0); assert(ifb == 0); inst.next_pc = pc; pc = 0; + } else { + inst.bits = bits; + inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777); } switch (bits >> 9) { diff --git a/isa/isa.h b/isa/isa.h index 4083e16..f94a117 100644 --- a/isa/isa.h +++ b/isa/isa.h @@ -57,6 +57,9 @@ static std::string opr_disasm_group2_neg[0366]; static std::string opr_disasm_extended_arith[0376]; struct instruction_context { + // Known statically before decode time + unsigned int bits; + // Known statically at decode time bool need_indirect_load = false; // final_address = mem[init_address] bool need_autoinc_store = false; // mem[init_address] += 1 @@ -79,7 +82,6 @@ struct instruction_context { void execute() { ef(*this); } // May change over the lifetime of the instruction execution - unsigned int bits; unsigned int next_pc; // includes IF std::optional init_address; // includes DF std::optional final_address; // includes DF @@ -88,6 +90,29 @@ struct instruction_context { std::optional acc; std::optional link; std::optional mq; + + // N.B. two "identical" instructions may compare unequal if they are at different points in their execution + bool operator==(const instruction_context &that) const { + if (bits != that.bits) + return false; + if (init_address != that.init_address) + return false; + if (final_address != that.final_address) + return false; + if (ctlval != that.ctlval) + return false; + if (data != that.data) + return false; + if (acc != that.acc) + return false; + if (link != that.link) + return false; + if (mq != that.mq) + return false; + if (next_pc != that.next_pc) + return false; + return true; + } }; void init_disasm_tables(); diff --git a/uarch/core.cpp b/uarch/core.cpp index 12f8b5b..7304442 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp @@ -74,9 +74,16 @@ void decode_stage::clock() { pc = r.new_pc; interrupt |= r.interrupt; icount = c.icount; + speculative_stores_sent = r.stores_sent; } - if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { + if (c.decode_store_completep.can_read()) { + ++stores_done; + assert((int)(speculative_stores_sent - stores_done) >= 0); + c.decode_store_completep.discard(); + } + + if (speculative_stores_sent == stores_done && c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { auto b = c.fetch_bundlep.peek(); if (b.gen != c.gen) { @@ -118,6 +125,9 @@ void decode_stage::clock() { pc = i.inst.next_pc; + speculative_stores_sent += i.inst.need_autoinc_store; + speculative_stores_sent += i.inst.need_exec_store; + if (i.inst.need_indirect_load) { memory::dram::command fr; fr.transaction = i.tr; @@ -180,6 +190,7 @@ void indir_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; + sr.responsep = &c.decode_store_completep; c.indir_mem_store_commandp.write(std::move(sr)); } else { pte(i.tr, "I"); @@ -207,6 +218,7 @@ void exec_stage::clock() { c.restarto.reset(); std::optional restarttr; + std::optional insto; bool progress = ctlregs[HALTED]; @@ -226,7 +238,9 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; + sr.responsep = &c.decode_store_completep; c.exec_mem_commandp.write(std::move(sr)); + stores_sent += 2; // Original store sent by Indir stage plus unstore here } c.decode_to_exec_instp.discard(); } else if (i.icount == c.icount) { @@ -247,7 +261,9 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; + sr.responsep = &c.decode_store_completep; c.exec_mem_commandp.write(std::move(sr)); + stores_sent += 2; // Original store sent by Indir stage plus unstore here } c.indir_to_exec_instp.discard(); } else if (i.icount == c.icount) { @@ -277,49 +293,58 @@ void exec_stage::clock() { pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); } + // We are committed to complete execution of this instruction this cycle. + + insto = std::move(i.inst); + auto &inst = *insto; + pte(i.tr, "E"); progress = true; assert(i.pc == pc); - auto next_pc = i.inst.next_pc; - - if (i.inst.need_read_acc) - i.inst.acc = acc; - if (i.inst.need_read_link) - i.inst.link = link; - if (i.inst.need_read_mq) - i.inst.mq = mq; - if (i.inst.read_ctlreg.has_value()) - i.inst.ctlval = ctlregs[*i.inst.read_ctlreg]; - - i.inst.execute(); - - if (i.inst.need_write_acc) - acc = i.inst.acc.value(); - if (i.inst.need_write_link) - link = i.inst.link.value(); - if (i.inst.need_write_mq) - mq = i.inst.mq.value(); - if (i.inst.write_ctlreg.has_value()) { - ctlregs[*i.inst.write_ctlreg] = i.inst.ctlval.value(); + auto next_pc = inst.next_pc; + + if (inst.need_autoinc_store) + ++stores_sent; // It was sent by Indir stage + if (inst.need_read_acc) + inst.acc = acc; + if (inst.need_read_link) + inst.link = link; + if (inst.need_read_mq) + inst.mq = mq; + if (inst.read_ctlreg.has_value()) + inst.ctlval = ctlregs[*inst.read_ctlreg]; + + inst.execute(); + + if (inst.need_write_acc) + acc = inst.acc.value(); + if (inst.need_write_link) + link = inst.link.value(); + if (inst.need_write_mq) + mq = inst.mq.value(); + if (inst.write_ctlreg.has_value()) { + ctlregs[*inst.write_ctlreg] = inst.ctlval.value(); restarttr = i.tr; } - if (i.inst.need_exec_store) { - pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *i.inst.final_address, *i.inst.data)); + if (inst.need_exec_store) { + pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *inst.final_address, *inst.data)); memory::dram::command sr; sr.transaction = i.tr; - sr.line_address = *i.inst.final_address >> memory::LINE_BYTES_LOG2; - sr.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *i.inst.data; + sr.line_address = *inst.final_address >> memory::LINE_BYTES_LOG2; + sr.data[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *inst.data; sr.mask.fill(false); - sr.mask[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; + sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; + sr.responsep = &c.decode_store_completep; c.exec_mem_commandp.write(std::move(sr)); + ++stores_sent; } - assert(i.inst.next_pc == next_pc || i.inst.possibly_redirects); - pc = i.inst.next_pc; + assert(inst.next_pc == next_pc || inst.possibly_redirects); + pc = inst.next_pc; if (pc != next_pc) { pte(i.tr, "", fmt::format("jump={:05o}", pc)); @@ -345,6 +370,7 @@ bail_out: r.tr = *restarttr; r.new_pc = pc; r.interrupt = interrupt; + r.stores_sent = stores_sent; gen = ++c.gen; c.restarto = std::move(r); } @@ -356,6 +382,12 @@ bail_out: assert(c.checker.icount == c.icount); // std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", c.icount, pc, c.checker.pc); assert(pc == c.checker.pc); + + if (insto.has_value()) { + auto &inst = *insto; + assert(inst == c.checker.inst); + } + assert(acc == c.checker.acc); assert(link == c.checker.link); assert(mq == c.checker.mq); diff --git a/uarch/core.h b/uarch/core.h index b53a205..21725b3 100644 --- a/uarch/core.h +++ b/uarch/core.h @@ -17,6 +17,7 @@ struct restart { infra::transaction tr; unsigned int new_pc; bool interrupt; + unsigned int stores_sent = 0; }; struct fetch_bundle { @@ -57,6 +58,9 @@ struct decode_stage : public infra::sim { unsigned int pc; std::uint64_t icount; + unsigned int speculative_stores_sent = 0; + unsigned int stores_done = 0; + decode_stage(core &c); void clock(); @@ -77,6 +81,8 @@ struct exec_stage : public infra::sim { unsigned int gen = 0; + unsigned int stores_sent = 0; + unsigned int acc; unsigned int link; unsigned int mq; @@ -108,6 +114,7 @@ struct core { infra::port decode_mem_commandp; infra::port decode_mem_responsep; + infra::port decode_store_completep; infra::port decode_to_exec_instp; infra::port indir_instp; -- cgit v1.2.3