From 6b7d1cdc98475f2f21002e7de5a2c563215cb279 Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Sun, 15 Jan 2023 14:24:17 -0800 Subject: Reduce stalling due to stores by using a global counter. --- uarch/core.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'uarch/core.cpp') diff --git a/uarch/core.cpp b/uarch/core.cpp index 7304442..76af173 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp @@ -77,13 +77,7 @@ void decode_stage::clock() { speculative_stores_sent = r.stores_sent; } - if (c.decode_store_completep.can_read()) { - ++stores_done; - assert((int)(speculative_stores_sent - stores_done) >= 0); - c.decode_store_completep.discard(); - } - - if (speculative_stores_sent == stores_done && c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { + if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { auto b = c.fetch_bundlep.peek(); if (b.gen != c.gen) { @@ -106,6 +100,11 @@ void decode_stage::clock() { return; } + if (speculative_stores_sent != c.stores_done) { + pte(b.tr, "z"); + return; + } + inst_bundle i; i.tr = infra::pt::child(b.tr); @@ -190,7 +189,6 @@ void indir_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - sr.responsep = &c.decode_store_completep; c.indir_mem_store_commandp.write(std::move(sr)); } else { pte(i.tr, "I"); @@ -238,9 +236,9 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - sr.responsep = &c.decode_store_completep; c.exec_mem_commandp.write(std::move(sr)); stores_sent += 2; // Original store sent by Indir stage plus unstore here + c.stores_done += 2; } c.decode_to_exec_instp.discard(); } else if (i.icount == c.icount) { @@ -261,9 +259,9 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - sr.responsep = &c.decode_store_completep; c.exec_mem_commandp.write(std::move(sr)); stores_sent += 2; // Original store sent by Indir stage plus unstore here + c.stores_done += 2; } c.indir_to_exec_instp.discard(); } else if (i.icount == c.icount) { @@ -305,8 +303,10 @@ void exec_stage::clock() { auto next_pc = inst.next_pc; - if (inst.need_autoinc_store) + if (inst.need_autoinc_store) { ++stores_sent; // It was sent by Indir stage + ++c.stores_done; + } if (inst.need_read_acc) inst.acc = acc; if (inst.need_read_link) @@ -338,9 +338,9 @@ void exec_stage::clock() { sr.mask.fill(false); sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; sr.write = true; - sr.responsep = &c.decode_store_completep; c.exec_mem_commandp.write(std::move(sr)); ++stores_sent; + ++c.stores_done; } assert(inst.next_pc == next_pc || inst.possibly_redirects); -- cgit v1.2.3