From fdb61389099cee41b86c25c3d1ddb660b509e5fa Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Sat, 19 Nov 2022 16:29:27 -0800 Subject: Pipelined microarchitecture, which even almost works! Includes such horrors as the "unstore" operation for undoing autoincremented addresses on instructions subsequently found to be bogus. --- BigGolf | 10 +- evts/quickfoobar.evt | 12 ++ isa/decode.cpp | 4 +- nanosim | 2 +- uarch/core.cpp | 348 ++++++++++++++++++++++++++++++++++++++++----------- uarch/core.h | 74 ++++++++--- 6 files changed, 354 insertions(+), 96 deletions(-) create mode 100644 evts/quickfoobar.evt diff --git a/BigGolf b/BigGolf index a358ed8..4e0e9d2 100644 --- a/BigGolf +++ b/BigGolf @@ -26,14 +26,16 @@ Memory transactions: Opcodes that do it: (second set is the indirect versi + -----------------> +Fetch -> Decode -> Indir -> Exec + + ┌─────┐ ┌──────┐ ┌────┐ │Fetch├──────►│Decode│ ┌─►│Exec│ └─────┘ └──────┘ │ └────┘ │ next_pc ┌───init_indirect_load │ init_execution_store │ init_execution_load───┤ retire - │ init_execution_store │ - │ retire │ │ rubberband_stall(1/2) │ │ │ │ ┌───────┐ │ @@ -42,16 +44,12 @@ Memory transactions: Opcodes that do it: (second set is the indirect versi │ ┌───init_autoinc_store │ │ init_execution_load───┤ - │ init_execution_store │ - │ retire │ │ │ │ ┌─────┐ │ └─►│Indir│ │ └─────┘ │ │ init_execution_load───┘ - init_execution_store - retire diff --git a/evts/quickfoobar.evt b/evts/quickfoobar.evt new file mode 100644 index 0000000..a89f0d5 --- /dev/null +++ b/evts/quickfoobar.evt @@ -0,0 +1,12 @@ ++10 +$foo +TT_BITS=0x0A01/0xFF01 # kbd in '\n' ++100 +HALTED=1 ++1000 +HALTED=0 ++10 +$bar +TT_BITS=0x0A01/0xFF01 # kbd in '\n' ++100 +HALTED=1 diff --git a/isa/decode.cpp b/isa/decode.cpp index 061fe5c..f0cdca8 100644 --- a/isa/decode.cpp +++ b/isa/decode.cpp @@ -92,7 +92,7 @@ instruction_context decode(std::uint_fast32_t flags, unsigned int pc, unsigned i inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777); if (interrupt) { - bits = 04000; + inst.bits = bits = 04000; assert(df == 0); assert(ifb == 0); inst.next_pc = pc; @@ -146,7 +146,7 @@ instruction_context decode(std::uint_fast32_t flags, unsigned int pc, unsigned i case 4: // JMS jump subroutine inst.need_exec_store = true; inst.possibly_redirects = true; - inst.df = "JMS %a"; + inst.df = interrupt ? " JMS %a" : "JMS %a"; inst.ef = [ifb](auto &ctx) { ctx.data = ctx.next_pc; ctx.next_pc = (ifb << 12) | ((ctx.final_address.value() + 1) & 07777); diff --git a/nanosim b/nanosim index db82579..bab124c 160000 --- a/nanosim +++ b/nanosim @@ -1 +1 @@ -Subproject commit db82579d3c023c441c895d26d32de3fa039eafa4 +Subproject commit bab124c8c00053eebd24329b95f0554440a5004e diff --git a/uarch/core.cpp b/uarch/core.cpp index 8b2db9b..12f8b5b 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp @@ -9,24 +9,31 @@ fetch_stage::fetch_stage(core &c) { } decode_stage::decode_stage(core &c) + : c(c) + , pc(c.checker.pc) + , icount(c.checker.icount) +{ } + +indir_stage::indir_stage(core &c) + : c(c) +{ } + +exec_stage::exec_stage(core &c) : c(c) , acc(c.checker.acc) , link(c.checker.link) , mq(c.checker.mq) , pc(c.checker.pc) , ctlregs(c.checker.ctlregs) - , icount(c.checker.icount) { } void fetch_stage::clock() { - if (c.fetch_restarto.has_value()) { - auto &r = *c.fetch_restarto; - pte(r.tr, "!"); - gen = r.new_gen; + if (c.restarto.has_value()) { + auto &r = *c.restarto; + pte(r.tr, "!", fmt::format("newpc={:05o}", r.new_pc)); pc = r.new_pc; didrestart = true; outstandingfill = false; - c.fetch_restarto.reset(); } if (c.fetch_mem_responsep.can_read()) { @@ -38,11 +45,11 @@ void fetch_stage::clock() { fetch_bundle b; if (auto t = cache.fetchline(b.data, pc); t.has_value()) { b.tr = infra::pt::toplevel(); - b.gen = gen; + b.gen = c.gen; b.pc = pc; if (didrestart) infra::pt::event(b.tr, ">", now-1, ""); - pte(b.tr, "F", fmt::format("pc={:05o}", b.pc)); + pte(b.tr, "F"); c.fetch_bundlep.write(std::move(b)); pc = (pc & 070000) | (((pc & ~memory::LINE_BYTE_OFFSET_MASK) + memory::LINE_BYTES) & 007777); didrestart = false; @@ -50,114 +57,309 @@ void fetch_stage::clock() { } } - if (!outstandingfill && c./*fetch_*/mem_commandp.can_write() && !cache.probe(pc)) { + if (!outstandingfill && c.fetch_mem_commandp.can_write() && !cache.probe(pc)) { memory::dram::command fr; fr.transaction = infra::pt::toplevel(); + pte(fr.transaction, "p", fmt::format("fpc={:05o}", pc)); fr.line_address = pc >> memory::LINE_BYTES_LOG2; fr.responsep = &c.fetch_mem_responsep; - c./*fetch_*/mem_commandp.write(std::move(fr)); + c.fetch_mem_commandp.write(std::move(fr)); outstandingfill = true; } } void decode_stage::clock() { - bool progress = ctlregs[HALTED]; + if (c.restarto.has_value()) { + auto &r = *c.restarto; + pc = r.new_pc; + interrupt |= r.interrupt; + icount = c.icount; + } - if (!ctlregs[HALTED] && c.fetch_bundlep.can_read()) { + if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { auto b = c.fetch_bundlep.peek(); - if (b.gen != gen) - goto bail_out; + if (b.gen != c.gen) { + pte(b.tr, "~"); + c.fetch_bundlep.discard(); + return; + } if ((b.pc >> memory::LINE_BYTES_LOG2) != (pc >> memory::LINE_BYTES_LOG2)) { + if (!c.restarto.has_value()) { + restart r; + r.tr = b.tr; + r.new_pc = pc; + r.interrupt = false; + ++c.gen; + c.restarto = std::move(r); + } pte(b.tr, "~"); - fetch_restart r; - r.tr = b.tr; - r.new_gen = ++gen; - r.new_pc = pc; - c.fetch_restarto = std::move(r); c.fetch_bundlep.discard(); - goto bail_out; + return; } - progress = true; + inst_bundle i; - auto tr = infra::pt::child(b.tr); + i.tr = infra::pt::child(b.tr); + i.gen = c.gen; + i.pc = pc; + i.icount = icount++; - pte(tr, "E"); + pte(i.tr, "D"); - inst = decode(ctlregs[FLAGS], - pc, - b.data[pc & memory::LINE_BYTE_OFFSET_MASK], - interrupt); - auto next_pc = inst.next_pc; + i.inst = decode(c.exec.ctlregs[FLAGS], + pc, + b.data[pc & memory::LINE_BYTE_OFFSET_MASK], + interrupt); + interrupt = false; + + pte(i.tr, "", fmt::format("{:05o}: {}", pc, i.inst.disasm())); + + pc = i.inst.next_pc; + + if (i.inst.need_indirect_load) { + memory::dram::command fr; + fr.transaction = i.tr; + fr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; + fr.responsep = &c.indir_mem_responsep; + pte(i.tr, "", fmt::format("iload={:05o}", *i.inst.init_address)); + c.decode_mem_commandp.write(std::move(fr)); + c.indir_instp.write(std::move(i)); + } else { + if (i.inst.need_exec_load) { + memory::dram::command fr; + fr.transaction = i.tr; + fr.line_address = *i.inst.final_address >> memory::LINE_BYTES_LOG2; + fr.responsep = &c.exec_mem_responsep; + pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); + c.decode_mem_commandp.write(std::move(fr)); + } + c.decode_to_exec_instp.write(std::move(i)); + } + + if ((b.pc >> memory::LINE_BYTES_LOG2) != (pc >> memory::LINE_BYTES_LOG2)) { + pte(b.tr, "~"); + c.fetch_bundlep.discard(); + } + } +} + +void indir_stage::clock() { + if (c.restarto.has_value()) { + gen = c.gen; + } + + if (c.indir_instp.can_read() && c.indir_mem_load_commandp.can_write() && c.indir_mem_store_commandp.can_write() && c.indir_to_exec_instp.can_write()) { + auto &i = c.indir_instp.peek(); + + if (i.gen != gen && i.gen != c.gen) { + pte(i.tr, "~"); + c.indir_instp.discard(); + return; + } else if (i.gen != gen) { + gen = c.gen; + assert(i.gen == gen); + } - if (inst.need_indirect_load) { - auto addr = c.mem.fetch(inst.init_address.value()); - if (inst.need_autoinc_store) { + if (i.inst.need_indirect_load && !c.indir_mem_responsep.can_read()) + return; + + if (i.inst.need_indirect_load) { + auto l = c.indir_mem_responsep.read(); + if (l.line_address != i.inst.init_address.value() >> memory::LINE_BYTES_LOG2) + return; + auto addr = l.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK]; + if (i.inst.need_autoinc_store) { addr = (addr + 1) & 07777; - c.mem.store(*inst.init_address, addr); + pte(i.tr, "+", fmt::format("istore={:05o} istoredata={:04o}", *i.inst.init_address, addr)); + memory::dram::command sr; + sr.transaction = i.tr; + sr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; + sr.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = addr; + sr.mask.fill(false); + sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; + sr.write = true; + c.indir_mem_store_commandp.write(std::move(sr)); + } else { + pte(i.tr, "I"); } - auto df = (ctlregs[FLAGS] & FLAG_DF) >> FLAG_DF_SHIFT; - inst.final_address = (df << 12) | addr; - } else { - assert(!inst.need_autoinc_store); + auto df = (c.exec.ctlregs[FLAGS] & FLAG_DF) >> FLAG_DF_SHIFT; + i.inst.final_address = (unsigned int)((df << 12) | addr); } - pte(tr, "", inst.disasm()); + if (i.inst.need_exec_load) { + memory::dram::command fr; + fr.transaction = i.tr; + fr.line_address = *i.inst.final_address >> memory::LINE_BYTES_LOG2; + fr.responsep = &c.exec_mem_responsep; + pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); + c.indir_mem_load_commandp.write(std::move(fr)); + } - if (inst.need_exec_load) - inst.data = c.mem.fetch(inst.final_address.value()); + c.indir_to_exec_instp.write(std::move(i)); - if (inst.need_read_acc) - inst.acc = acc; - if (inst.need_read_link) - inst.link = link; - if (inst.need_read_mq) - inst.mq = mq; - if (inst.read_ctlreg.has_value()) - inst.ctlval = ctlregs[*inst.read_ctlreg]; + c.indir_instp.discard(); + } +} + +void exec_stage::clock() { + c.restarto.reset(); + + std::optional restarttr; + + bool progress = ctlregs[HALTED]; - inst.execute(); + if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read()) && c.exec_mem_commandp.can_write()) { + infra::port *instp = nullptr; + if (c.decode_to_exec_instp.can_read()) { + auto &i = c.decode_to_exec_instp.peek(); + if (i.gen != gen && i.gen != c.gen) { + pte(i.tr, "~"); + if (i.inst.need_autoinc_store) { + auto addr = (*i.inst.final_address - 1) & 07777; + pte(i.tr, "", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); + memory::dram::command sr; + sr.transaction = i.tr; + sr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; + sr.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = addr; + sr.mask.fill(false); + sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; + sr.write = true; + c.exec_mem_commandp.write(std::move(sr)); + } + c.decode_to_exec_instp.discard(); + } else if (i.icount == c.icount) { + instp = &c.decode_to_exec_instp; + } + } + if (c.indir_to_exec_instp.can_read()) { + auto &i = c.indir_to_exec_instp.peek(); + if (i.gen != gen && i.gen != c.gen) { + pte(i.tr, "~"); + if (i.inst.need_autoinc_store) { + auto addr = (*i.inst.final_address - 1) & 07777; + pte(i.tr, "", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); + memory::dram::command sr; + sr.transaction = i.tr; + sr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; + sr.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = addr; + sr.mask.fill(false); + sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; + sr.write = true; + c.exec_mem_commandp.write(std::move(sr)); + } + c.indir_to_exec_instp.discard(); + } else if (i.icount == c.icount) { + instp = &c.indir_to_exec_instp; + } + } + if (!instp) + goto bail_out; + if (!c.exec_mem_commandp.can_write()) + goto bail_out; + auto &i = instp->peek(); - if (inst.need_write_acc) - acc = inst.acc.value(); - if (inst.need_write_link) - link = inst.link.value(); - if (inst.need_write_mq) - mq = inst.mq.value(); - if (inst.write_ctlreg.has_value()) - ctlregs[*inst.write_ctlreg] = inst.ctlval.value(); + assert(i.gen == gen || i.gen == c.gen); + if (i.gen != gen) { + gen = c.gen; + assert(i.gen == gen); + } - if (inst.need_exec_store) - c.mem.store(inst.final_address.value(), inst.data.value()); + if (i.inst.need_exec_load && !c.exec_mem_responsep.can_read()) + return; - assert(inst.next_pc == next_pc || inst.possibly_redirects); - pc = inst.next_pc; + if (i.inst.need_exec_load) { + auto l = c.exec_mem_responsep.read(); + if (l.line_address != i.inst.final_address.value() >> memory::LINE_BYTES_LOG2) + return; + i.inst.data = l.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK]; + pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); + } - if ((b.pc >> memory::LINE_BYTES_LOG2) != (pc >> memory::LINE_BYTES_LOG2)) - c.fetch_bundlep.discard(); + pte(i.tr, "E"); + progress = true; + + assert(i.pc == pc); + + auto next_pc = i.inst.next_pc; + + if (i.inst.need_read_acc) + i.inst.acc = acc; + if (i.inst.need_read_link) + i.inst.link = link; + if (i.inst.need_read_mq) + i.inst.mq = mq; + if (i.inst.read_ctlreg.has_value()) + i.inst.ctlval = ctlregs[*i.inst.read_ctlreg]; + + i.inst.execute(); + + if (i.inst.need_write_acc) + acc = i.inst.acc.value(); + if (i.inst.need_write_link) + link = i.inst.link.value(); + if (i.inst.need_write_mq) + mq = i.inst.mq.value(); + if (i.inst.write_ctlreg.has_value()) { + ctlregs[*i.inst.write_ctlreg] = i.inst.ctlval.value(); + restarttr = i.tr; + } + + if (i.inst.need_exec_store) { + pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *i.inst.final_address, *i.inst.data)); + memory::dram::command sr; + sr.transaction = i.tr; + sr.line_address = *i.inst.final_address >> memory::LINE_BYTES_LOG2; + sr.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *i.inst.data; + sr.mask.fill(false); + sr.mask[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; + sr.write = true; + c.exec_mem_commandp.write(std::move(sr)); + } + + assert(i.inst.next_pc == next_pc || i.inst.possibly_redirects); + pc = i.inst.next_pc; + + if (pc != next_pc) { + pte(i.tr, "", fmt::format("jump={:05o}", pc)); + restarttr = i.tr; + } + + instp->discard(); } + bail_out: + bool interrupt = false; - if (progress) - interrupt = c.system.interact(icount++, ctlregs); + if (progress) { + auto oldctlregs = ctlregs; + interrupt = c.system.interact(c.icount++, ctlregs); + if (interrupt || oldctlregs != ctlregs) + if (!restarttr.has_value()) + restarttr = infra::pt::toplevel(); + } + + if (restarttr.has_value()) { + restart r; + r.tr = *restarttr; + r.new_pc = pc; + r.interrupt = interrupt; + gen = ++c.gen; + c.restarto = std::move(r); + } - if (c.checker.icount != icount) { - assert(c.checker.icount + 1 == icount); + if (c.checker.icount != c.icount) { + assert(c.checker.icount + 1 == c.icount); c.checker.execute(); - assert(c.checker.icount == icount); -// std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", icount, pc, c.checker.pc); + assert(c.checker.icount == c.icount); +// std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", c.icount, pc, c.checker.pc); assert(pc == c.checker.pc); assert(acc == c.checker.acc); assert(link == c.checker.link); assert(mq == c.checker.mq); assert(ctlregs == c.checker.ctlregs); - if (inst.init_address.has_value()) - assert(c.mem.fetch(*inst.init_address) == c.checker.mem.fetch(*inst.init_address)); - if (inst.final_address.has_value()) - assert(c.mem.fetch(*inst.final_address) == c.checker.mem.fetch(*inst.final_address)); - assert(c.mem.fetch(pc) == c.checker.mem.fetch(pc)); } + } diff --git a/uarch/core.h b/uarch/core.h index a6772f3..b53a205 100644 --- a/uarch/core.h +++ b/uarch/core.h @@ -13,6 +13,12 @@ struct core; +struct restart { + infra::transaction tr; + unsigned int new_pc; + bool interrupt; +}; + struct fetch_bundle { infra::transaction tr; unsigned int gen; @@ -20,10 +26,12 @@ struct fetch_bundle { memory::line data; }; -struct fetch_restart { +struct inst_bundle { infra::transaction tr; - unsigned int new_gen; - unsigned int new_pc; + unsigned int gen; + unsigned int pc; + std::uint64_t icount; + instruction_context inst; }; struct fetch_stage : public infra::sim { @@ -31,8 +39,8 @@ struct fetch_stage : public infra::sim { memory::inline_cache<8, 2> cache; - unsigned int gen = 0; unsigned int pc; + bool didrestart = false; bool outstandingfill = false; @@ -44,20 +52,38 @@ struct fetch_stage : public infra::sim { struct decode_stage : public infra::sim { core &c; + bool interrupt = false; + + unsigned int pc; + std::uint64_t icount; + + decode_stage(core &c); + + void clock(); +}; + +struct indir_stage : public infra::sim { + core &c; + unsigned int gen = 0; - bool interrupt = false; + indir_stage(core &c); + + void clock(); +}; + +struct exec_stage : public infra::sim { + core &c; + + unsigned int gen = 0; unsigned int acc; unsigned int link; unsigned int mq; unsigned int pc; std::array ctlregs; - std::uint64_t icount; - instruction_context inst; - bool outstandingfill = false; - decode_stage(core &c); + exec_stage(core &c); void clock(); }; @@ -66,30 +92,50 @@ struct core { iomodel &system; funcchecker checker; - memory::dram mem{12}; + std::optional restarto; + unsigned int gen = 0; + + std::uint64_t icount; + + memory::dram mem{0}; infra::port mem_commandp; - infra::priority_arbiter mem_command_arb; + infra::priority_arbiter mem_command_arb; infra::port fetch_mem_commandp; infra::port fetch_mem_responsep; infra::port fetch_bundlep; - std::optional fetch_restarto; infra::port decode_mem_commandp; infra::port decode_mem_responsep; + infra::port decode_to_exec_instp; + + infra::port indir_instp; + infra::port indir_mem_load_commandp; + infra::port indir_mem_store_commandp; + infra::port indir_mem_responsep; + infra::port indir_to_exec_instp; + + infra::port exec_mem_commandp; + infra::port exec_mem_responsep; // Construction order is execution order within a cycle, so this list should be back-to-front (for zero-cycle restarts) + exec_stage exec{*this}; + indir_stage indir{*this}; decode_stage decode{*this}; fetch_stage fetch{*this}; core(iomodel &model) : system(model) , checker(model) + , icount(checker.icount) { mem.commandp = &mem_commandp; mem_command_arb.outp = &mem_commandp; - mem_command_arb.peerp[0] = &decode_mem_commandp; - mem_command_arb.peerp[1] = &fetch_mem_commandp; + mem_command_arb.peerp[0] = &exec_mem_commandp; + mem_command_arb.peerp[1] = &indir_mem_store_commandp; + mem_command_arb.peerp[2] = &indir_mem_load_commandp; + mem_command_arb.peerp[3] = &decode_mem_commandp; + mem_command_arb.peerp[4] = &fetch_mem_commandp; } }; -- cgit v1.2.3