diff options
| author | Julian Blake Kongslie | 2023-01-15 14:02:54 -0800 |
|---|---|---|
| committer | Julian Blake Kongslie | 2023-01-15 15:04:07 -0800 |
| commit | 27a58c86ce494588a12023a12b027b7f44bb35fc (patch) | |
| tree | b793a60b97ab402d8438aba0800667d4151909fe | |
| parent | Enable parallel LTO linking. (diff) | |
| download | biggolf-27a58c86ce494588a12023a12b027b7f44bb35fc.tar.xz | |
Stall decode after an instruction with stores until the stores are done.
Diffstat (limited to '')
| -rw-r--r-- | Plan | 1 | ||||
| -rw-r--r-- | isa/decode.cpp | 6 | ||||
| -rw-r--r-- | isa/isa.h | 27 | ||||
| -rw-r--r-- | uarch/core.cpp | 84 | ||||
| -rw-r--r-- | uarch/core.h | 7 |
5 files changed, 95 insertions, 30 deletions
| @@ -10,6 +10,7 @@ vim: set sw=8 noet : | |||
| 10 | of depending on the caller | 10 | of depending on the caller |
| 11 | * Make it complete | 11 | * Make it complete |
| 12 | * Make it pretty | 12 | * Make it pretty |
| 13 | * Deadman timer | ||
| 13 | * D-side cache | 14 | * D-side cache |
| 14 | * Store forwarding | 15 | * Store forwarding |
| 15 | * Cache consistency between I and D side | 16 | * Cache consistency between I and D side |
diff --git a/isa/decode.cpp b/isa/decode.cpp index f0cdca8..abcb3e3 100644 --- a/isa/decode.cpp +++ b/isa/decode.cpp | |||
| @@ -88,15 +88,15 @@ instruction_context decode(std::uint_fast32_t flags, unsigned int pc, unsigned i | |||
| 88 | #pragma GCC diagnostic pop | 88 | #pragma GCC diagnostic pop |
| 89 | 89 | ||
| 90 | instruction_context inst; | 90 | instruction_context inst; |
| 91 | inst.bits = bits; | ||
| 92 | inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777); | ||
| 93 | |||
| 94 | if (interrupt) { | 91 | if (interrupt) { |
| 95 | inst.bits = bits = 04000; | 92 | inst.bits = bits = 04000; |
| 96 | assert(df == 0); | 93 | assert(df == 0); |
| 97 | assert(ifb == 0); | 94 | assert(ifb == 0); |
| 98 | inst.next_pc = pc; | 95 | inst.next_pc = pc; |
| 99 | pc = 0; | 96 | pc = 0; |
| 97 | } else { | ||
| 98 | inst.bits = bits; | ||
| 99 | inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777); | ||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | switch (bits >> 9) { | 102 | switch (bits >> 9) { |
| @@ -57,6 +57,9 @@ static std::string opr_disasm_group2_neg[0366]; | |||
| 57 | static std::string opr_disasm_extended_arith[0376]; | 57 | static std::string opr_disasm_extended_arith[0376]; |
| 58 | 58 | ||
| 59 | struct instruction_context { | 59 | struct instruction_context { |
| 60 | // Known statically before decode time | ||
| 61 | unsigned int bits; | ||
| 62 | |||
| 60 | // Known statically at decode time | 63 | // Known statically at decode time |
| 61 | bool need_indirect_load = false; // final_address = mem[init_address] | 64 | bool need_indirect_load = false; // final_address = mem[init_address] |
| 62 | bool need_autoinc_store = false; // mem[init_address] += 1 | 65 | bool need_autoinc_store = false; // mem[init_address] += 1 |
| @@ -79,7 +82,6 @@ struct instruction_context { | |||
| 79 | void execute() { ef(*this); } | 82 | void execute() { ef(*this); } |
| 80 | 83 | ||
| 81 | // May change over the lifetime of the instruction execution | 84 | // May change over the lifetime of the instruction execution |
| 82 | unsigned int bits; | ||
| 83 | unsigned int next_pc; // includes IF | 85 | unsigned int next_pc; // includes IF |
| 84 | std::optional<unsigned int> init_address; // includes DF | 86 | std::optional<unsigned int> init_address; // includes DF |
| 85 | std::optional<unsigned int> final_address; // includes DF | 87 | std::optional<unsigned int> final_address; // includes DF |
| @@ -88,6 +90,29 @@ struct instruction_context { | |||
| 88 | std::optional<unsigned int> acc; | 90 | std::optional<unsigned int> acc; |
| 89 | std::optional<bool> link; | 91 | std::optional<bool> link; |
| 90 | std::optional<unsigned int> mq; | 92 | std::optional<unsigned int> mq; |
| 93 | |||
| 94 | // N.B. two "identical" instructions may compare unequal if they are at different points in their execution | ||
| 95 | bool operator==(const instruction_context &that) const { | ||
| 96 | if (bits != that.bits) | ||
| 97 | return false; | ||
| 98 | if (init_address != that.init_address) | ||
| 99 | return false; | ||
| 100 | if (final_address != that.final_address) | ||
| 101 | return false; | ||
| 102 | if (ctlval != that.ctlval) | ||
| 103 | return false; | ||
| 104 | if (data != that.data) | ||
| 105 | return false; | ||
| 106 | if (acc != that.acc) | ||
| 107 | return false; | ||
| 108 | if (link != that.link) | ||
| 109 | return false; | ||
| 110 | if (mq != that.mq) | ||
| 111 | return false; | ||
| 112 | if (next_pc != that.next_pc) | ||
| 113 | return false; | ||
| 114 | return true; | ||
| 115 | } | ||
| 91 | }; | 116 | }; |
| 92 | 117 | ||
| 93 | void init_disasm_tables(); | 118 | void init_disasm_tables(); |
diff --git a/uarch/core.cpp b/uarch/core.cpp index 12f8b5b..7304442 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp | |||
| @@ -74,9 +74,16 @@ void decode_stage::clock() { | |||
| 74 | pc = r.new_pc; | 74 | pc = r.new_pc; |
| 75 | interrupt |= r.interrupt; | 75 | interrupt |= r.interrupt; |
| 76 | icount = c.icount; | 76 | icount = c.icount; |
| 77 | speculative_stores_sent = r.stores_sent; | ||
| 77 | } | 78 | } |
| 78 | 79 | ||
| 79 | if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { | 80 | if (c.decode_store_completep.can_read()) { |
| 81 | ++stores_done; | ||
| 82 | assert((int)(speculative_stores_sent - stores_done) >= 0); | ||
| 83 | c.decode_store_completep.discard(); | ||
| 84 | } | ||
| 85 | |||
| 86 | if (speculative_stores_sent == stores_done && c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { | ||
| 80 | auto b = c.fetch_bundlep.peek(); | 87 | auto b = c.fetch_bundlep.peek(); |
| 81 | 88 | ||
| 82 | if (b.gen != c.gen) { | 89 | if (b.gen != c.gen) { |
| @@ -118,6 +125,9 @@ void decode_stage::clock() { | |||
| 118 | 125 | ||
| 119 | pc = i.inst.next_pc; | 126 | pc = i.inst.next_pc; |
| 120 | 127 | ||
| 128 | speculative_stores_sent += i.inst.need_autoinc_store; | ||
| 129 | speculative_stores_sent += i.inst.need_exec_store; | ||
| 130 | |||
| 121 | if (i.inst.need_indirect_load) { | 131 | if (i.inst.need_indirect_load) { |
| 122 | memory::dram::command fr; | 132 | memory::dram::command fr; |
| 123 | fr.transaction = i.tr; | 133 | fr.transaction = i.tr; |
| @@ -180,6 +190,7 @@ void indir_stage::clock() { | |||
| 180 | sr.mask.fill(false); | 190 | sr.mask.fill(false); |
| 181 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 191 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 182 | sr.write = true; | 192 | sr.write = true; |
| 193 | sr.responsep = &c.decode_store_completep; | ||
| 183 | c.indir_mem_store_commandp.write(std::move(sr)); | 194 | c.indir_mem_store_commandp.write(std::move(sr)); |
| 184 | } else { | 195 | } else { |
| 185 | pte(i.tr, "I"); | 196 | pte(i.tr, "I"); |
| @@ -207,6 +218,7 @@ void exec_stage::clock() { | |||
| 207 | c.restarto.reset(); | 218 | c.restarto.reset(); |
| 208 | 219 | ||
| 209 | std::optional<infra::transaction> restarttr; | 220 | std::optional<infra::transaction> restarttr; |
| 221 | std::optional<instruction_context> insto; | ||
| 210 | 222 | ||
| 211 | bool progress = ctlregs[HALTED]; | 223 | bool progress = ctlregs[HALTED]; |
| 212 | 224 | ||
| @@ -226,7 +238,9 @@ void exec_stage::clock() { | |||
| 226 | sr.mask.fill(false); | 238 | sr.mask.fill(false); |
| 227 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 239 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 228 | sr.write = true; | 240 | sr.write = true; |
| 241 | sr.responsep = &c.decode_store_completep; | ||
| 229 | c.exec_mem_commandp.write(std::move(sr)); | 242 | c.exec_mem_commandp.write(std::move(sr)); |
| 243 | stores_sent += 2; // Original store sent by Indir stage plus unstore here | ||
| 230 | } | 244 | } |
| 231 | c.decode_to_exec_instp.discard(); | 245 | c.decode_to_exec_instp.discard(); |
| 232 | } else if (i.icount == c.icount) { | 246 | } else if (i.icount == c.icount) { |
| @@ -247,7 +261,9 @@ void exec_stage::clock() { | |||
| 247 | sr.mask.fill(false); | 261 | sr.mask.fill(false); |
| 248 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 262 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 249 | sr.write = true; | 263 | sr.write = true; |
| 264 | sr.responsep = &c.decode_store_completep; | ||
| 250 | c.exec_mem_commandp.write(std::move(sr)); | 265 | c.exec_mem_commandp.write(std::move(sr)); |
| 266 | stores_sent += 2; // Original store sent by Indir stage plus unstore here | ||
| 251 | } | 267 | } |
| 252 | c.indir_to_exec_instp.discard(); | 268 | c.indir_to_exec_instp.discard(); |
| 253 | } else if (i.icount == c.icount) { | 269 | } else if (i.icount == c.icount) { |
| @@ -277,49 +293,58 @@ void exec_stage::clock() { | |||
| 277 | pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); | 293 | pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); |
| 278 | } | 294 | } |
| 279 | 295 | ||
| 296 | // We are committed to complete execution of this instruction this cycle. | ||
| 297 | |||
| 298 | insto = std::move(i.inst); | ||
| 299 | auto &inst = *insto; | ||
| 300 | |||
| 280 | pte(i.tr, "E"); | 301 | pte(i.tr, "E"); |
| 281 | progress = true; | 302 | progress = true; |
| 282 | 303 | ||
| 283 | assert(i.pc == pc); | 304 | assert(i.pc == pc); |
| 284 | 305 | ||
| 285 | auto next_pc = i.inst.next_pc; | 306 | auto next_pc = inst.next_pc; |
| 286 | 307 | ||
| 287 | if (i.inst.need_read_acc) | 308 | if (inst.need_autoinc_store) |
| 288 | i.inst.acc = acc; | 309 | ++stores_sent; // It was sent by Indir stage |
| 289 | if (i.inst.need_read_link) | 310 | if (inst.need_read_acc) |
| 290 | i.inst.link = link; | 311 | inst.acc = acc; |
| 291 | if (i.inst.need_read_mq) | 312 | if (inst.need_read_link) |
| 292 | i.inst.mq = mq; | 313 | inst.link = link; |
| 293 | if (i.inst.read_ctlreg.has_value()) | 314 | if (inst.need_read_mq) |
| 294 | i.inst.ctlval = ctlregs[*i.inst.read_ctlreg]; | 315 | inst.mq = mq; |
| 316 | if (inst.read_ctlreg.has_value()) | ||
| 317 | inst.ctlval = ctlregs[*inst.read_ctlreg]; | ||
| 295 | 318 | ||
| 296 | i.inst.execute(); | 319 | inst.execute(); |
| 297 | 320 | ||
| 298 | if (i.inst.need_write_acc) | 321 | if (inst.need_write_acc) |
| 299 | acc = i.inst.acc.value(); | 322 | acc = inst.acc.value(); |
| 300 | if (i.inst.need_write_link) | 323 | if (inst.need_write_link) |
| 301 | link = i.inst.link.value(); | 324 | link = inst.link.value(); |
| 302 | if (i.inst.need_write_mq) | 325 | if (inst.need_write_mq) |
| 303 | mq = i.inst.mq.value(); | 326 | mq = inst.mq.value(); |
| 304 | if (i.inst.write_ctlreg.has_value()) { | 327 | if (inst.write_ctlreg.has_value()) { |
| 305 | ctlregs[*i.inst.write_ctlreg] = i.inst.ctlval.value(); | 328 | ctlregs[*inst.write_ctlreg] = inst.ctlval.value(); |
| 306 | restarttr = i.tr; | 329 | restarttr = i.tr; |
| 307 | } | 330 | } |
| 308 | 331 | ||
| 309 | if (i.inst.need_exec_store) { | 332 | if (inst.need_exec_store) { |
| 310 | pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *i.inst.final_address, *i.inst.data)); | 333 | pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *inst.final_address, *inst.data)); |
| 311 | memory::dram::command sr; | 334 | memory::dram::command sr; |
| 312 | sr.transaction = i.tr; | 335 | sr.transaction = i.tr; |
| 313 | sr.line_address = *i.inst.final_address >> memory::LINE_BYTES_LOG2; | 336 | sr.line_address = *inst.final_address >> memory::LINE_BYTES_LOG2; |
| 314 | sr.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *i.inst.data; | 337 | sr.data[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *inst.data; |
| 315 | sr.mask.fill(false); | 338 | sr.mask.fill(false); |
| 316 | sr.mask[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 339 | sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 317 | sr.write = true; | 340 | sr.write = true; |
| 341 | sr.responsep = &c.decode_store_completep; | ||
| 318 | c.exec_mem_commandp.write(std::move(sr)); | 342 | c.exec_mem_commandp.write(std::move(sr)); |
| 343 | ++stores_sent; | ||
| 319 | } | 344 | } |
| 320 | 345 | ||
| 321 | assert(i.inst.next_pc == next_pc || i.inst.possibly_redirects); | 346 | assert(inst.next_pc == next_pc || inst.possibly_redirects); |
| 322 | pc = i.inst.next_pc; | 347 | pc = inst.next_pc; |
| 323 | 348 | ||
| 324 | if (pc != next_pc) { | 349 | if (pc != next_pc) { |
| 325 | pte(i.tr, "", fmt::format("jump={:05o}", pc)); | 350 | pte(i.tr, "", fmt::format("jump={:05o}", pc)); |
| @@ -345,6 +370,7 @@ bail_out: | |||
| 345 | r.tr = *restarttr; | 370 | r.tr = *restarttr; |
| 346 | r.new_pc = pc; | 371 | r.new_pc = pc; |
| 347 | r.interrupt = interrupt; | 372 | r.interrupt = interrupt; |
| 373 | r.stores_sent = stores_sent; | ||
| 348 | gen = ++c.gen; | 374 | gen = ++c.gen; |
| 349 | c.restarto = std::move(r); | 375 | c.restarto = std::move(r); |
| 350 | } | 376 | } |
| @@ -356,6 +382,12 @@ bail_out: | |||
| 356 | assert(c.checker.icount == c.icount); | 382 | assert(c.checker.icount == c.icount); |
| 357 | // std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", c.icount, pc, c.checker.pc); | 383 | // std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", c.icount, pc, c.checker.pc); |
| 358 | assert(pc == c.checker.pc); | 384 | assert(pc == c.checker.pc); |
| 385 | |||
| 386 | if (insto.has_value()) { | ||
| 387 | auto &inst = *insto; | ||
| 388 | assert(inst == c.checker.inst); | ||
| 389 | } | ||
| 390 | |||
| 359 | assert(acc == c.checker.acc); | 391 | assert(acc == c.checker.acc); |
| 360 | assert(link == c.checker.link); | 392 | assert(link == c.checker.link); |
| 361 | assert(mq == c.checker.mq); | 393 | assert(mq == c.checker.mq); |
diff --git a/uarch/core.h b/uarch/core.h index b53a205..21725b3 100644 --- a/uarch/core.h +++ b/uarch/core.h | |||
| @@ -17,6 +17,7 @@ struct restart { | |||
| 17 | infra::transaction tr; | 17 | infra::transaction tr; |
| 18 | unsigned int new_pc; | 18 | unsigned int new_pc; |
| 19 | bool interrupt; | 19 | bool interrupt; |
| 20 | unsigned int stores_sent = 0; | ||
| 20 | }; | 21 | }; |
| 21 | 22 | ||
| 22 | struct fetch_bundle { | 23 | struct fetch_bundle { |
| @@ -57,6 +58,9 @@ struct decode_stage : public infra::sim { | |||
| 57 | unsigned int pc; | 58 | unsigned int pc; |
| 58 | std::uint64_t icount; | 59 | std::uint64_t icount; |
| 59 | 60 | ||
| 61 | unsigned int speculative_stores_sent = 0; | ||
| 62 | unsigned int stores_done = 0; | ||
| 63 | |||
| 60 | decode_stage(core &c); | 64 | decode_stage(core &c); |
| 61 | 65 | ||
| 62 | void clock(); | 66 | void clock(); |
| @@ -77,6 +81,8 @@ struct exec_stage : public infra::sim { | |||
| 77 | 81 | ||
| 78 | unsigned int gen = 0; | 82 | unsigned int gen = 0; |
| 79 | 83 | ||
| 84 | unsigned int stores_sent = 0; | ||
| 85 | |||
| 80 | unsigned int acc; | 86 | unsigned int acc; |
| 81 | unsigned int link; | 87 | unsigned int link; |
| 82 | unsigned int mq; | 88 | unsigned int mq; |
| @@ -108,6 +114,7 @@ struct core { | |||
| 108 | 114 | ||
| 109 | infra::port<memory::dram::command> decode_mem_commandp; | 115 | infra::port<memory::dram::command> decode_mem_commandp; |
| 110 | infra::port<memory::dram::response> decode_mem_responsep; | 116 | infra::port<memory::dram::response> decode_mem_responsep; |
| 117 | infra::port<memory::dram::response> decode_store_completep; | ||
| 111 | infra::port<inst_bundle> decode_to_exec_instp; | 118 | infra::port<inst_bundle> decode_to_exec_instp; |
| 112 | 119 | ||
| 113 | infra::port<inst_bundle> indir_instp; | 120 | infra::port<inst_bundle> indir_instp; |
