summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Plan1
-rw-r--r--isa/decode.cpp6
-rw-r--r--isa/isa.h27
-rw-r--r--uarch/core.cpp84
-rw-r--r--uarch/core.h7
5 files changed, 95 insertions, 30 deletions
diff --git a/Plan b/Plan
index c7e833c..55dc73e 100644
--- a/Plan
+++ b/Plan
@@ -10,6 +10,7 @@ vim: set sw=8 noet :
10 of depending on the caller 10 of depending on the caller
11 * Make it complete 11 * Make it complete
12 * Make it pretty 12 * Make it pretty
13 * Deadman timer
13 * D-side cache 14 * D-side cache
14 * Store forwarding 15 * Store forwarding
15 * Cache consistency between I and D side 16 * Cache consistency between I and D side
diff --git a/isa/decode.cpp b/isa/decode.cpp
index f0cdca8..abcb3e3 100644
--- a/isa/decode.cpp
+++ b/isa/decode.cpp
@@ -88,15 +88,15 @@ instruction_context decode(std::uint_fast32_t flags, unsigned int pc, unsigned i
88#pragma GCC diagnostic pop 88#pragma GCC diagnostic pop
89 89
90 instruction_context inst; 90 instruction_context inst;
91 inst.bits = bits;
92 inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777);
93
94 if (interrupt) { 91 if (interrupt) {
95 inst.bits = bits = 04000; 92 inst.bits = bits = 04000;
96 assert(df == 0); 93 assert(df == 0);
97 assert(ifb == 0); 94 assert(ifb == 0);
98 inst.next_pc = pc; 95 inst.next_pc = pc;
99 pc = 0; 96 pc = 0;
97 } else {
98 inst.bits = bits;
99 inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777);
100 } 100 }
101 101
102 switch (bits >> 9) { 102 switch (bits >> 9) {
diff --git a/isa/isa.h b/isa/isa.h
index 4083e16..f94a117 100644
--- a/isa/isa.h
+++ b/isa/isa.h
@@ -57,6 +57,9 @@ static std::string opr_disasm_group2_neg[0366];
57static std::string opr_disasm_extended_arith[0376]; 57static std::string opr_disasm_extended_arith[0376];
58 58
59struct instruction_context { 59struct instruction_context {
60 // Known statically before decode time
61 unsigned int bits;
62
60 // Known statically at decode time 63 // Known statically at decode time
61 bool need_indirect_load = false; // final_address = mem[init_address] 64 bool need_indirect_load = false; // final_address = mem[init_address]
62 bool need_autoinc_store = false; // mem[init_address] += 1 65 bool need_autoinc_store = false; // mem[init_address] += 1
@@ -79,7 +82,6 @@ struct instruction_context {
79 void execute() { ef(*this); } 82 void execute() { ef(*this); }
80 83
81 // May change over the lifetime of the instruction execution 84 // May change over the lifetime of the instruction execution
82 unsigned int bits;
83 unsigned int next_pc; // includes IF 85 unsigned int next_pc; // includes IF
84 std::optional<unsigned int> init_address; // includes DF 86 std::optional<unsigned int> init_address; // includes DF
85 std::optional<unsigned int> final_address; // includes DF 87 std::optional<unsigned int> final_address; // includes DF
@@ -88,6 +90,29 @@ struct instruction_context {
88 std::optional<unsigned int> acc; 90 std::optional<unsigned int> acc;
89 std::optional<bool> link; 91 std::optional<bool> link;
90 std::optional<unsigned int> mq; 92 std::optional<unsigned int> mq;
93
94 // N.B. two "identical" instructions may compare unequal if they are at different points in their execution
95 bool operator==(const instruction_context &that) const {
96 if (bits != that.bits)
97 return false;
98 if (init_address != that.init_address)
99 return false;
100 if (final_address != that.final_address)
101 return false;
102 if (ctlval != that.ctlval)
103 return false;
104 if (data != that.data)
105 return false;
106 if (acc != that.acc)
107 return false;
108 if (link != that.link)
109 return false;
110 if (mq != that.mq)
111 return false;
112 if (next_pc != that.next_pc)
113 return false;
114 return true;
115 }
91}; 116};
92 117
93void init_disasm_tables(); 118void init_disasm_tables();
diff --git a/uarch/core.cpp b/uarch/core.cpp
index 12f8b5b..7304442 100644
--- a/uarch/core.cpp
+++ b/uarch/core.cpp
@@ -74,9 +74,16 @@ void decode_stage::clock() {
74 pc = r.new_pc; 74 pc = r.new_pc;
75 interrupt |= r.interrupt; 75 interrupt |= r.interrupt;
76 icount = c.icount; 76 icount = c.icount;
77 speculative_stores_sent = r.stores_sent;
77 } 78 }
78 79
79 if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { 80 if (c.decode_store_completep.can_read()) {
81 ++stores_done;
82 assert((int)(speculative_stores_sent - stores_done) >= 0);
83 c.decode_store_completep.discard();
84 }
85
86 if (speculative_stores_sent == stores_done && c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) {
80 auto b = c.fetch_bundlep.peek(); 87 auto b = c.fetch_bundlep.peek();
81 88
82 if (b.gen != c.gen) { 89 if (b.gen != c.gen) {
@@ -118,6 +125,9 @@ void decode_stage::clock() {
118 125
119 pc = i.inst.next_pc; 126 pc = i.inst.next_pc;
120 127
128 speculative_stores_sent += i.inst.need_autoinc_store;
129 speculative_stores_sent += i.inst.need_exec_store;
130
121 if (i.inst.need_indirect_load) { 131 if (i.inst.need_indirect_load) {
122 memory::dram::command fr; 132 memory::dram::command fr;
123 fr.transaction = i.tr; 133 fr.transaction = i.tr;
@@ -180,6 +190,7 @@ void indir_stage::clock() {
180 sr.mask.fill(false); 190 sr.mask.fill(false);
181 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 191 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
182 sr.write = true; 192 sr.write = true;
193 sr.responsep = &c.decode_store_completep;
183 c.indir_mem_store_commandp.write(std::move(sr)); 194 c.indir_mem_store_commandp.write(std::move(sr));
184 } else { 195 } else {
185 pte(i.tr, "I"); 196 pte(i.tr, "I");
@@ -207,6 +218,7 @@ void exec_stage::clock() {
207 c.restarto.reset(); 218 c.restarto.reset();
208 219
209 std::optional<infra::transaction> restarttr; 220 std::optional<infra::transaction> restarttr;
221 std::optional<instruction_context> insto;
210 222
211 bool progress = ctlregs[HALTED]; 223 bool progress = ctlregs[HALTED];
212 224
@@ -226,7 +238,9 @@ void exec_stage::clock() {
226 sr.mask.fill(false); 238 sr.mask.fill(false);
227 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 239 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
228 sr.write = true; 240 sr.write = true;
241 sr.responsep = &c.decode_store_completep;
229 c.exec_mem_commandp.write(std::move(sr)); 242 c.exec_mem_commandp.write(std::move(sr));
243 stores_sent += 2; // Original store sent by Indir stage plus unstore here
230 } 244 }
231 c.decode_to_exec_instp.discard(); 245 c.decode_to_exec_instp.discard();
232 } else if (i.icount == c.icount) { 246 } else if (i.icount == c.icount) {
@@ -247,7 +261,9 @@ void exec_stage::clock() {
247 sr.mask.fill(false); 261 sr.mask.fill(false);
248 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 262 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
249 sr.write = true; 263 sr.write = true;
264 sr.responsep = &c.decode_store_completep;
250 c.exec_mem_commandp.write(std::move(sr)); 265 c.exec_mem_commandp.write(std::move(sr));
266 stores_sent += 2; // Original store sent by Indir stage plus unstore here
251 } 267 }
252 c.indir_to_exec_instp.discard(); 268 c.indir_to_exec_instp.discard();
253 } else if (i.icount == c.icount) { 269 } else if (i.icount == c.icount) {
@@ -277,49 +293,58 @@ void exec_stage::clock() {
277 pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); 293 pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data));
278 } 294 }
279 295
296 // We are committed to complete execution of this instruction this cycle.
297
298 insto = std::move(i.inst);
299 auto &inst = *insto;
300
280 pte(i.tr, "E"); 301 pte(i.tr, "E");
281 progress = true; 302 progress = true;
282 303
283 assert(i.pc == pc); 304 assert(i.pc == pc);
284 305
285 auto next_pc = i.inst.next_pc; 306 auto next_pc = inst.next_pc;
286 307
287 if (i.inst.need_read_acc) 308 if (inst.need_autoinc_store)
288 i.inst.acc = acc; 309 ++stores_sent; // It was sent by Indir stage
289 if (i.inst.need_read_link) 310 if (inst.need_read_acc)
290 i.inst.link = link; 311 inst.acc = acc;
291 if (i.inst.need_read_mq) 312 if (inst.need_read_link)
292 i.inst.mq = mq; 313 inst.link = link;
293 if (i.inst.read_ctlreg.has_value()) 314 if (inst.need_read_mq)
294 i.inst.ctlval = ctlregs[*i.inst.read_ctlreg]; 315 inst.mq = mq;
316 if (inst.read_ctlreg.has_value())
317 inst.ctlval = ctlregs[*inst.read_ctlreg];
295 318
296 i.inst.execute(); 319 inst.execute();
297 320
298 if (i.inst.need_write_acc) 321 if (inst.need_write_acc)
299 acc = i.inst.acc.value(); 322 acc = inst.acc.value();
300 if (i.inst.need_write_link) 323 if (inst.need_write_link)
301 link = i.inst.link.value(); 324 link = inst.link.value();
302 if (i.inst.need_write_mq) 325 if (inst.need_write_mq)
303 mq = i.inst.mq.value(); 326 mq = inst.mq.value();
304 if (i.inst.write_ctlreg.has_value()) { 327 if (inst.write_ctlreg.has_value()) {
305 ctlregs[*i.inst.write_ctlreg] = i.inst.ctlval.value(); 328 ctlregs[*inst.write_ctlreg] = inst.ctlval.value();
306 restarttr = i.tr; 329 restarttr = i.tr;
307 } 330 }
308 331
309 if (i.inst.need_exec_store) { 332 if (inst.need_exec_store) {
310 pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *i.inst.final_address, *i.inst.data)); 333 pte(i.tr, "", fmt::format("store={:05o} storedata={:04o}", *inst.final_address, *inst.data));
311 memory::dram::command sr; 334 memory::dram::command sr;
312 sr.transaction = i.tr; 335 sr.transaction = i.tr;
313 sr.line_address = *i.inst.final_address >> memory::LINE_BYTES_LOG2; 336 sr.line_address = *inst.final_address >> memory::LINE_BYTES_LOG2;
314 sr.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *i.inst.data; 337 sr.data[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = *inst.data;
315 sr.mask.fill(false); 338 sr.mask.fill(false);
316 sr.mask[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; 339 sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true;
317 sr.write = true; 340 sr.write = true;
341 sr.responsep = &c.decode_store_completep;
318 c.exec_mem_commandp.write(std::move(sr)); 342 c.exec_mem_commandp.write(std::move(sr));
343 ++stores_sent;
319 } 344 }
320 345
321 assert(i.inst.next_pc == next_pc || i.inst.possibly_redirects); 346 assert(inst.next_pc == next_pc || inst.possibly_redirects);
322 pc = i.inst.next_pc; 347 pc = inst.next_pc;
323 348
324 if (pc != next_pc) { 349 if (pc != next_pc) {
325 pte(i.tr, "", fmt::format("jump={:05o}", pc)); 350 pte(i.tr, "", fmt::format("jump={:05o}", pc));
@@ -345,6 +370,7 @@ bail_out:
345 r.tr = *restarttr; 370 r.tr = *restarttr;
346 r.new_pc = pc; 371 r.new_pc = pc;
347 r.interrupt = interrupt; 372 r.interrupt = interrupt;
373 r.stores_sent = stores_sent;
348 gen = ++c.gen; 374 gen = ++c.gen;
349 c.restarto = std::move(r); 375 c.restarto = std::move(r);
350 } 376 }
@@ -356,6 +382,12 @@ bail_out:
356 assert(c.checker.icount == c.icount); 382 assert(c.checker.icount == c.icount);
357// std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", c.icount, pc, c.checker.pc); 383// std::cerr << fmt::format("icount={:} pc={:05o} checkerpc={:05o}\n", c.icount, pc, c.checker.pc);
358 assert(pc == c.checker.pc); 384 assert(pc == c.checker.pc);
385
386 if (insto.has_value()) {
387 auto &inst = *insto;
388 assert(inst == c.checker.inst);
389 }
390
359 assert(acc == c.checker.acc); 391 assert(acc == c.checker.acc);
360 assert(link == c.checker.link); 392 assert(link == c.checker.link);
361 assert(mq == c.checker.mq); 393 assert(mq == c.checker.mq);
diff --git a/uarch/core.h b/uarch/core.h
index b53a205..21725b3 100644
--- a/uarch/core.h
+++ b/uarch/core.h
@@ -17,6 +17,7 @@ struct restart {
17 infra::transaction tr; 17 infra::transaction tr;
18 unsigned int new_pc; 18 unsigned int new_pc;
19 bool interrupt; 19 bool interrupt;
20 unsigned int stores_sent = 0;
20}; 21};
21 22
22struct fetch_bundle { 23struct fetch_bundle {
@@ -57,6 +58,9 @@ struct decode_stage : public infra::sim {
57 unsigned int pc; 58 unsigned int pc;
58 std::uint64_t icount; 59 std::uint64_t icount;
59 60
61 unsigned int speculative_stores_sent = 0;
62 unsigned int stores_done = 0;
63
60 decode_stage(core &c); 64 decode_stage(core &c);
61 65
62 void clock(); 66 void clock();
@@ -77,6 +81,8 @@ struct exec_stage : public infra::sim {
77 81
78 unsigned int gen = 0; 82 unsigned int gen = 0;
79 83
84 unsigned int stores_sent = 0;
85
80 unsigned int acc; 86 unsigned int acc;
81 unsigned int link; 87 unsigned int link;
82 unsigned int mq; 88 unsigned int mq;
@@ -108,6 +114,7 @@ struct core {
108 114
109 infra::port<memory::dram::command> decode_mem_commandp; 115 infra::port<memory::dram::command> decode_mem_commandp;
110 infra::port<memory::dram::response> decode_mem_responsep; 116 infra::port<memory::dram::response> decode_mem_responsep;
117 infra::port<memory::dram::response> decode_store_completep;
111 infra::port<inst_bundle> decode_to_exec_instp; 118 infra::port<inst_bundle> decode_to_exec_instp;
112 119
113 infra::port<inst_bundle> indir_instp; 120 infra::port<inst_bundle> indir_instp;