summaryrefslogtreecommitdiff
path: root/uarch
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--uarch/core.cpp24
-rw-r--r--uarch/core.h7
2 files changed, 16 insertions, 15 deletions
diff --git a/uarch/core.cpp b/uarch/core.cpp
index 7304442..76af173 100644
--- a/uarch/core.cpp
+++ b/uarch/core.cpp
@@ -77,13 +77,7 @@ void decode_stage::clock() {
77 speculative_stores_sent = r.stores_sent; 77 speculative_stores_sent = r.stores_sent;
78 } 78 }
79 79
80 if (c.decode_store_completep.can_read()) { 80 if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) {
81 ++stores_done;
82 assert((int)(speculative_stores_sent - stores_done) >= 0);
83 c.decode_store_completep.discard();
84 }
85
86 if (speculative_stores_sent == stores_done && c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) {
87 auto b = c.fetch_bundlep.peek(); 81 auto b = c.fetch_bundlep.peek();
88 82
89 if (b.gen != c.gen) { 83 if (b.gen != c.gen) {
@@ -106,6 +100,11 @@ void decode_stage::clock() {
106 return; 100 return;
107 } 101 }
108 102
103 if (speculative_stores_sent != c.stores_done) {
104 pte(b.tr, "z");
105 return;
106 }
107
109 inst_bundle i; 108 inst_bundle i;
110 109
111 i.tr = infra::pt::child(b.tr); 110 i.tr = infra::pt::child(b.tr);
@@ -190,7 +189,6 @@ void indir_stage::clock() {
190 sr.mask.fill(false); 189 sr.mask.fill(false);
191 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 190 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
192 sr.write = true; 191 sr.write = true;
193 sr.responsep = &c.decode_store_completep;
194 c.indir_mem_store_commandp.write(std::move(sr)); 192 c.indir_mem_store_commandp.write(std::move(sr));
195 } else { 193 } else {
196 pte(i.tr, "I"); 194 pte(i.tr, "I");
@@ -238,9 +236,9 @@ void exec_stage::clock() {
238 sr.mask.fill(false); 236 sr.mask.fill(false);
239 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 237 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
240 sr.write = true; 238 sr.write = true;
241 sr.responsep = &c.decode_store_completep;
242 c.exec_mem_commandp.write(std::move(sr)); 239 c.exec_mem_commandp.write(std::move(sr));
243 stores_sent += 2; // Original store sent by Indir stage plus unstore here 240 stores_sent += 2; // Original store sent by Indir stage plus unstore here
241 c.stores_done += 2;
244 } 242 }
245 c.decode_to_exec_instp.discard(); 243 c.decode_to_exec_instp.discard();
246 } else if (i.icount == c.icount) { 244 } else if (i.icount == c.icount) {
@@ -261,9 +259,9 @@ void exec_stage::clock() {
261 sr.mask.fill(false); 259 sr.mask.fill(false);
262 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 260 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
263 sr.write = true; 261 sr.write = true;
264 sr.responsep = &c.decode_store_completep;
265 c.exec_mem_commandp.write(std::move(sr)); 262 c.exec_mem_commandp.write(std::move(sr));
266 stores_sent += 2; // Original store sent by Indir stage plus unstore here 263 stores_sent += 2; // Original store sent by Indir stage plus unstore here
264 c.stores_done += 2;
267 } 265 }
268 c.indir_to_exec_instp.discard(); 266 c.indir_to_exec_instp.discard();
269 } else if (i.icount == c.icount) { 267 } else if (i.icount == c.icount) {
@@ -305,8 +303,10 @@ void exec_stage::clock() {
305 303
306 auto next_pc = inst.next_pc; 304 auto next_pc = inst.next_pc;
307 305
308 if (inst.need_autoinc_store) 306 if (inst.need_autoinc_store) {
309 ++stores_sent; // It was sent by Indir stage 307 ++stores_sent; // It was sent by Indir stage
308 ++c.stores_done;
309 }
310 if (inst.need_read_acc) 310 if (inst.need_read_acc)
311 inst.acc = acc; 311 inst.acc = acc;
312 if (inst.need_read_link) 312 if (inst.need_read_link)
@@ -338,9 +338,9 @@ void exec_stage::clock() {
338 sr.mask.fill(false); 338 sr.mask.fill(false);
339 sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; 339 sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true;
340 sr.write = true; 340 sr.write = true;
341 sr.responsep = &c.decode_store_completep;
342 c.exec_mem_commandp.write(std::move(sr)); 341 c.exec_mem_commandp.write(std::move(sr));
343 ++stores_sent; 342 ++stores_sent;
343 ++c.stores_done;
344 } 344 }
345 345
346 assert(inst.next_pc == next_pc || inst.possibly_redirects); 346 assert(inst.next_pc == next_pc || inst.possibly_redirects);
diff --git a/uarch/core.h b/uarch/core.h
index 21725b3..b8473e6 100644
--- a/uarch/core.h
+++ b/uarch/core.h
@@ -59,7 +59,6 @@ struct decode_stage : public infra::sim {
59 std::uint64_t icount; 59 std::uint64_t icount;
60 60
61 unsigned int speculative_stores_sent = 0; 61 unsigned int speculative_stores_sent = 0;
62 unsigned int stores_done = 0;
63 62
64 decode_stage(core &c); 63 decode_stage(core &c);
65 64
@@ -114,7 +113,6 @@ struct core {
114 113
115 infra::port<memory::dram::command> decode_mem_commandp; 114 infra::port<memory::dram::command> decode_mem_commandp;
116 infra::port<memory::dram::response> decode_mem_responsep; 115 infra::port<memory::dram::response> decode_mem_responsep;
117 infra::port<memory::dram::response> decode_store_completep;
118 infra::port<inst_bundle> decode_to_exec_instp; 116 infra::port<inst_bundle> decode_to_exec_instp;
119 117
120 infra::port<inst_bundle> indir_instp; 118 infra::port<inst_bundle> indir_instp;
@@ -126,7 +124,10 @@ struct core {
126 infra::port<memory::dram::command> exec_mem_commandp; 124 infra::port<memory::dram::command> exec_mem_commandp;
127 infra::port<memory::dram::response> exec_mem_responsep; 125 infra::port<memory::dram::response> exec_mem_responsep;
128 126
129 // Construction order is execution order within a cycle, so this list should be back-to-front (for zero-cycle restarts) 127 // Global counters (should be Gray code in FPGA implementation, only do == comparisons)
128 unsigned int stores_done = 0;
129
130 // Construction order is execution order within a cycle, so this list should be back-to-front (for zero-cycle restarts and store count propagation)
130 exec_stage exec{*this}; 131 exec_stage exec{*this};
131 indir_stage indir{*this}; 132 indir_stage indir{*this};
132 decode_stage decode{*this}; 133 decode_stage decode{*this};