summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Plan 18
m--------- nanosim 0
-rw-r--r-- uarch/core.cpp 99
-rw-r--r-- uarch/core.h 19
4 files changed, 93 insertions, 43 deletions
diff --git a/Plan b/Plan
index 13f50b4..a10fa68 100644
--- a/Plan
+++ b/Plan
@@ -1,14 +1,16 @@
1 vim: set sw=8 noet : 1 vim: set sw=8 noet :
2 2
3 * D-side cache 3 * Forwarding stores to eliminate "unstores"
4 * Store forwarding 4 * Store forwarding structure
5 * Store buffer allocation and senior store commit/deallocation
6 * SMC snoops?
7 * Generic "cache level" infrastructure
8 * Convert I-side cache
9 * Convert D-side cache
10 * Shared between exec and indir stages
11 * Arbitration to reduce total port count
12 * Real coherence protocol
5 * Cache consistency between I and D side 13 * Cache consistency between I and D side
6 * Senior store commit
7 * Shared between exec and indir stages
8 * Arbitration to reduce total port count
9 * Alternative: make separate indir and exec caches, depend on
10 cache consistency protocol
11 * Eliminate "unstores"
12 * Checker should let us check more data 14 * Checker should let us check more data
13 * Checker should print mismatches on its own instead 15 * Checker should print mismatches on its own instead
14 of depending on the caller 16 of depending on the caller
diff --git a/nanosim b/nanosim
Subproject df7554610fdaed78a3ae1116384a162e5e34ddd Subproject d0208c8159e7e454e9d3eec8f65a7050afa0e8a
diff --git a/uarch/core.cpp b/uarch/core.cpp
index ce58df2..e2f35e7 100644
--- a/uarch/core.cpp
+++ b/uarch/core.cpp
@@ -38,12 +38,12 @@ void fetch_stage::clock() {
38 38
39 if (c.fetch_mem_responsep.can_read()) { 39 if (c.fetch_mem_responsep.can_read()) {
40 auto r = c.fetch_mem_responsep.read(); 40 auto r = c.fetch_mem_responsep.read();
41 cache.handle_response(r); 41 c.icache.handle_response(r);
42 } 42 }
43 43
44 if (c.fetch_bundlep.can_write()) { 44 if (c.fetch_bundlep.can_write()) {
45 fetch_bundle b; 45 fetch_bundle b;
46 if (auto t = cache.fetchline(b.data, pc); t.has_value()) { 46 if (auto t = c.icache.fetchline(b.data, pc); t.has_value()) {
47 b.tr = infra::pt::toplevel(); 47 b.tr = infra::pt::toplevel();
48 b.gen = c.gen; 48 b.gen = c.gen;
49 b.pc = pc; 49 b.pc = pc;
@@ -57,7 +57,7 @@ void fetch_stage::clock() {
57 } 57 }
58 } 58 }
59 59
60 if (!outstandingfill && c.fetch_mem_commandp.can_write() && !cache.probe(pc)) { 60 if (!outstandingfill && c.fetch_mem_commandp.can_write() && !c.icache.probe(pc)) {
61 memory::dram::command fr; 61 memory::dram::command fr;
62 fr.transaction = infra::pt::toplevel(); 62 fr.transaction = infra::pt::toplevel();
63 pte(fr.transaction, "p", fmt::format("fpc={:05o}", pc)); 63 pte(fr.transaction, "p", fmt::format("fpc={:05o}", pc));
@@ -77,7 +77,7 @@ void decode_stage::clock() {
77 speculative_stores_sent = r.stores_sent; 77 speculative_stores_sent = r.stores_sent;
78 } 78 }
79 79
80 if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { 80 if (c.fetch_bundlep.can_read() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) {
81 auto b = c.fetch_bundlep.peek(); 81 auto b = c.fetch_bundlep.peek();
82 82
83 if (b.gen != c.gen) { 83 if (b.gen != c.gen) {
@@ -129,14 +129,17 @@ void decode_stage::clock() {
129 speculative_stores_sent += i.inst.need_exec_store; 129 speculative_stores_sent += i.inst.need_exec_store;
130 130
131 if (i.inst.need_indirect_load) { 131 if (i.inst.need_indirect_load) {
132#if 0
132 memory::dram::command fr; 133 memory::dram::command fr;
133 fr.transaction = i.tr; 134 fr.transaction = i.tr;
134 fr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; 135 fr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2;
135 fr.responsep = &c.indir_mem_responsep; 136 fr.responsep = &c.indir_mem_responsep;
136 pte(i.tr, "", fmt::format("iload={:05o}", *i.inst.init_address)); 137 pte(i.tr, "", fmt::format("iload={:05o}", *i.inst.init_address));
137 c.decode_mem_commandp.write(std::move(fr)); 138 c.decode_mem_commandp.write(std::move(fr));
139#endif
138 c.indir_instp.write(std::move(i)); 140 c.indir_instp.write(std::move(i));
139 } else { 141 } else {
142#if 0
140 if (i.inst.need_exec_load) { 143 if (i.inst.need_exec_load) {
141 memory::dram::command fr; 144 memory::dram::command fr;
142 fr.transaction = i.tr; 145 fr.transaction = i.tr;
@@ -145,6 +148,7 @@ void decode_stage::clock() {
145 pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); 148 pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address));
146 c.decode_mem_commandp.write(std::move(fr)); 149 c.decode_mem_commandp.write(std::move(fr));
147 } 150 }
151#endif
148 c.decode_to_exec_instp.write(std::move(i)); 152 c.decode_to_exec_instp.write(std::move(i));
149 } 153 }
150 154
@@ -160,7 +164,12 @@ void indir_stage::clock() {
160 gen = c.gen; 164 gen = c.gen;
161 } 165 }
162 166
163 if (c.indir_instp.can_read() && c.indir_mem_load_commandp.can_write() && c.indir_mem_store_commandp.can_write() && c.indir_to_exec_instp.can_write()) { 167 if (c.indir_mem_responsep.can_read()) {
168 auto r = c.indir_mem_responsep.read();
169 c.dcache.handle_response(r);
170 }
171
172 if (c.indir_instp.can_read() && c.indir_to_exec_instp.can_write()) {
164 auto &i = c.indir_instp.peek(); 173 auto &i = c.indir_instp.peek();
165 174
166 if (i.gen != gen && i.gen != c.gen) { 175 if (i.gen != gen && i.gen != c.gen) {
@@ -172,15 +181,23 @@ void indir_stage::clock() {
172 assert(i.gen == gen); 181 assert(i.gen == gen);
173 } 182 }
174 183
175 if (i.inst.need_indirect_load && !c.indir_mem_responsep.can_read())
176 return;
177
178 if (i.inst.need_indirect_load) { 184 if (i.inst.need_indirect_load) {
179 auto l = c.indir_mem_responsep.read(); 185 unsigned int addr;
180 if (l.line_address != i.inst.init_address.value() >> memory::LINE_BYTES_LOG2) 186 auto t = c.dcache.fetch(addr, i.inst.init_address.value());
187 if (!t.has_value()) {
188 if (c.indir_mem_load_commandp.can_write()) {
189 memory::dram::command fr;
190 fr.transaction = i.tr;
191 pte(fr.transaction, "1", fmt::format("indir load fill {:05o}", i.inst.init_address.value()));
192 fr.line_address = i.inst.init_address.value() >> memory::LINE_BYTES_LOG2;
193 fr.responsep = &c.indir_mem_responsep;
194 c.indir_mem_load_commandp.write(std::move(fr));
195 }
181 return; 196 return;
182 auto addr = l.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK]; 197 }
183 if (i.inst.need_autoinc_store) { 198 if (i.inst.need_autoinc_store) {
199 if (!c.indir_mem_store_commandp.can_write())
200 return;
184 addr = (addr + 1) & 07777; 201 addr = (addr + 1) & 07777;
185 pte(i.tr, "+", fmt::format("istore={:05o} istoredata={:04o}", *i.inst.init_address, addr)); 202 pte(i.tr, "+", fmt::format("istore={:05o} istoredata={:04o}", *i.inst.init_address, addr));
186 memory::dram::command sr; 203 memory::dram::command sr;
@@ -191,6 +208,8 @@ void indir_stage::clock() {
191 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 208 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
192 sr.write = true; 209 sr.write = true;
193 c.indir_mem_store_commandp.write(std::move(sr)); 210 c.indir_mem_store_commandp.write(std::move(sr));
211 c.dcache.opportunistic_store(*i.inst.init_address, addr);
212 c.icache.opportunistic_store(*i.inst.init_address, addr);
194 } else { 213 } else {
195 pte(i.tr, "I"); 214 pte(i.tr, "I");
196 } 215 }
@@ -198,6 +217,7 @@ void indir_stage::clock() {
198 i.inst.final_address = (unsigned int)((df << 12) | addr); 217 i.inst.final_address = (unsigned int)((df << 12) | addr);
199 } 218 }
200 219
220#if 0
201 if (i.inst.need_exec_load) { 221 if (i.inst.need_exec_load) {
202 memory::dram::command fr; 222 memory::dram::command fr;
203 fr.transaction = i.tr; 223 fr.transaction = i.tr;
@@ -206,6 +226,7 @@ void indir_stage::clock() {
206 pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); 226 pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address));
207 c.indir_mem_load_commandp.write(std::move(fr)); 227 c.indir_mem_load_commandp.write(std::move(fr));
208 } 228 }
229#endif
209 230
210 c.indir_to_exec_instp.write(std::move(i)); 231 c.indir_to_exec_instp.write(std::move(i));
211 232
@@ -216,18 +237,24 @@ void indir_stage::clock() {
216void exec_stage::clock() { 237void exec_stage::clock() {
217 c.restarto.reset(); 238 c.restarto.reset();
218 239
240 if (c.exec_mem_responsep.can_read()) {
241 auto r = c.exec_mem_responsep.read();
242 c.dcache.handle_response(r);
243 }
244
219 std::optional<infra::transaction> restarttr; 245 std::optional<infra::transaction> restarttr;
220 std::optional<instruction_context> insto; 246 std::optional<instruction_context> insto;
221 247
222 bool progress = ctlregs[HALTED]; 248 bool progress = ctlregs[HALTED];
223 249
224 if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read()) && c.exec_mem_commandp.can_write()) { 250 if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read())) {
225 infra::port<inst_bundle> *instp = nullptr; 251 infra::port<inst_bundle> *instp = nullptr;
226 if (c.decode_to_exec_instp.can_read()) { 252 if (c.decode_to_exec_instp.can_read()) {
227 auto &i = c.decode_to_exec_instp.peek(); 253 auto &i = c.decode_to_exec_instp.peek();
228 if (i.gen != gen && i.gen != c.gen) { 254 if (i.gen != gen && i.gen != c.gen) {
229 pte(i.tr, "~");
230 if (i.inst.need_autoinc_store) { 255 if (i.inst.need_autoinc_store) {
256 if (!c.exec_mem_store_commandp.can_write())
257 return;
231 auto addr = (*i.inst.final_address - 1) & 07777; 258 auto addr = (*i.inst.final_address - 1) & 07777;
232 pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); 259 pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr));
233 memory::dram::command sr; 260 memory::dram::command sr;
@@ -237,10 +264,13 @@ void exec_stage::clock() {
237 sr.mask.fill(false); 264 sr.mask.fill(false);
238 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 265 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
239 sr.write = true; 266 sr.write = true;
240 c.exec_mem_commandp.write(std::move(sr)); 267 c.exec_mem_store_commandp.write(std::move(sr));
241 stores_sent += 2; // Original store sent by Indir stage plus unstore here 268 stores_sent += 2; // Original store sent by Indir stage plus unstore here
242 c.stores_done += 2; 269 c.stores_done += 2;
270 c.dcache.opportunistic_store(*i.inst.init_address, addr);
271 c.icache.opportunistic_store(*i.inst.init_address, addr);
243 } 272 }
273 pte(i.tr, "~");
244 c.decode_to_exec_instp.discard(); 274 c.decode_to_exec_instp.discard();
245 } else if (i.icount == c.icount) { 275 } else if (i.icount == c.icount) {
246 instp = &c.decode_to_exec_instp; 276 instp = &c.decode_to_exec_instp;
@@ -249,8 +279,9 @@ void exec_stage::clock() {
249 if (c.indir_to_exec_instp.can_read()) { 279 if (c.indir_to_exec_instp.can_read()) {
250 auto &i = c.indir_to_exec_instp.peek(); 280 auto &i = c.indir_to_exec_instp.peek();
251 if (i.gen != gen && i.gen != c.gen) { 281 if (i.gen != gen && i.gen != c.gen) {
252 pte(i.tr, "~");
253 if (i.inst.need_autoinc_store) { 282 if (i.inst.need_autoinc_store) {
283 if (!c.exec_mem_store_commandp.can_write())
284 return;
254 auto addr = (*i.inst.final_address - 1) & 07777; 285 auto addr = (*i.inst.final_address - 1) & 07777;
255 pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); 286 pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr));
256 memory::dram::command sr; 287 memory::dram::command sr;
@@ -260,10 +291,13 @@ void exec_stage::clock() {
260 sr.mask.fill(false); 291 sr.mask.fill(false);
261 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; 292 sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true;
262 sr.write = true; 293 sr.write = true;
263 c.exec_mem_commandp.write(std::move(sr)); 294 c.exec_mem_store_commandp.write(std::move(sr));
264 stores_sent += 2; // Original store sent by Indir stage plus unstore here 295 stores_sent += 2; // Original store sent by Indir stage plus unstore here
265 c.stores_done += 2; 296 c.stores_done += 2;
297 c.dcache.opportunistic_store(*i.inst.init_address, addr);
298 c.icache.opportunistic_store(*i.inst.init_address, addr);
266 } 299 }
300 pte(i.tr, "~");
267 c.indir_to_exec_instp.discard(); 301 c.indir_to_exec_instp.discard();
268 } else if (i.icount == c.icount) { 302 } else if (i.icount == c.icount) {
269 instp = &c.indir_to_exec_instp; 303 instp = &c.indir_to_exec_instp;
@@ -271,8 +305,6 @@ void exec_stage::clock() {
271 } 305 }
272 if (!instp) 306 if (!instp)
273 goto bail_out; 307 goto bail_out;
274 if (!c.exec_mem_commandp.can_write())
275 goto bail_out;
276 auto &i = instp->peek(); 308 auto &i = instp->peek();
277 309
278 assert(i.gen == gen || i.gen == c.gen); 310 assert(i.gen == gen || i.gen == c.gen);
@@ -281,17 +313,27 @@ void exec_stage::clock() {
281 assert(i.gen == gen); 313 assert(i.gen == gen);
282 } 314 }
283 315
284 if (i.inst.need_exec_load && !c.exec_mem_responsep.can_read())
285 return;
286
287 if (i.inst.need_exec_load) { 316 if (i.inst.need_exec_load) {
288 auto l = c.exec_mem_responsep.read(); 317 unsigned int data;
289 if (l.line_address != i.inst.final_address.value() >> memory::LINE_BYTES_LOG2) 318 auto t = c.dcache.fetch(data, i.inst.final_address.value());
319 if (t.has_value()) {
320 i.inst.data = data;
321 } else {
322 if (c.exec_mem_load_commandp.can_write()) {
323 memory::dram::command fr;
324 fr.transaction = i.tr;
325 pte(fr.transaction, "2", fmt::format("exec load fill {:05o}", i.inst.final_address.value()));
326 fr.line_address = i.inst.final_address.value() >> memory::LINE_BYTES_LOG2;
327 fr.responsep = &c.exec_mem_responsep;
328 c.exec_mem_load_commandp.write(std::move(fr));
329 }
290 return; 330 return;
291 i.inst.data = l.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK]; 331 }
292 pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data));
293 } 332 }
294 333
334 if (i.inst.need_exec_store && !c.exec_mem_store_commandp.can_write())
335 return;
336
295 // We are committed to complete execution of this instruction this cycle. 337 // We are committed to complete execution of this instruction this cycle.
296 338
297 insto = std::move(i.inst); 339 insto = std::move(i.inst);
@@ -304,6 +346,9 @@ void exec_stage::clock() {
304 346
305 auto next_pc = inst.next_pc; 347 auto next_pc = inst.next_pc;
306 348
349 if (inst.need_exec_load)
350 pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data));
351
307 if (inst.need_autoinc_store) { 352 if (inst.need_autoinc_store) {
308 ++stores_sent; // It was sent by Indir stage 353 ++stores_sent; // It was sent by Indir stage
309 ++c.stores_done; 354 ++c.stores_done;
@@ -339,9 +384,11 @@ void exec_stage::clock() {
339 sr.mask.fill(false); 384 sr.mask.fill(false);
340 sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; 385 sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true;
341 sr.write = true; 386 sr.write = true;
342 c.exec_mem_commandp.write(std::move(sr)); 387 c.exec_mem_store_commandp.write(std::move(sr));
343 ++stores_sent; 388 ++stores_sent;
344 ++c.stores_done; 389 ++c.stores_done;
390 c.dcache.opportunistic_store(*inst.final_address, *inst.data);
391 c.icache.opportunistic_store(*inst.final_address, *inst.data);
345 } 392 }
346 393
347 assert(inst.next_pc == next_pc || inst.possibly_redirects); 394 assert(inst.next_pc == next_pc || inst.possibly_redirects);
diff --git a/uarch/core.h b/uarch/core.h
index ebfd388..a4ad0fe 100644
--- a/uarch/core.h
+++ b/uarch/core.h
@@ -38,8 +38,6 @@ struct inst_bundle {
38struct fetch_stage : public infra::sim { 38struct fetch_stage : public infra::sim {
39 core &c; 39 core &c;
40 40
41 memory::inline_cache<8, 2> cache;
42
43 unsigned int pc; 41 unsigned int pc;
44 42
45 bool didrestart = false; 43 bool didrestart = false;
@@ -99,6 +97,10 @@ struct core {
99 iomodel &system; 97 iomodel &system;
100 funcchecker checker; 98 funcchecker checker;
101 99
100 // <SETS_LOG2, WAYS>
101 memory::inline_cache<8, 2> icache;
102 memory::inline_cache<8, 2> dcache;
103
102 std::optional<restart> restarto; 104 std::optional<restart> restarto;
103 unsigned int gen = 0; 105 unsigned int gen = 0;
104 106
@@ -113,8 +115,6 @@ struct core {
113 infra::port<memory::dram::response> fetch_mem_responsep; 115 infra::port<memory::dram::response> fetch_mem_responsep;
114 infra::port<fetch_bundle> fetch_bundlep; 116 infra::port<fetch_bundle> fetch_bundlep;
115 117
116 infra::port<memory::dram::command> decode_mem_commandp;
117 infra::port<memory::dram::response> decode_mem_responsep;
118 infra::port<inst_bundle> decode_to_exec_instp; 118 infra::port<inst_bundle> decode_to_exec_instp;
119 119
120 infra::port<inst_bundle> indir_instp; 120 infra::port<inst_bundle> indir_instp;
@@ -123,7 +123,8 @@ struct core {
123 infra::port<memory::dram::response> indir_mem_responsep; 123 infra::port<memory::dram::response> indir_mem_responsep;
124 infra::port<inst_bundle> indir_to_exec_instp; 124 infra::port<inst_bundle> indir_to_exec_instp;
125 125
126 infra::port<memory::dram::command> exec_mem_commandp; 126 infra::port<memory::dram::command> exec_mem_load_commandp;
127 infra::port<memory::dram::command> exec_mem_store_commandp;
127 infra::port<memory::dram::response> exec_mem_responsep; 128 infra::port<memory::dram::response> exec_mem_responsep;
128 129
129 // Global counters (should be Gray code in FPGA implementation, only do == comparisons) 130 // Global counters (should be Gray code in FPGA implementation, only do == comparisons)
@@ -142,10 +143,10 @@ struct core {
142 { 143 {
143 mem.commandp = &mem_commandp; 144 mem.commandp = &mem_commandp;
144 mem_command_arb.outp = &mem_commandp; 145 mem_command_arb.outp = &mem_commandp;
145 mem_command_arb.peerp[0] = &exec_mem_commandp; 146 mem_command_arb.peerp[0] = &exec_mem_store_commandp;
146 mem_command_arb.peerp[1] = &indir_mem_store_commandp; 147 mem_command_arb.peerp[1] = &exec_mem_load_commandp;
147 mem_command_arb.peerp[2] = &indir_mem_load_commandp; 148 mem_command_arb.peerp[2] = &indir_mem_store_commandp;
148 mem_command_arb.peerp[3] = &decode_mem_commandp; 149 mem_command_arb.peerp[3] = &indir_mem_load_commandp;
149 mem_command_arb.peerp[4] = &fetch_mem_commandp; 150 mem_command_arb.peerp[4] = &fetch_mem_commandp;
150 } 151 }
151}; 152};