diff options
| author | Julian Blake Kongslie | 2023-01-22 14:27:27 -0800 |
|---|---|---|
| committer | Julian Blake Kongslie | 2023-01-22 14:27:27 -0800 |
| commit | 58da72b83d4f6ef1a729ef5fafc9cb64331af601 (patch) | |
| tree | 6f321bc34e4bfea3d2dc7ebd343acdeb4f9e5e25 /uarch/core.cpp | |
| parent | Add deadman timer (only ten cycles!) based on time between interruptable points. (diff) | |
| download | biggolf-58da72b83d4f6ef1a729ef5fafc9cb64331af601.tar.xz | |
Fix focal 69.
(This was fixed by accident, as a side effect of adding a d-side cache with
write-through stores and propagating writes to the i-side cache.)
Diffstat (limited to '')
| -rw-r--r-- | uarch/core.cpp | 99 |
1 file changed, 73 insertions, 26 deletions
diff --git a/uarch/core.cpp b/uarch/core.cpp index ce58df2..e2f35e7 100644 --- a/uarch/core.cpp +++ b/uarch/core.cpp | |||
| @@ -38,12 +38,12 @@ void fetch_stage::clock() { | |||
| 38 | 38 | ||
| 39 | if (c.fetch_mem_responsep.can_read()) { | 39 | if (c.fetch_mem_responsep.can_read()) { |
| 40 | auto r = c.fetch_mem_responsep.read(); | 40 | auto r = c.fetch_mem_responsep.read(); |
| 41 | cache.handle_response(r); | 41 | c.icache.handle_response(r); |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | if (c.fetch_bundlep.can_write()) { | 44 | if (c.fetch_bundlep.can_write()) { |
| 45 | fetch_bundle b; | 45 | fetch_bundle b; |
| 46 | if (auto t = cache.fetchline(b.data, pc); t.has_value()) { | 46 | if (auto t = c.icache.fetchline(b.data, pc); t.has_value()) { |
| 47 | b.tr = infra::pt::toplevel(); | 47 | b.tr = infra::pt::toplevel(); |
| 48 | b.gen = c.gen; | 48 | b.gen = c.gen; |
| 49 | b.pc = pc; | 49 | b.pc = pc; |
| @@ -57,7 +57,7 @@ void fetch_stage::clock() { | |||
| 57 | } | 57 | } |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | if (!outstandingfill && c.fetch_mem_commandp.can_write() && !cache.probe(pc)) { | 60 | if (!outstandingfill && c.fetch_mem_commandp.can_write() && !c.icache.probe(pc)) { |
| 61 | memory::dram::command fr; | 61 | memory::dram::command fr; |
| 62 | fr.transaction = infra::pt::toplevel(); | 62 | fr.transaction = infra::pt::toplevel(); |
| 63 | pte(fr.transaction, "p", fmt::format("fpc={:05o}", pc)); | 63 | pte(fr.transaction, "p", fmt::format("fpc={:05o}", pc)); |
| @@ -77,7 +77,7 @@ void decode_stage::clock() { | |||
| 77 | speculative_stores_sent = r.stores_sent; | 77 | speculative_stores_sent = r.stores_sent; |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | if (c.fetch_bundlep.can_read() && c.decode_mem_commandp.can_write() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { | 80 | if (c.fetch_bundlep.can_read() && c.indir_instp.can_write() && c.decode_to_exec_instp.can_write()) { |
| 81 | auto b = c.fetch_bundlep.peek(); | 81 | auto b = c.fetch_bundlep.peek(); |
| 82 | 82 | ||
| 83 | if (b.gen != c.gen) { | 83 | if (b.gen != c.gen) { |
| @@ -129,14 +129,17 @@ void decode_stage::clock() { | |||
| 129 | speculative_stores_sent += i.inst.need_exec_store; | 129 | speculative_stores_sent += i.inst.need_exec_store; |
| 130 | 130 | ||
| 131 | if (i.inst.need_indirect_load) { | 131 | if (i.inst.need_indirect_load) { |
| 132 | #if 0 | ||
| 132 | memory::dram::command fr; | 133 | memory::dram::command fr; |
| 133 | fr.transaction = i.tr; | 134 | fr.transaction = i.tr; |
| 134 | fr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; | 135 | fr.line_address = *i.inst.init_address >> memory::LINE_BYTES_LOG2; |
| 135 | fr.responsep = &c.indir_mem_responsep; | 136 | fr.responsep = &c.indir_mem_responsep; |
| 136 | pte(i.tr, "", fmt::format("iload={:05o}", *i.inst.init_address)); | 137 | pte(i.tr, "", fmt::format("iload={:05o}", *i.inst.init_address)); |
| 137 | c.decode_mem_commandp.write(std::move(fr)); | 138 | c.decode_mem_commandp.write(std::move(fr)); |
| 139 | #endif | ||
| 138 | c.indir_instp.write(std::move(i)); | 140 | c.indir_instp.write(std::move(i)); |
| 139 | } else { | 141 | } else { |
| 142 | #if 0 | ||
| 140 | if (i.inst.need_exec_load) { | 143 | if (i.inst.need_exec_load) { |
| 141 | memory::dram::command fr; | 144 | memory::dram::command fr; |
| 142 | fr.transaction = i.tr; | 145 | fr.transaction = i.tr; |
| @@ -145,6 +148,7 @@ void decode_stage::clock() { | |||
| 145 | pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); | 148 | pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); |
| 146 | c.decode_mem_commandp.write(std::move(fr)); | 149 | c.decode_mem_commandp.write(std::move(fr)); |
| 147 | } | 150 | } |
| 151 | #endif | ||
| 148 | c.decode_to_exec_instp.write(std::move(i)); | 152 | c.decode_to_exec_instp.write(std::move(i)); |
| 149 | } | 153 | } |
| 150 | 154 | ||
| @@ -160,7 +164,12 @@ void indir_stage::clock() { | |||
| 160 | gen = c.gen; | 164 | gen = c.gen; |
| 161 | } | 165 | } |
| 162 | 166 | ||
| 163 | if (c.indir_instp.can_read() && c.indir_mem_load_commandp.can_write() && c.indir_mem_store_commandp.can_write() && c.indir_to_exec_instp.can_write()) { | 167 | if (c.indir_mem_responsep.can_read()) { |
| 168 | auto r = c.indir_mem_responsep.read(); | ||
| 169 | c.dcache.handle_response(r); | ||
| 170 | } | ||
| 171 | |||
| 172 | if (c.indir_instp.can_read() && c.indir_to_exec_instp.can_write()) { | ||
| 164 | auto &i = c.indir_instp.peek(); | 173 | auto &i = c.indir_instp.peek(); |
| 165 | 174 | ||
| 166 | if (i.gen != gen && i.gen != c.gen) { | 175 | if (i.gen != gen && i.gen != c.gen) { |
| @@ -172,15 +181,23 @@ void indir_stage::clock() { | |||
| 172 | assert(i.gen == gen); | 181 | assert(i.gen == gen); |
| 173 | } | 182 | } |
| 174 | 183 | ||
| 175 | if (i.inst.need_indirect_load && !c.indir_mem_responsep.can_read()) | ||
| 176 | return; | ||
| 177 | |||
| 178 | if (i.inst.need_indirect_load) { | 184 | if (i.inst.need_indirect_load) { |
| 179 | auto l = c.indir_mem_responsep.read(); | 185 | unsigned int addr; |
| 180 | if (l.line_address != i.inst.init_address.value() >> memory::LINE_BYTES_LOG2) | 186 | auto t = c.dcache.fetch(addr, i.inst.init_address.value()); |
| 187 | if (!t.has_value()) { | ||
| 188 | if (c.indir_mem_load_commandp.can_write()) { | ||
| 189 | memory::dram::command fr; | ||
| 190 | fr.transaction = i.tr; | ||
| 191 | pte(fr.transaction, "1", fmt::format("indir load fill {:05o}", i.inst.init_address.value())); | ||
| 192 | fr.line_address = i.inst.init_address.value() >> memory::LINE_BYTES_LOG2; | ||
| 193 | fr.responsep = &c.indir_mem_responsep; | ||
| 194 | c.indir_mem_load_commandp.write(std::move(fr)); | ||
| 195 | } | ||
| 181 | return; | 196 | return; |
| 182 | auto addr = l.data[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK]; | 197 | } |
| 183 | if (i.inst.need_autoinc_store) { | 198 | if (i.inst.need_autoinc_store) { |
| 199 | if (!c.indir_mem_store_commandp.can_write()) | ||
| 200 | return; | ||
| 184 | addr = (addr + 1) & 07777; | 201 | addr = (addr + 1) & 07777; |
| 185 | pte(i.tr, "+", fmt::format("istore={:05o} istoredata={:04o}", *i.inst.init_address, addr)); | 202 | pte(i.tr, "+", fmt::format("istore={:05o} istoredata={:04o}", *i.inst.init_address, addr)); |
| 186 | memory::dram::command sr; | 203 | memory::dram::command sr; |
| @@ -191,6 +208,8 @@ void indir_stage::clock() { | |||
| 191 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 208 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 192 | sr.write = true; | 209 | sr.write = true; |
| 193 | c.indir_mem_store_commandp.write(std::move(sr)); | 210 | c.indir_mem_store_commandp.write(std::move(sr)); |
| 211 | c.dcache.opportunistic_store(*i.inst.init_address, addr); | ||
| 212 | c.icache.opportunistic_store(*i.inst.init_address, addr); | ||
| 194 | } else { | 213 | } else { |
| 195 | pte(i.tr, "I"); | 214 | pte(i.tr, "I"); |
| 196 | } | 215 | } |
| @@ -198,6 +217,7 @@ void indir_stage::clock() { | |||
| 198 | i.inst.final_address = (unsigned int)((df << 12) | addr); | 217 | i.inst.final_address = (unsigned int)((df << 12) | addr); |
| 199 | } | 218 | } |
| 200 | 219 | ||
| 220 | #if 0 | ||
| 201 | if (i.inst.need_exec_load) { | 221 | if (i.inst.need_exec_load) { |
| 202 | memory::dram::command fr; | 222 | memory::dram::command fr; |
| 203 | fr.transaction = i.tr; | 223 | fr.transaction = i.tr; |
| @@ -206,6 +226,7 @@ void indir_stage::clock() { | |||
| 206 | pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); | 226 | pte(i.tr, "", fmt::format("load={:05o}", *i.inst.final_address)); |
| 207 | c.indir_mem_load_commandp.write(std::move(fr)); | 227 | c.indir_mem_load_commandp.write(std::move(fr)); |
| 208 | } | 228 | } |
| 229 | #endif | ||
| 209 | 230 | ||
| 210 | c.indir_to_exec_instp.write(std::move(i)); | 231 | c.indir_to_exec_instp.write(std::move(i)); |
| 211 | 232 | ||
| @@ -216,18 +237,24 @@ void indir_stage::clock() { | |||
| 216 | void exec_stage::clock() { | 237 | void exec_stage::clock() { |
| 217 | c.restarto.reset(); | 238 | c.restarto.reset(); |
| 218 | 239 | ||
| 240 | if (c.exec_mem_responsep.can_read()) { | ||
| 241 | auto r = c.exec_mem_responsep.read(); | ||
| 242 | c.dcache.handle_response(r); | ||
| 243 | } | ||
| 244 | |||
| 219 | std::optional<infra::transaction> restarttr; | 245 | std::optional<infra::transaction> restarttr; |
| 220 | std::optional<instruction_context> insto; | 246 | std::optional<instruction_context> insto; |
| 221 | 247 | ||
| 222 | bool progress = ctlregs[HALTED]; | 248 | bool progress = ctlregs[HALTED]; |
| 223 | 249 | ||
| 224 | if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read()) && c.exec_mem_commandp.can_write()) { | 250 | if (!ctlregs[HALTED] && (c.decode_to_exec_instp.can_read() || c.indir_to_exec_instp.can_read())) { |
| 225 | infra::port<inst_bundle> *instp = nullptr; | 251 | infra::port<inst_bundle> *instp = nullptr; |
| 226 | if (c.decode_to_exec_instp.can_read()) { | 252 | if (c.decode_to_exec_instp.can_read()) { |
| 227 | auto &i = c.decode_to_exec_instp.peek(); | 253 | auto &i = c.decode_to_exec_instp.peek(); |
| 228 | if (i.gen != gen && i.gen != c.gen) { | 254 | if (i.gen != gen && i.gen != c.gen) { |
| 229 | pte(i.tr, "~"); | ||
| 230 | if (i.inst.need_autoinc_store) { | 255 | if (i.inst.need_autoinc_store) { |
| 256 | if (!c.exec_mem_store_commandp.can_write()) | ||
| 257 | return; | ||
| 231 | auto addr = (*i.inst.final_address - 1) & 07777; | 258 | auto addr = (*i.inst.final_address - 1) & 07777; |
| 232 | pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); | 259 | pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); |
| 233 | memory::dram::command sr; | 260 | memory::dram::command sr; |
| @@ -237,10 +264,13 @@ void exec_stage::clock() { | |||
| 237 | sr.mask.fill(false); | 264 | sr.mask.fill(false); |
| 238 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 265 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 239 | sr.write = true; | 266 | sr.write = true; |
| 240 | c.exec_mem_commandp.write(std::move(sr)); | 267 | c.exec_mem_store_commandp.write(std::move(sr)); |
| 241 | stores_sent += 2; // Original store sent by Indir stage plus unstore here | 268 | stores_sent += 2; // Original store sent by Indir stage plus unstore here |
| 242 | c.stores_done += 2; | 269 | c.stores_done += 2; |
| 270 | c.dcache.opportunistic_store(*i.inst.init_address, addr); | ||
| 271 | c.icache.opportunistic_store(*i.inst.init_address, addr); | ||
| 243 | } | 272 | } |
| 273 | pte(i.tr, "~"); | ||
| 244 | c.decode_to_exec_instp.discard(); | 274 | c.decode_to_exec_instp.discard(); |
| 245 | } else if (i.icount == c.icount) { | 275 | } else if (i.icount == c.icount) { |
| 246 | instp = &c.decode_to_exec_instp; | 276 | instp = &c.decode_to_exec_instp; |
| @@ -249,8 +279,9 @@ void exec_stage::clock() { | |||
| 249 | if (c.indir_to_exec_instp.can_read()) { | 279 | if (c.indir_to_exec_instp.can_read()) { |
| 250 | auto &i = c.indir_to_exec_instp.peek(); | 280 | auto &i = c.indir_to_exec_instp.peek(); |
| 251 | if (i.gen != gen && i.gen != c.gen) { | 281 | if (i.gen != gen && i.gen != c.gen) { |
| 252 | pte(i.tr, "~"); | ||
| 253 | if (i.inst.need_autoinc_store) { | 282 | if (i.inst.need_autoinc_store) { |
| 283 | if (!c.exec_mem_store_commandp.can_write()) | ||
| 284 | return; | ||
| 254 | auto addr = (*i.inst.final_address - 1) & 07777; | 285 | auto addr = (*i.inst.final_address - 1) & 07777; |
| 255 | pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); | 286 | pte(i.tr, "U", fmt::format("unstore={:05o}, unstoredata={:04o}", *i.inst.init_address, addr)); |
| 256 | memory::dram::command sr; | 287 | memory::dram::command sr; |
| @@ -260,10 +291,13 @@ void exec_stage::clock() { | |||
| 260 | sr.mask.fill(false); | 291 | sr.mask.fill(false); |
| 261 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 292 | sr.mask[*i.inst.init_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 262 | sr.write = true; | 293 | sr.write = true; |
| 263 | c.exec_mem_commandp.write(std::move(sr)); | 294 | c.exec_mem_store_commandp.write(std::move(sr)); |
| 264 | stores_sent += 2; // Original store sent by Indir stage plus unstore here | 295 | stores_sent += 2; // Original store sent by Indir stage plus unstore here |
| 265 | c.stores_done += 2; | 296 | c.stores_done += 2; |
| 297 | c.dcache.opportunistic_store(*i.inst.init_address, addr); | ||
| 298 | c.icache.opportunistic_store(*i.inst.init_address, addr); | ||
| 266 | } | 299 | } |
| 300 | pte(i.tr, "~"); | ||
| 267 | c.indir_to_exec_instp.discard(); | 301 | c.indir_to_exec_instp.discard(); |
| 268 | } else if (i.icount == c.icount) { | 302 | } else if (i.icount == c.icount) { |
| 269 | instp = &c.indir_to_exec_instp; | 303 | instp = &c.indir_to_exec_instp; |
| @@ -271,8 +305,6 @@ void exec_stage::clock() { | |||
| 271 | } | 305 | } |
| 272 | if (!instp) | 306 | if (!instp) |
| 273 | goto bail_out; | 307 | goto bail_out; |
| 274 | if (!c.exec_mem_commandp.can_write()) | ||
| 275 | goto bail_out; | ||
| 276 | auto &i = instp->peek(); | 308 | auto &i = instp->peek(); |
| 277 | 309 | ||
| 278 | assert(i.gen == gen || i.gen == c.gen); | 310 | assert(i.gen == gen || i.gen == c.gen); |
| @@ -281,17 +313,27 @@ void exec_stage::clock() { | |||
| 281 | assert(i.gen == gen); | 313 | assert(i.gen == gen); |
| 282 | } | 314 | } |
| 283 | 315 | ||
| 284 | if (i.inst.need_exec_load && !c.exec_mem_responsep.can_read()) | ||
| 285 | return; | ||
| 286 | |||
| 287 | if (i.inst.need_exec_load) { | 316 | if (i.inst.need_exec_load) { |
| 288 | auto l = c.exec_mem_responsep.read(); | 317 | unsigned int data; |
| 289 | if (l.line_address != i.inst.final_address.value() >> memory::LINE_BYTES_LOG2) | 318 | auto t = c.dcache.fetch(data, i.inst.final_address.value()); |
| 319 | if (t.has_value()) { | ||
| 320 | i.inst.data = data; | ||
| 321 | } else { | ||
| 322 | if (c.exec_mem_load_commandp.can_write()) { | ||
| 323 | memory::dram::command fr; | ||
| 324 | fr.transaction = i.tr; | ||
| 325 | pte(fr.transaction, "2", fmt::format("exec load fill {:05o}", i.inst.final_address.value())); | ||
| 326 | fr.line_address = i.inst.final_address.value() >> memory::LINE_BYTES_LOG2; | ||
| 327 | fr.responsep = &c.exec_mem_responsep; | ||
| 328 | c.exec_mem_load_commandp.write(std::move(fr)); | ||
| 329 | } | ||
| 290 | return; | 330 | return; |
| 291 | i.inst.data = l.data[*i.inst.final_address & memory::LINE_BYTE_OFFSET_MASK]; | 331 | } |
| 292 | pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); | ||
| 293 | } | 332 | } |
| 294 | 333 | ||
| 334 | if (i.inst.need_exec_store && !c.exec_mem_store_commandp.can_write()) | ||
| 335 | return; | ||
| 336 | |||
| 295 | // We are committed to complete execution of this instruction this cycle. | 337 | // We are committed to complete execution of this instruction this cycle. |
| 296 | 338 | ||
| 297 | insto = std::move(i.inst); | 339 | insto = std::move(i.inst); |
| @@ -304,6 +346,9 @@ void exec_stage::clock() { | |||
| 304 | 346 | ||
| 305 | auto next_pc = inst.next_pc; | 347 | auto next_pc = inst.next_pc; |
| 306 | 348 | ||
| 349 | if (inst.need_exec_load) | ||
| 350 | pte(i.tr, "", fmt::format("loaddata={:04o}", *i.inst.data)); | ||
| 351 | |||
| 307 | if (inst.need_autoinc_store) { | 352 | if (inst.need_autoinc_store) { |
| 308 | ++stores_sent; // It was sent by Indir stage | 353 | ++stores_sent; // It was sent by Indir stage |
| 309 | ++c.stores_done; | 354 | ++c.stores_done; |
| @@ -339,9 +384,11 @@ void exec_stage::clock() { | |||
| 339 | sr.mask.fill(false); | 384 | sr.mask.fill(false); |
| 340 | sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; | 385 | sr.mask[*inst.final_address & memory::LINE_BYTE_OFFSET_MASK] = true; |
| 341 | sr.write = true; | 386 | sr.write = true; |
| 342 | c.exec_mem_commandp.write(std::move(sr)); | 387 | c.exec_mem_store_commandp.write(std::move(sr)); |
| 343 | ++stores_sent; | 388 | ++stores_sent; |
| 344 | ++c.stores_done; | 389 | ++c.stores_done; |
| 390 | c.dcache.opportunistic_store(*inst.final_address, *inst.data); | ||
| 391 | c.icache.opportunistic_store(*inst.final_address, *inst.data); | ||
| 345 | } | 392 | } |
| 346 | 393 | ||
| 347 | assert(inst.next_pc == next_pc || inst.possibly_redirects); | 394 | assert(inst.next_pc == next_pc || inst.possibly_redirects); |
