From 1aeb760d093189486efbf5adf3292881eda94eb0 Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Sun, 24 Jul 2022 14:59:03 -0700 Subject: Writeback cache using explicit altsyncram instead of inferred memory. --- .gitignore | 1 + PLAN | 24 +++++++ hdl/defs.svh | 3 +- hdl/mem_broadcast.sv | 2 + hdl/mem_cache.sv | 191 ++++++++++++++++++++++++++++++++++++++------------- hdl/top.sv | 6 +- 6 files changed, 175 insertions(+), 52 deletions(-) diff --git a/.gitignore b/.gitignore index 298becb..1922d9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /build /db +/greybox_tmp /incremental_db /pdp8.* diff --git a/PLAN b/PLAN index e083e8e..60adf5f 100644 --- a/PLAN +++ b/PLAN @@ -1,4 +1,28 @@ 0. writeback cache + [✔️] Cache dirty bits + [✔️] Cache evicting dirty data on fills that would replace + [✔️] Cache not immediately forwarding writes + [✔️] Fix mem_cache to actually instantiate memory correctly + [✔️] Run Quartus in Windows to generate a Verilog template for manual instantiation of RAM blocks + [X] Try to use asynchronous clears for reset instead of occupying a port for SETS cycles NOPE + [✔️] Need at least one port capable of read-before-write + [✔️] Maybe don't need a second port if the first port can make write optional + [✔️] We might need to split our accesses across two cycles + [X] If so, can we infer the correct logic without explicit instantiation of the megafunction? NOPE + [X] Can we do asynchronous clear without explicit instantiation of the megafunction? NOPE + [✔️] Copy from said template into mem_cache.sv instead of trying to use inference + --- + [ ] Arbiter sending snoops to caches in response to CLI writes + [ ] Cache updating itself to clean state for write snoops + --- + [ ] Arbiter sending snoops to caches in response to CLI reads + [ ] Arbiter waiting for snoop responses from caches for CLI reads + [ ] Arbiter sending correct data for CLI reads (snoop responses in preference over RAM response) + [ ] Cache sending snoop responses for read snoops + --- + [ ] Cache forwarding snoops upstream + [ ] Core updating itself for write snoops (no-op) + [ ] Core sending snoop responses for read snoops (always no data) 1. pipelining that works with SMC / start working on minhdl version of the core 2. write an SPI or I2C master on the FPGA to sample analog inputs 3. support wider-than-single-word cache lines diff --git a/hdl/defs.svh b/hdl/defs.svh index 73fddaf..dc4a243 100644 --- a/hdl/defs.svh +++ b/hdl/defs.svh @@ -5,7 +5,7 @@ `define PDP_ADDRESS_BITS 15 -`define NUM_PDPS 4 +`define NUM_PDPS 8 `define UART_BYTE_BITS 8 @@ -56,5 +56,6 @@ typedef struct packed { typedef struct packed { pdp_line_address_t address; bit snoop; + bit data_valid; ram_line_t data; } mem_to_core_t; diff --git a/hdl/mem_broadcast.sv b/hdl/mem_broadcast.sv index 599be28..e86873e 100644 --- a/hdl/mem_broadcast.sv +++ b/hdl/mem_broadcast.sv @@ -48,6 +48,8 @@ module mem_broadcast if (!pdp_valid[ram_data.tag-1]) begin pdp_valid[ram_data.tag-1] = 1; pdp_data[ram_data.tag-1].address = hold_data.address[`PDP_ADDRESS_BITS-1:$clog2(`RAM_LINE_WORDS)]; + pdp_data[ram_data.tag-1].snoop = 0; + pdp_data[ram_data.tag-1].data_valid = 1; pdp_data[ram_data.tag-1].data = hold_data.data; hold_valid = 0; end diff --git a/hdl/mem_cache.sv b/hdl/mem_cache.sv index 181d8d7..e7fcac7 100644 --- a/hdl/mem_cache.sv +++ b/hdl/mem_cache.sv @@ -6,8 +6,6 @@ module mem_cache ( input bit clock , input bit reset - , input bit clear - , output bit core_command_ready , input bit core_command_valid , input core_to_mem_t core_command_data @@ -32,6 +30,7 @@ module mem_cache typedef struct packed { bit valid; + bit dirty; address_tag_t address; } tag_t; @@ -40,73 +39,171 @@ module mem_cache ram_line_t data; } cache_entry_t; - (* ramstyle = "no_rw_check, M9K" *) cache_entry_t cache [(1<