summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Julian Blake Kongslie, 2022-07-24 14:59:03 -0700
committer: Julian Blake Kongslie, 2022-07-24 14:59:03 -0700
commit: 1aeb760d093189486efbf5adf3292881eda94eb0 (patch)
tree: 2cdd822ad198816875eb70a5783f59ee61908982
parent: Rename memory message types for more clarity. (diff)
download: multipdp8-1aeb760d093189486efbf5adf3292881eda94eb0.tar.xz
Writeback cache using explicit altsyncram instead of inferred memory. (HEAD, main)
Diffstat (limited to '')
-rw-r--r-- .gitignore 1
-rw-r--r-- PLAN 24
-rw-r--r-- hdl/defs.svh 3
-rw-r--r-- hdl/mem_broadcast.sv 2
-rw-r--r-- hdl/mem_cache.sv 189
-rw-r--r-- hdl/top.sv 6
6 files changed, 174 insertions, 51 deletions
diff --git a/.gitignore b/.gitignore
index 298becb..1922d9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
1/build 1/build
2/db 2/db
3/greybox_tmp
3/incremental_db 4/incremental_db
4/pdp8.* 5/pdp8.*
diff --git a/PLAN b/PLAN
index e083e8e..60adf5f 100644
--- a/PLAN
+++ b/PLAN
@@ -1,4 +1,28 @@
1 0. writeback cache 1 0. writeback cache
2 [✔️] Cache dirty bits
3 [✔️] Cache evicting dirty data on fills that would replace
4 [✔️] Cache not immediately forwarding writes
5 [✔️] Fix mem_cache to actually instantiate memory correctly
6 [✔️] Run Quartus in Windows to generate a Verilog template for manual instantiation of RAM blocks
7 [X] Try to use asynchronous clears for reset instead of occupying a port for SETS cycles NOPE
8 [✔️] Need at least one port capable of read-before-write
9 [✔️] Maybe don't need a second port if the first port can make write optional
10 [✔️] We might need to split our accesses across two cycles
11 [X] If so, can we infer the correct logic without explicit instantiation of the megafunction? NOPE
12 [X] Can we do asynchronous clear without explicit instantiation of the megafunction? NOPE
13 [✔️] Copy from said template into mem_cache.sv instead of trying to use inference
14 ---
15 [ ] Arbiter sending snoops to caches in response to CLI writes
16 [ ] Cache updating itself to clean state for write snoops
17 ---
18 [ ] Arbiter sending snoops to caches in response to CLI reads
19 [ ] Arbiter waiting for snoop responses from caches for CLI reads
20 [ ] Arbiter sending correct data for CLI reads (snoop responses in preference over RAM response)
21 [ ] Cache sending snoop responses for read snoops
22 ---
23 [ ] Cache forwarding snoops upstream
24 [ ] Core updating itself for write snoops (no-op)
25 [ ] Core sending snoop responses for read snoops (always no data)
2 1. pipelining that works with SMC / start working on minhdl version of the core 26 1. pipelining that works with SMC / start working on minhdl version of the core
3 2. write an SPI or I2C master on the FPGA to sample analog inputs 27 2. write an SPI or I2C master on the FPGA to sample analog inputs
4 3. support wider-than-single-word cache lines 28 3. support wider-than-single-word cache lines
diff --git a/hdl/defs.svh b/hdl/defs.svh
index 73fddaf..dc4a243 100644
--- a/hdl/defs.svh
+++ b/hdl/defs.svh
@@ -5,7 +5,7 @@
5 5
6`define PDP_ADDRESS_BITS 15 6`define PDP_ADDRESS_BITS 15
7 7
8`define NUM_PDPS 4 8`define NUM_PDPS 8
9 9
10`define UART_BYTE_BITS 8 10`define UART_BYTE_BITS 8
11 11
@@ -56,5 +56,6 @@ typedef struct packed {
56typedef struct packed { 56typedef struct packed {
57 pdp_line_address_t address; 57 pdp_line_address_t address;
58 bit snoop; 58 bit snoop;
59 bit data_valid;
59 ram_line_t data; 60 ram_line_t data;
60} mem_to_core_t; 61} mem_to_core_t;
diff --git a/hdl/mem_broadcast.sv b/hdl/mem_broadcast.sv
index 599be28..e86873e 100644
--- a/hdl/mem_broadcast.sv
+++ b/hdl/mem_broadcast.sv
@@ -48,6 +48,8 @@ module mem_broadcast
48 if (!pdp_valid[ram_data.tag-1]) begin 48 if (!pdp_valid[ram_data.tag-1]) begin
49 pdp_valid[ram_data.tag-1] = 1; 49 pdp_valid[ram_data.tag-1] = 1;
50 pdp_data[ram_data.tag-1].address = hold_data.address[`PDP_ADDRESS_BITS-1:$clog2(`RAM_LINE_WORDS)]; 50 pdp_data[ram_data.tag-1].address = hold_data.address[`PDP_ADDRESS_BITS-1:$clog2(`RAM_LINE_WORDS)];
51 pdp_data[ram_data.tag-1].snoop = 0;
52 pdp_data[ram_data.tag-1].data_valid = 1;
51 pdp_data[ram_data.tag-1].data = hold_data.data; 53 pdp_data[ram_data.tag-1].data = hold_data.data;
52 hold_valid = 0; 54 hold_valid = 0;
53 end 55 end
diff --git a/hdl/mem_cache.sv b/hdl/mem_cache.sv
index 181d8d7..e7fcac7 100644
--- a/hdl/mem_cache.sv
+++ b/hdl/mem_cache.sv
@@ -6,8 +6,6 @@ module mem_cache
6 ( input bit clock 6 ( input bit clock
7 , input bit reset 7 , input bit reset
8 8
9 , input bit clear
10
11 , output bit core_command_ready 9 , output bit core_command_ready
12 , input bit core_command_valid 10 , input bit core_command_valid
13 , input core_to_mem_t core_command_data 11 , input core_to_mem_t core_command_data
@@ -32,6 +30,7 @@ module mem_cache
32 30
33 typedef struct packed { 31 typedef struct packed {
34 bit valid; 32 bit valid;
33 bit dirty;
35 address_tag_t address; 34 address_tag_t address;
36 } tag_t; 35 } tag_t;
37 36
@@ -40,73 +39,171 @@ module mem_cache
40 ram_line_t data; 39 ram_line_t data;
41 } cache_entry_t; 40 } cache_entry_t;
42 41
43 (* ramstyle = "no_rw_check, M9K" *) cache_entry_t cache [(1<<SET_BITS)-1:0]; 42 struct packed {
43 set_t address;
44 bit read_enable;
45 bit write_enable;
46 cache_entry_t read_data;
47 cache_entry_t write_data;
48 } cache[1:0];
44 49
45 bit outstanding_fill; 50 altsyncram
51 #( .address_reg_b("CLOCK0")
52 , .clock_enable_input_a("BYPASS"), .clock_enable_input_b("BYPASS")
53 , .clock_enable_output_a("BYPASS"), .clock_enable_output_b("BYPASS")
54 , .indata_reg_b("CLOCK0")
55 , .numwords_a(1 << SET_BITS), .numwords_b(1 << SET_BITS)
56 , .operation_mode("BIDIR_DUAL_PORT")
57 , .outdata_aclr_a("NONE"), .outdata_aclr_b("NONE")
58 , .outdata_reg_a("UNREGISTERED"), .outdata_reg_b("UNREGISTERED")
59 , .power_up_uninitialized("TRUE")
60 , .ram_block_type("M9K")
61 , .read_during_write_mode_mixed_ports("OLD_DATA")
62 , .read_during_write_mode_port_a("OLD_DATA"), .read_during_write_mode_port_b("OLD_DATA")
63 , .widthad_a(SET_BITS), .widthad_b(SET_BITS)
64 , .width_a($bits(cache_entry_t)), .width_b($bits(cache_entry_t))
65 , .width_byteena_a(1), .width_byteena_b(1)
66 , .wrcontrol_wraddress_reg_b("CLOCK0")
67 ) cache_controller
68 ( .address_a(cache[0].address), .address_b(cache[1].address)
69 , .clock0(~clock)
70 , .data_a(cache[0].write_data), .data_b(cache[1].write_data)
71 , .rden_a(cache[0].read_enable), .rden_b(cache[1].read_enable)
72 , .wren_a(cache[0].write_enable), .wren_b(cache[1].write_enable)
73 , .q_a(cache[0].read_data), .q_b(cache[1].read_data)
74 , .aclr0(1'b0), .aclr1(1'b0)
75 , .addressstall_a(1'b0), .addressstall_b(1'b0)
76 , .byteena_a(1'b1), .byteena_b(1'b1)
77 , .clock1(1'b1)
78 , .clocken0(1'b1), .clocken1(1'b1), .clocken2(1'b1), .clocken3(1'b1)
79 , .eccstatus()
80 );
46 81
47 bit [SET_BITS:0] reset_entry; 82 bit [SET_BITS:0] reset_entry;
48 83
84 // "The" fill buffer
85 address_tag_t working_tag;
86 set_t working_set;
87
88 (* syn_encoding = "one-hot" *) enum int unsigned
89 { AWAIT_CORE_COMMAND
90 , AWAIT_CACHE
91 , SEND_FILL_REQUEST
92 , AWAIT_RAM_RESPONSE
93 } state;
94
49 always @(posedge clock) begin 95 always @(posedge clock) begin
50 if (reset) begin 96 if (reset) begin
51 core_command_ready = 0; 97 core_command_ready = 0;
52 ram_command_valid = 0; 98 ram_command_valid = 0;
53 ram_response_ready = 0; 99 ram_response_ready = 0;
54 core_response_valid = 0; 100 core_response_valid = 0;
55 outstanding_fill = 0;
56 reset_entry = 0; 101 reset_entry = 0;
102 cache[0].address = 0; cache[1].address = 0;
103 cache[0].read_enable = 0; cache[1].read_enable = 0;
104 cache[0].write_enable = 0; cache[1].write_enable = 0;
105 cache[0].write_data = 0; cache[1].write_data = 0;
106 state = state.first;
57 end else begin 107 end else begin
58 if (clear)
59 reset_entry = 0;
60
61 if (ram_command_ready && ram_command_valid) 108 if (ram_command_ready && ram_command_valid)
62 ram_command_valid = 0; 109 ram_command_valid = 0;
63 if (core_response_ready && core_response_valid) 110 if (core_response_ready && core_response_valid)
64 core_response_valid = 0; 111 core_response_valid = 0;
65 112
66 if (!outstanding_fill && !reset_entry[SET_BITS]) begin 113 if (!reset_entry[SET_BITS]) begin
67 cache[reset_entry[SET_BITS-1:0]] = 0; 114 cache[0].address = reset_entry[SET_BITS-1:0];
68 ++reset_entry; 115 cache[1].address = reset_entry[SET_BITS-1:0] + 1;
69 end else if (ram_response_ready && ram_response_valid && outstanding_fill) begin 116 cache[0].read_enable = 0; cache[1].read_enable = 0;
70 automatic address_tag_t tag; 117 cache[0].write_enable = 1; cache[1].write_enable = 1;
71 automatic set_t set; 118 cache[0].write_data = 0; cache[1].write_data = 0;
72 automatic cache_entry_t entry; 119 reset_entry += 2;
73 {tag, set} = ram_response_data.address; 120 end else begin
74 entry.tag.valid = 1; 121 case (state)
75 entry.tag.address = tag; 122
76 entry.data = ram_response_data.data; 123 AWAIT_CORE_COMMAND: begin
77 cache[set] = entry; 124 cache[0].read_enable = 0;
78 core_response_valid = 1; 125 cache[0].write_enable = 0;
79 core_response_data = ram_response_data; 126
80 outstanding_fill = 0; 127 if (core_command_ready && core_command_valid) begin
81 end else if (core_command_ready && core_command_valid) begin 128 {working_tag, working_set} = core_command_data.address;
82 automatic address_tag_t tag; 129 cache[0].address = working_set;
83 automatic set_t set; 130 cache[0].read_enable = 1;
84 {tag, set} = core_command_data.address; 131 cache[0].write_enable = core_command_data.write;
85 if (core_command_data.write) begin 132 cache[0].write_data.tag.valid = 1;
86 automatic cache_entry_t entry; 133 cache[0].write_data.tag.dirty = 1;
87 entry.tag.valid = 1; 134 cache[0].write_data.tag.address = working_tag;
88 entry.tag.address = tag; 135 cache[0].write_data.data = core_command_data.data;
89 // FIXME masked stores 136 state = AWAIT_CACHE;
90 entry.data = core_command_data.data; 137 end
91 cache[set] = entry; 138 end
92 ram_command_valid = 1; 139
93 ram_command_data = core_command_data; 140 AWAIT_CACHE: begin
94 end else begin 141 if (cache[0].read_data.tag.valid && cache[0].read_data.tag.dirty && cache[0].read_data.tag.address != working_tag) begin
95 automatic cache_entry_t entry = cache[set]; 142 ram_command_valid = 1;
96 if (entry.tag.valid && entry.tag.address == tag) begin 143 ram_command_data.address = {cache[0].read_data.tag.address, working_set};
144 ram_command_data.write = 1;
145 ram_command_data.snoop_response = 0;
146 ram_command_data.data = cache[0].read_data.data;
147 ram_command_data.mask = ~0;
148 state = cache[0].write_enable ? AWAIT_CORE_COMMAND : SEND_FILL_REQUEST;
149 end else if (cache[0].write_enable) begin
150 core_command_ready = !core_response_valid && !ram_command_valid;
151 state = AWAIT_CORE_COMMAND;
152 end else if (cache[0].read_data.tag.valid && cache[0].read_data.tag.address == working_tag) begin
97 core_response_valid = 1; 153 core_response_valid = 1;
98 core_response_data.address = {tag, set}; 154 core_response_data.address = {working_tag, working_set};
99 core_response_data.data = entry.data; 155 core_response_data.snoop = 0;
156 core_response_data.data_valid = 1;
157 core_response_data.data = cache[0].read_data.data;
158 state = AWAIT_CORE_COMMAND;
100 end else begin 159 end else begin
101 ram_command_valid = 1; 160 ram_command_valid = 1;
102 ram_command_data = core_command_data; 161 ram_command_data.address = {working_tag, working_set};
103 outstanding_fill = 1; 162 ram_command_data.write = 0;
163 ram_command_data.snoop_response = 0;
164 state = AWAIT_RAM_RESPONSE;
104 end 165 end
166
167 cache[0].read_enable = 0;
168 cache[0].write_enable = 0;
105 end 169 end
106 end
107 170
108 core_command_ready = reset_entry[SET_BITS] && !ram_command_valid && !core_response_valid && !outstanding_fill; 171 SEND_FILL_REQUEST: begin
109 ram_response_ready = !core_response_valid; 172 cache[0].read_enable = 0;
173 cache[0].write_enable = 0;
174
175 if (!ram_command_valid) begin
176 ram_command_valid = 1;
177 ram_command_data.address = {working_tag, working_set};
178 ram_command_data.write = 0;
179 ram_command_data.snoop_response = 0;
180 state = AWAIT_RAM_RESPONSE;
181 end
182 end
183
184 AWAIT_RAM_RESPONSE: begin
185 cache[0].read_enable = 0;
186 cache[0].write_enable = 0;
187
188 if (ram_response_valid && ram_response_data.address == {working_tag, working_set} && ram_response_data.data_valid) begin
189 core_response_valid = 1;
190 core_response_data = ram_response_data;
191 cache[0].address = working_set;
192 cache[0].read_enable = 0;
193 cache[0].write_enable = 1;
194 cache[0].write_data.tag.valid = 1;
195 cache[0].write_data.tag.dirty = 0;
196 cache[0].write_data.tag.address = working_tag;
197 cache[0].write_data.data = ram_response_data.data;
198 state = AWAIT_CORE_COMMAND;
199 end
200 end
201
202 endcase
203
204 core_command_ready = state == AWAIT_CORE_COMMAND && !core_response_valid && !ram_command_valid;
205 ram_response_ready = state == AWAIT_RAM_RESPONSE && !core_response_valid && !ram_command_valid;
206 end
110 end 207 end
111 end 208 end
112 209
diff --git a/hdl/top.sv b/hdl/top.sv
index 96b7510..7d6ba8e 100644
--- a/hdl/top.sv
+++ b/hdl/top.sv
@@ -493,9 +493,7 @@ module top
493`else 493`else
494 mem_cache cache 494 mem_cache cache
495 ( .clock(internal_clock) 495 ( .clock(internal_clock)
496 , .reset(internal_reset) 496 , .reset(internal_reset || clear_caches)
497
498 , .clear(clear_caches)
499 497
500 , .core_command_ready(cache_command_ready) 498 , .core_command_ready(cache_command_ready)
501 , .core_command_valid(cache_command_valid) 499 , .core_command_valid(cache_command_valid)
@@ -517,7 +515,7 @@ module top
517 515
518 core cpu 516 core cpu
519 ( .clk(internal_clock) 517 ( .clk(internal_clock)
520 , .reset(internal_reset) 518 , .reset(internal_reset || clear_caches)
521 519
522 , .uart_tx_ready(tx_ready) 520 , .uart_tx_ready(tx_ready)
523 , .uart_tx_valid(tx_valid) 521 , .uart_tx_valid(tx_valid)