From 82cc71261d3d32012d33d3bebe56ca5e3b0bcdbd Mon Sep 17 00:00:00 2001 From: Julian Blake Kongslie Date: Sun, 2 Oct 2022 15:32:49 -0700 Subject: Initial commit. --- BigGolf | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 52 ++++++++++++++++++++ infra/arbiter.h | 42 ++++++++++++++++ infra/pipetrace.cpp | 9 ++++ infra/pipetrace.h | 37 ++++++++++++++ infra/port.h | 45 ++++++++++++++++++ infra/queue.h | 29 +++++++++++ infra/sim.cpp | 9 ++++ infra/sim.h | 38 +++++++++++++++ infra/stat.h | 35 ++++++++++++++ isa/checker.cpp | 44 +++++++++++++++++ isa/decode.cpp | 96 +++++++++++++++++++++++++++++++++++++ isa/isa.h | 44 +++++++++++++++++ memory/dram.h | 96 +++++++++++++++++++++++++++++++++++++ memory/line.h | 15 ++++++ pt | 104 ++++++++++++++++++++++++++++++++++++++++ test | 58 ++++++++++++++++++++++ 17 files changed, 888 insertions(+) create mode 100644 BigGolf create mode 100644 Makefile create mode 100644 infra/arbiter.h create mode 100644 infra/pipetrace.cpp create mode 100644 infra/pipetrace.h create mode 100644 infra/port.h create mode 100644 infra/queue.h create mode 100644 infra/sim.cpp create mode 100644 infra/sim.h create mode 100644 infra/stat.h create mode 100644 isa/checker.cpp create mode 100644 isa/decode.cpp create mode 100644 isa/isa.h create mode 100644 memory/dram.h create mode 100644 memory/line.h create mode 100755 pt create mode 100755 test diff --git a/BigGolf b/BigGolf new file mode 100644 index 0000000..9a308bd --- /dev/null +++ b/BigGolf @@ -0,0 +1,135 @@ +The "Big Golf" Microarchitecture + + + +Allowed memory combinations: + * Any two loads + * Any two stores with different addresses (n.b. LLC is limited to 1 eviction per cycle) + * Any load with any younger store + +Instruction opcodes: + 0 AND logical AND from memory to accumulator + 1 TAD Two's-complement ADd from memory to accumulator + 2 ISZ Increment and Skip if Zero + 3 DCA Deposit and Clear Accumulator + 4 JMS JuMp Subroutine + 5 JMP JuMP + 6 IOT In-Out Transfer (device accesses) + 7 OPR microsequenced OPeRations (miscellaneous, like clear/rotate/etc) + +Memory transactions: Opcodes that do it: (second set is the indirect versions) + * Fetch instruction 01234567 01234567 + * Indirect address load 0123 + * Autoincrement store 0123 + * Execution load 012 012 45 + * Execution store 234 234 + + + +┌─────┐ ┌──────┐ ┌────┐ +│Fetch├──────►│Decode│ ┌►│Exec│ +└─────┘ └──────┘ │ └────┘ + │ + next_pc ┌──init_indirect_load │ init_execution_store + │ init_execution_load──┤ retire + │ init_execution_store │ + │ retire │ + │ rubberband_stall(1/2)│ + │ │ + │ ┌───────┐ │ + └►│Autoinc│ │ + └───────┘ │ + │ + ┌──init_autoinc_store │ + │ init_execution_load──┤ + │ init_execution_store │ + │ retire │ + │ │ + │ ┌─────┐ │ + └►│Indir│ │ + └─────┘ │ + │ + init_execution_load──┘ + init_execution_store + retire + + + +Possible arbitration techniques: + * Rubberband stalling in Decode + positional arbitration + * Age/address/operation comparison without rubberbanding + * Longer clock cycles, or + * Extra cycle + +What to do with cache misses? + * Stall entire pipeline to maintain simpler ordering constraints + * If only loads are missing, allow everything else to proceed? + * Always allow Fetch to proceed? + +Need separate logic to detect SMC clobbers *anyway* + + + +OPR opcodes: + + "group 1" + _0___1___2_ _3_ _4_ _5_ _6_ _7_ _8_ _9_ _10 _11 + | | | | | | |RAR|RAL| 0 | | + | 1 1 1 | 0 |CLA|CLL|CMA|CML|RTR|RTL| 1 |IAC| + |___|___|___|___|___|___|___|___|___|___|___|___| + + CLA CLear Accumulator + CLL CLear Link + CMA CoMplement Accumulator + CML CoMplement Link + RAR Rotate Accumulator Right (if bit 10 is 0) + RAL Rotate Accumulator Left (if bit 10 is 0) + RTR Rotate (Twice) accumulator and link Right (if bit 10 is 1) + RTL Rotate (Twice) accumulator and link Left (if bit 10 is 1) + IAC Increment ACcumulator + BSW Byte Swap word in accumulator (if bits 8 and 9 are 0, and bit 10 is 1) + + Logical order of operations: + CLA, CLL + CMA, CML + IAC + RAR, RAL, RTR, RTL, BSW + + "group 2" + _0___1___2_ _3_ _4_ _5_ _6_ _7_ _8_ _9_ _10 _11 + | | | |SMA|SZA|SNL| 0 | | | | + | 1 1 1 | 1 |CLA|SPA|SNA|SZL| 1 |OSR|HLT| 0 | + |___|___|___|___|___|___|___|___|___|___|___|___| + + SMA Skip on Minus Accumulator (skip if high bit of accumulator is set) (if bit 8 is 0) + SPA Skip on Plus Accumulator (skip if high bit of accumulator is clear) (if bit 8 is 1) + SZA Skip on Zero Accumulator (if bit 8 is 0) + SNA Skip on Nonzero Accumulator (if bit 8 is 1) + SNL Skip on Nonzero Link (if bit 8 is 0) + SZL Skip on Zero Link (if bit 8 is 1) + OSR bitwise Or Switch Register into accumulator + HLT HaLT processor + CLA CLear Accumulator + + Logical order of operations: + SMA, SZA, SNL + SPA, SNA, SZL + CLA + OSR, HLT + + "mq" + _0___1___2_ _3_ _4_ _5_ _6_ _7_ _8_ _9_ _10 _11 + | | | | | | | | | | | + | 1 1 1 | 1 |CLA|MQA| |MQL| | | | 1 | + |___|___|___|___|___|___|___|___|___|___|___|___| + + CLA CLear Accumulator + MQL MQ Loads from Accumulator + MQA bitwise or MQ into Accumulator + + bits 6,8,9,10 are used for extended arithmetic instructions + see https://homepage.divms.uiowa.edu/~jones/pdp8/refcard/74.html + + Logical order of operations: + CLA + MQA, MQL (simultaneous parallel assignment) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..863826d --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +BUILD := build + +override PACKAGES := fmt + +WARNINGS := -Wall -Werror +OPTIMIZE := -Og -flto +DEBUG := -g + +CXX := g++ +CXXFLAGS := $(WARNINGS) $(OPTIMIZE) $(DEBUG) + +SED := sed + +XXD := xxd + +override CXXFLAGS += -std=c++20 + +override COMPILE_FLAGS := -MMD -MP -I. +override LINK_FLAGS := + +ifneq "$(strip $(PACKAGES))" "" + override COMPILE_FLAGS += $(shell pkg-config --cflags $(PACKAGES)) + override LINK_FLAGS += -Wl,--start-group $(shell pkg-config --libs $(PACKAGES)) -Wl,--end-group +endif + +default: $(BUILD)/minigolf +.PHONY: default + +clean: + rm -rf $(BUILD) +.PHONY: clean + +.SUFFIXES: + +override SOURCES := $(shell find -\( -name build -prune -\) -o -\( -name \*.cpp -print -\)) + +override OBJECTS := $(addprefix $(BUILD)/, $(addsuffix .o, $(basename $(SOURCES)))) +override DEPENDS := $(addprefix $(BUILD)/, $(addsuffix .d, $(basename $(SOURCES)))) + +-include $(DEPENDS) + +$(BUILD)/%.o: %.cpp + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(COMPILE_FLAGS) -c -o $@ $< + +$(BUILD)/minigolf: $(OBJECTS) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) -o $@ -Wl,--start-group $+ -Wl,--end-group $(LINK_FLAGS) + +$(BUILD)/%.bin: %.hex + @mkdir -p $(dir $@) + $(SED) -e "s/\\s*#.*//" $< | $(XXD) -r -p > $@ diff --git a/infra/arbiter.h b/infra/arbiter.h new file mode 100644 index 0000000..5dd1647 --- /dev/null +++ b/infra/arbiter.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + +#include "infra/sim.h" + +namespace infra { + template struct priority_arbiter : public sim { + std::array, peers> peerp; + port *outp = nullptr; + + void clock() { + for (unsigned int i = 0; i < peers; ++i) { + if (outp->can_write() && peerp[i].can_read()) + outp->write(peerp[i].read()); + } + } + }; + + template struct round_robin_arbiter : public sim { + std::array, peers> peerp; + port *outp = nullptr; + unsigned int initial = 0; + + void clock() { + bool initially_empty = outp->can_write(); + for (unsigned int i = initial; i < peers; ++i) { + if (outp->can_write() && peerp[i].can_read()) + outp->write(peerp[i].read()); + } + for (unsigned int i = 0; i < initial; ++i) { + if (outp->can_write() && peerp[i].can_read()) + outp->write(peerp[i].read()); + } + if (initially_empty && !outp->can_write()) + if (++initial == peers) + initial = 0; + } + }; +} diff --git a/infra/pipetrace.cpp b/infra/pipetrace.cpp new file mode 100644 index 0000000..e6642ef --- /dev/null +++ b/infra/pipetrace.cpp @@ -0,0 +1,9 @@ +#include +#include + +#include "infra/pipetrace.h" + +namespace infra { + std::ostream *pt::ptfile = nullptr; + std::uint64_t pt::next_record = 0; +} diff --git a/infra/pipetrace.h b/infra/pipetrace.h new file mode 100644 index 0000000..656b9b9 --- /dev/null +++ b/infra/pipetrace.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include +#include + +namespace infra { + struct transaction { + std::uint64_t record = ~(std::uint64_t)0; + }; + + struct pt { + static std::ostream *ptfile; + + static std::uint64_t next_record; + + static transaction toplevel() { + transaction t; + t.record = next_record++; + return t; + } + + static transaction child(const transaction &p) { + transaction t; + t.record = next_record++; + if (ptfile) + *ptfile << fmt::format("{} parent {}\n", t.record, p.record); + return t; + } + + static void event(const transaction &t, const char *event, std::uint64_t time, const std::string &data) { + if (ptfile) + *ptfile << fmt::format("@{} {} {} {}\n", time, t.record, event, data); + } + }; +} diff --git a/infra/port.h b/infra/port.h new file mode 100644 index 0000000..06a3aa5 --- /dev/null +++ b/infra/port.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include + +#include "infra/sim.h" + +namespace infra { + template struct port : public sim { + std::optional consumer_side; + std::optional producer_side; + + bool can_read() { return consumer_side.has_value(); } + bool can_write() { return !producer_side.has_value(); } + + T read() { + assert(can_read()); + auto x = std::move(*consumer_side); + consumer_side.reset(); + return x; + } + + const T & peek() { + assert(can_read()); + return *consumer_side; + } + + void discard() { + consumer_side.reset(); + } + + void write(T &&x) { + assert(can_write()); + producer_side = std::move(x); + } + + void unclock() { + if (!consumer_side && producer_side) { + consumer_side = std::move(*producer_side); + producer_side.reset(); + } + } + }; +} diff --git a/infra/queue.h b/infra/queue.h new file mode 100644 index 0000000..1e490bc --- /dev/null +++ b/infra/queue.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include +#include + +#include "infra/port.h" +#include "infra/sim.h" + +namespace infra { + template struct queue : public sim { + port input; + port *output = nullptr; + std::deque elements; + + void clock() { + if (input.can_read() && elements.size() < size) { + auto x = input.read(); + elements.emplace_back(std::move(x)); + } + if (output->can_write() && !elements.empty()) { + auto &x = elements.front(); + output->write(std::move(x)); + elements.pop_front(); + } + } + }; +} diff --git a/infra/sim.cpp b/infra/sim.cpp new file mode 100644 index 0000000..21acc8c --- /dev/null +++ b/infra/sim.cpp @@ -0,0 +1,9 @@ +#include +#include + +#include "infra/sim.h" + +namespace infra { + std::vector sim::sims; + std::uint64_t sim::now = 0; +} diff --git a/infra/sim.h b/infra/sim.h new file mode 100644 index 0000000..185916a --- /dev/null +++ b/infra/sim.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include + +#include "infra/pipetrace.h" + +namespace infra { + struct sim { + virtual void clock() {} + virtual void unclock() {} + + static std::vector sims; + + static std::uint64_t now; + + sim() { + sims.emplace_back(this); + } + + virtual ~sim() { + std::erase(sims, this); + } + + static void advance() { + for (auto &s : sims) + s->clock(); + for (auto &s : sims) + s->unclock(); + ++now; + } + + void pte(const transaction &t, const char *event, const std::string &data) { + pt::event(t, event, now, data); + } + }; +} diff --git a/infra/stat.h b/infra/stat.h new file mode 100644 index 0000000..f1ca75a --- /dev/null +++ b/infra/stat.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +#include "infra/sim.h" + +namespace infra { + struct stat : public sim { + std::string name; + std::uint64_t numerator = 0; + std::uint64_t denominator = 0; + + stat(std::string name) + : name(std::move(name)) + { } + + ~stat() { + fmt::print("# {} {}\n", name, (double)numerator/(double)denominator); + } + + void unclock() { + ++denominator; + } + + stat & operator++() { + ++numerator; + return *this; + } + + stat & operator++(int) { + return operator++(); + } + }; +} diff --git a/isa/checker.cpp b/isa/checker.cpp new file mode 100644 index 0000000..cd802a8 --- /dev/null +++ b/isa/checker.cpp @@ -0,0 +1,44 @@ +#include + +#include "isa/isa.h" + +void checker::execute() { + assert(!halt); + auto int_enable_delay = ctlregs[ctlreg::INT_ENABLE] >> 1; + if (ctlregs[ctlreg::INT_ENABLE] & 1) { + // check for interrupt + } + ctlregs[ctlreg::INT_ENABLE] = (int_enable_delay << 1) | int_enable_delay; + inst = decode(ctlregs[ctlreg::DATA_INSTRUCTION_FIELD_BUFFER], + pc, + mem.fetch(pc)); + auto next_pc = inst.next_pc; + if (inst.need_indirect_load) { + auto addr = mem.fetch(inst.init_address.value()); + if (inst.need_autoinc_store) + mem.store(*inst.init_address, (addr + 1) & 07777); + inst.final_address = addr; + } else { + assert(!inst.need_autoinc_store); + } + if (inst.need_exec_load) + inst.data = mem.fetch(inst.final_address.value()); + if (inst.need_read_acc) + inst.acc = acc; + if (inst.need_read_link) + inst.link = link; + if (inst.read_ctlreg.has_value()) + inst.ctlval = ctlregs[*inst.read_ctlreg]; + inst.execute(); + if (inst.need_write_acc) + acc = inst.acc.value(); + if (inst.need_write_link) + link = inst.link.value(); + if (inst.write_ctlreg.has_value()) + ctlregs[*inst.write_ctlreg] = inst.ctlval.value(); + if (inst.need_exec_store) + mem.store(inst.final_address.value(), inst.data.value()); + assert(inst.next_pc == next_pc || inst.possibly_redirects); + pc = inst.next_pc; + halt = inst.halt; +} diff --git a/isa/decode.cpp b/isa/decode.cpp new file mode 100644 index 0000000..8a85d41 --- /dev/null +++ b/isa/decode.cpp @@ -0,0 +1,96 @@ +#include + +#include "isa/isa.h" + +instruction_context decode(unsigned int dfifb, unsigned int pc, unsigned int bits) +{ + instruction_context inst; + + auto df = dfifb >> 3; + auto ifb = dfifb & 00007; + + inst.next_pc = (pc & ~07777) | ((pc + 1) & 07777); + + switch (bits >> 9) { + case 0: // AND + inst.need_exec_load = true; + inst.need_read_acc = true; + inst.need_write_acc = true; + inst.ef = [](auto &ctx) { + ctx.acc = ctx.acc.value() & ctx.data.value(); + }; + break; + case 1: // TAD + inst.need_exec_load = true; + inst.need_read_acc = true; + inst.need_read_link = true; + inst.need_write_acc = true; + inst.need_write_link = true; + inst.ef = [](auto &ctx) { + unsigned int sum = (ctx.link.value() << 12) + ctx.acc.value() + ctx.data.value(); + ctx.link = (sum >> 12) & 1; + ctx.acc = sum & 07777; + }; + break; + case 2: // ISZ + inst.need_exec_load = true; + inst.need_exec_store = true; + inst.possibly_redirects = true; + inst.ef = [](auto &ctx) { + ctx.data = (ctx.data.value() + 1) & 07777; + if (*ctx.data) + ctx.next_pc = (ctx.next_pc & ~07777) | ((ctx.next_pc + 1) & 07777); + }; + break; + case 3: // DCA + inst.need_read_acc = true; + inst.need_write_acc = true; + inst.need_exec_store = true; + inst.ef = [](auto &ctx) { + ctx.data = ctx.acc.value(); + ctx.acc = 0; + }; + break; + case 4: // JMS + inst.need_exec_store = true; + inst.possibly_redirects = true; + inst.ef = [ifb](auto &ctx) { + ctx.data = ctx.next_pc; + ctx.next_pc = (ifb << 12) | ((ctx.final_address.value() + 1) & 07777); + }; + break; + case 5: // JMP + inst.possibly_redirects = true; + inst.ef = [ifb](auto &ctx) { + ctx.next_pc = (ifb << 12) | (ctx.final_address.value() & 07777); + }; + break; + case 6: // IOT + inst.ef = [bits](auto &ctx) { + assert(false); + }; + break; + case 7: // OPR + inst.ef = [bits](auto &ctx) { + assert(false); + }; + break; + } + + // Instructions with memory operands may be direct or indirect + if (inst.need_exec_load || inst.need_exec_store || inst.possibly_redirects) { + auto addr = (df << 12) | ((bits & 00200) ? (next_pc & 07600) : 0) | (bits & 00177); + if (bits & 00400) { + inst.need_indirect_load = true; + inst.init_address = addr; + } else { + inst.final_address = addr; + } + } + + // Non-jump indirect memory operands may be autoincrementing depending on operand bits + if (!inst.possibly_redirects && inst.need_indirect_load && ((bits & 00170) == 00010)) + inst.need_autoinc_store = true; + + return inst; +} diff --git a/isa/isa.h b/isa/isa.h new file mode 100644 index 0000000..3effb5b --- /dev/null +++ b/isa/isa.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +#include "infra/pipetrace.h" + +enum class ctlreg { + DATA_INSTRUCTION_FIELD_BUFFER, // (df << 3) | if_buffer + DATA_INSTRUCTION_FIELD_SAVED, // (df_saved << 3) | if_saved + INT_ENABLE, // (int_enable_delay << 1) | int_enable +}; + +struct instruction_context { + infra::transaction transaction; + + // Known statically at decode time + bool need_indirect_load = false; // final_address = mem[init_address] + bool need_autoinc_store = false; // mem[init_address] += 1 + bool need_exec_load = false; // data = mem[final_address] + bool need_read_acc = false; // acc = %acc + bool need_read_link = false; // link = %link + std::optional read_ctlreg; // ctlval = %[read_ctlreg] + bool need_write_acc = false; // %acc = acc + bool need_write_link = false; // %link = link + std::optional write_ctlreg; // %[write_ctlreg] = ctlval + bool need_exec_store = false; // mem[final_address] = data + bool possibly_redirects = false; // %pc = next_pc + + std::function ef; + void execute() { ef(*this); } + + // May change over the lifetime of the instruction execution + unsigned int next_pc; // includes IF + std::optional init_address; // includes DF + std::optional final_address; // includes DF + std::optional ctlval; + std::optional data; + std::optional acc; + std::optional link; + bool halt = false; +}; + +instruction_context decode(unsigned int df, unsigned int pc, unsigned int bits); diff --git a/memory/dram.h b/memory/dram.h new file mode 100644 index 0000000..f59c7a6 --- /dev/null +++ b/memory/dram.h @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "memory/line.h" + +namespace memory { + struct dram : public infra::sim { + static constexpr std::uint64_t PAGE_LINES_LOG2 = 20 - LINE_BYTES_LOG2; + static constexpr std::uint64_t PAGE_LINES = 1 << PAGE_LINES_LOG2; + static constexpr std::uint64_t PAGE_LINE_OFFSET_MASK = PAGE_LINES - 1; + static constexpr std::uint64_t PAGE_BYTES_LOG2 = PAGE_LINES_LOG2 + LINE_BYTES_LOG2; + static constexpr std::uint64_t PAGE_BYTES = 1 << PAGE_BYTES_LOG2; + static constexpr std::uint64_t PAGE_BYTE_OFFSET_MASK = PAGE_BYTES - 1; + + typedef std::array page; + + std::map image; + + struct response { + infra::transaction transaction; + std::uint64_t line_address; + line data; + }; + + struct command { + infra::transaction transaction; + std::uint64_t line_address; + line data; + std::array mask; + bool write = false; + infra::port *responsep = nullptr; + }; + + infra::port commandp; + + void clock() { + if (commandp.can_read()) { + const auto &c = commandp.peek(); + if (!c.responsep || c.responsep->can_write()) { + auto page_address = c.line_address >> PAGE_LINES_LOG2; + auto page_line = c.line_address & PAGE_LINE_OFFSET_MASK; + if (c.write) { + pte(c.transaction, "s", fmt::format("store {:x}-{:x}", page_address, page_line)); + if (c.responsep) { + response r; + r.transaction = c.transaction; + r.line_address = c.line_address; + r.data = c.data; + c.responsep->write(std::move(r)); + } + auto [p, emplaced] = image.try_emplace(page_address); + if (emplaced) + for (unsigned int i = 0; i < PAGE_LINES; ++i) + p->second[i].fill(0); + auto &l = p->second[page_line]; + for (unsigned int i = 0; i < LINE_BYTES; ++i) + if (c.mask[i]) + l[i] = c.data[i]; + } else { + pte(c.transaction, "f", fmt::format("fill {:x}-{:x}", page_address, page_line)); + if (c.responsep) { + response r; + r.transaction = c.transaction; + r.line_address = c.line_address; + if (auto p = image.find(page_address); p != image.end()) + r.data = p->second[page_line]; + else + r.data.fill(0); + c.responsep->write(std::move(r)); + } + } + commandp.discard(); + } + } + } + + void load(std::istream &fh) { + for (unsigned int page = 0; ; ++page) { + auto [p, emplaced] = image.try_emplace(page); + if (emplaced) + for (unsigned int i = 0; i < PAGE_LINES; ++i) + p->second[i].fill(0); + for (unsigned int line = 0; line < PAGE_LINES; ++line) + if (!fh.read(reinterpret_cast(p->second[line].data()), LINE_BYTES)) + return; + } + } + }; +} diff --git a/memory/line.h b/memory/line.h new file mode 100644 index 0000000..3377ec8 --- /dev/null +++ b/memory/line.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +#include "infra/port.h" +#include "infra/sim.h" + +namespace memory { + constexpr std::uint64_t LINE_BYTES_LOG2 = 4; + constexpr std::uint64_t LINE_BYTES = 1 << LINE_BYTES_LOG2; + constexpr std::uint64_t LINE_BYTE_OFFSET_MASK = LINE_BYTES - 1; + + typedef std::array line; +} diff --git a/pt b/pt new file mode 100755 index 0000000..206795f --- /dev/null +++ b/pt @@ -0,0 +1,104 @@ +#!/usr/bin/ruby + +$filter = ARGV + +$parents = {} +$events = {} +$has = {} +$data = {} +$horiz = {} +$maxtime = -1 + +$stats = false + +$stdin.each_line do | line | + case line + + when /^(\d+) parent (\d+)$/ + child = $1.to_i + parent = $2.to_i + $parents[child] = parent + + when /^@(\d+) (\d+) (\S*) (.*)$/ + time = $1.to_i + rec = $2.to_i + event = $3 + data = $4 + if event.size > 0 + $events[rec] ||= {} + $events[rec][time] = event + $has[rec] ||= {} + $has[rec][event] = true + $horiz[event] ||= "" + $horiz[event] = $horiz[event].ljust(time) + $horiz[event][time] = event + end + if data.size > 0 + $data[rec] ||= "" + $data[rec] += " #{event}@#{time}:" if event.size > 0 + $data[rec] += " #{data}" + end + $maxtime = [$maxtime, time+1].max + + when /^#\s*(.*)$/ + $stats = true + $stdout.write("#{$1}\n") + + else + raise "Unexpected line: #{line}" + end +end + +$stdout.write("\n") if $stats + +$hier = {} +$hier_direct = {} + +$events.each_key do | rec | + subhier = {} + $hier_direct[rec] = subhier + if $parents.key?(rec) + $hier_direct[$parents[rec]][rec] = subhier + else + $hier[rec] = subhier + end +end + +$order = [] + +def flatten(hier) + hier.each do | rec, subhier | + $order << rec + flatten(subhier) + end +end +flatten($hier) + +rwidth = $order.map { | x | x.to_s.size }.max + +$horiz.keys.sort.each do | occ | + $stdout.write(" " * rwidth + " #{$horiz[occ].ljust($maxtime)}") + count = $horiz[occ].delete(" ").size + $stdout.write(" #{($maxtime.to_f / count.to_f).round(2).to_s.rjust(5)} cyc/evt\n") +end +$stdout.write("\n") + +mwidth = 0 + +$order.each do | rec | + estr = "" + filter_match = $filter.empty? + $has[rec].each_key do | event | + filter_match ||= $filter.include?(event) + end + next unless filter_match + $events[rec].keys.sort.each do | time | + estr = estr.ljust(time + 1, estr.size == 0 ? " " : "-") + estr[time] = $events[rec][time] if $events[rec][time].size > 0 + end + estr += " " * 5 + estr = estr.ljust(mwidth - 1) + estr = estr.ljust(estr.size + 20 - estr.size % 20) + mwidth = [mwidth, estr.size].max + $stdout.write(rec.to_s.rjust(rwidth) + ": #{estr}#{$data[rec]}\n") +end diff --git a/test b/test new file mode 100755 index 0000000..d70c720 --- /dev/null +++ b/test @@ -0,0 +1,58 @@ +#!/bin/bash + +set -eu + +FILTER=() + +while [[ $# != 0 ]]; do + if [[ $1 == "-h" ]]; then + cat <