From 3e1e7838732d30bc2725f2196c4c3ae4af1ae159 Mon Sep 17 00:00:00 2001 From: gatecat Date: Fri, 25 Aug 2023 12:04:39 +0200 Subject: [PATCH] himbaechel: Initial timing support Signed-off-by: gatecat --- .../viaduct/example/synth_viaduct_example.tcl | 2 +- himbaechel/arch.cc | 178 ++++++++++++ himbaechel/arch.h | 97 ++++++- himbaechel/archdefs.h | 1 + himbaechel/chipdb.h | 17 +- himbaechel/himbaechel_dbgen/bba.py | 3 + himbaechel/himbaechel_dbgen/chip.py | 272 +++++++++++++++++- himbaechel/main.cc | 3 + himbaechel/uarch/example/example_arch_gen.py | 38 ++- 9 files changed, 586 insertions(+), 25 deletions(-) diff --git a/generic/viaduct/example/synth_viaduct_example.tcl b/generic/viaduct/example/synth_viaduct_example.tcl index a9d18f5612..051423b91c 100644 --- a/generic/viaduct/example/synth_viaduct_example.tcl +++ b/generic/viaduct/example/synth_viaduct_example.tcl @@ -2,7 +2,7 @@ # tcl synth_viaduct_example.tcl {out.json} yosys read_verilog -lib [file dirname [file normalize $argv0]]/example_prims.v -yosys hierarchy -check +yosys hierarchy -check -top top yosys proc yosys flatten yosys tribuf -logic diff --git a/himbaechel/arch.cc b/himbaechel/arch.cc index f7ca2357ee..df576333d0 100644 --- a/himbaechel/arch.cc +++ b/himbaechel/arch.cc @@ -63,6 +63,35 @@ Arch::Arch(ArchArgs args) IdString::initialize_add(this, chip_info->extra_constids->bba_ids[i].get(), i + chip_info->extra_constids->known_id_count); } + // Select speed grade + if (args.speed.empty()) { + if (chip_info->speed_grades.ssize() == 0) { + // no timing information and no speed grade specified + speed_grade = nullptr; + } else if (chip_info->speed_grades.ssize() == 1) { + // speed grade not specified but only one available; use it + speed_grade = &(chip_info->speed_grades[0]); + } else { + std::string available_speeds = ""; + for (const auto &speed_data : chip_info->speed_grades) { + if (!available_speeds.empty()) + available_speeds += ", "; + available_speeds += IdString(speed_data.name).c_str(this); + } 
+ log_error("Speed grade must be specified using --speed (available options: %s).\n", + available_speeds.c_str()); + } + } else { + for (const auto &speed_data : chip_info->speed_grades) { + if (IdString(speed_data.name) == id(args.speed)) { + speed_grade = &speed_data; + break; + } + } + if (!speed_grade) { + log_error("Speed grade '%s' not found in database.\n", args.speed.c_str()); + } + } init_tiles(); } @@ -167,6 +196,7 @@ bool Arch::place() bool Arch::route() { + set_fast_pip_delays(true); uarch->preRoute(); std::string router = str_or_default(settings, id("router"), defaultRouter); bool result; @@ -181,6 +211,7 @@ bool Arch::route() uarch->postRoute(); getCtx()->settings[getCtx()->id("route")] = 1; archInfoToAttributes(); + set_fast_pip_delays(false); return result; } @@ -190,6 +221,8 @@ void Arch::assignArchInfo() for (auto &cell : cells) { CellInfo *ci = cell.second.get(); ci->flat_index = cell_idx++; + if (speed_grade && ci->timing_index == -1) + ci->timing_index = get_cell_timing_idx(ci->type); for (auto &port : ci->ports) { // Default 1:1 cell:bel mapping if (!ci->cell_bel_pins.count(port.first)) @@ -259,4 +292,149 @@ const std::vector Arch::availablePlacers = {"sa", "heap"}; const std::string Arch::defaultRouter = "router1"; const std::vector Arch::availableRouters = {"router1", "router2"}; +void Arch::set_fast_pip_delays(bool fast_mode) +{ + if (fast_mode && !fast_pip_delays) { + // Have to rebuild these structures + drive_res.clear(); + load_cap.clear(); + for (auto &net : nets) { + for (auto &wire_pair : net.second->wires) { + PipId pip = wire_pair.second.pip; + if (pip == PipId()) + continue; + auto &pip_data = chip_pip_info(chip_info, pip); + auto pip_tmg = get_pip_timing(pip_data); + if (pip_tmg != nullptr) { + WireId src = getPipSrcWire(pip), dst = getPipDstWire(pip); + load_cap[src] += pip_tmg->in_cap.slow_max; + drive_res[dst] = (((pip_tmg->flags & 1) || !drive_res.count(src)) ? 
0 : drive_res.at(src)) + + pip_tmg->out_res.slow_max; + } + } + } + } + fast_pip_delays = fast_mode; +} + +// Helper for cell timing lookups +namespace { +template +int db_binary_search(const RelSlice &list, Tgetter key_getter, Tkey key) +{ + if (list.ssize() < 7) { + for (int i = 0; i < list.ssize(); i++) { + if (key_getter(list[i]) == key) { + return i; + } + } + } else { + int b = 0, e = list.ssize() - 1; + while (b <= e) { + int i = (b + e) / 2; + if (key_getter(list[i]) == key) { + return i; + } + if (key_getter(list[i]) > key) + e = i - 1; + else + b = i + 1; + } + } + return -1; +} +} // namespace + +int Arch::get_cell_timing_idx(IdString type_variant) const +{ + return db_binary_search( + speed_grade->cell_types, [](const CellTimingPOD &ct) { return ct.type_variant; }, type_variant.index); +} + +bool Arch::lookup_cell_delay(int type_idx, IdString from_port, IdString to_port, DelayQuad &delay) const +{ + NPNR_ASSERT(type_idx != -1); + const auto &ct = speed_grade->cell_types[type_idx]; + int to_pin_idx = db_binary_search( + ct.pins, [](const CellPinTimingPOD &pd) { return pd.pin; }, to_port.index); + if (to_pin_idx == -1) + return false; + const auto &tp = ct.pins[to_pin_idx]; + int arc_idx = db_binary_search( + tp.comb_arcs, [](const CellPinCombArcPOD &arc) { return arc.input; }, from_port.index); + if (arc_idx == -1) + return false; + delay = DelayQuad(tp.comb_arcs[arc_idx].delay.fast_min, tp.comb_arcs[arc_idx].delay.slow_max); + return true; +} + +const RelSlice *Arch::lookup_cell_seq_timings(int type_idx, IdString port) const +{ + NPNR_ASSERT(type_idx != -1); + const auto &ct = speed_grade->cell_types[type_idx]; + int pin_idx = db_binary_search( + ct.pins, [](const CellPinTimingPOD &pd) { return pd.pin; }, port.index); + if (pin_idx == -1) + return nullptr; + return &ct.pins[pin_idx].reg_arcs; +} + +TimingPortClass Arch::lookup_port_tmg_type(int type_idx, IdString port, PortType dir) const +{ + + NPNR_ASSERT(type_idx != -1); + const auto &ct = 
speed_grade->cell_types[type_idx]; + int pin_idx = db_binary_search( + ct.pins, [](const CellPinTimingPOD &pd) { return pd.pin; }, port.index); + if (pin_idx == -1) + return (dir == PORT_OUT) ? TMG_IGNORE : TMG_COMB_INPUT; + auto &pin = ct.pins[pin_idx]; + + if (dir == PORT_IN) { + if (pin.flags & CellPinTimingPOD::FLAG_CLK) + return TMG_CLOCK_INPUT; + return pin.reg_arcs.ssize() > 0 ? TMG_REGISTER_INPUT : TMG_COMB_INPUT; + } else { + // If a clock-to-out entry exists, then this is a register output + return pin.reg_arcs.ssize() > 0 ? TMG_REGISTER_OUTPUT : TMG_COMB_OUTPUT; + } +} + +// TODO: adding uarch overrides for these? +bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort, DelayQuad &delay) const +{ + if (cell->timing_index == -1) + return false; + return lookup_cell_delay(cell->timing_index, fromPort, toPort, delay); +} +TimingPortClass Arch::getPortTimingClass(const CellInfo *cell, IdString port, int &clockInfoCount) const +{ + if (cell->timing_index == -1) + return TMG_IGNORE; + auto type = lookup_port_tmg_type(cell->timing_index, port, cell->ports.at(port).type); + clockInfoCount = 0; + if (type == TMG_REGISTER_INPUT || type == TMG_REGISTER_OUTPUT) { + auto reg_arcs = lookup_cell_seq_timings(cell->timing_index, port); + if (reg_arcs) + clockInfoCount = reg_arcs->ssize(); + } + return type; +} +TimingClockingInfo Arch::getPortClockingInfo(const CellInfo *cell, IdString port, int index) const +{ + TimingClockingInfo result; + NPNR_ASSERT(cell->timing_index != -1); + auto reg_arcs = lookup_cell_seq_timings(cell->timing_index, port); + NPNR_ASSERT(reg_arcs); + const auto &arc = (*reg_arcs)[index]; + + result.clock_port = IdString(arc.clock); + result.edge = ClockEdge(arc.edge); + result.setup = DelayPair(arc.setup.fast_min, arc.setup.slow_max); + result.hold = DelayPair(arc.hold.fast_min, arc.hold.slow_max); + result.clockToQ = DelayQuad(arc.clk_q.fast_min, arc.clk_q.slow_max); + + return result; +} + NEXTPNR_NAMESPACE_END diff 
--git a/himbaechel/arch.h b/himbaechel/arch.h index 9a664b17e6..d2e2e78d5c 100644 --- a/himbaechel/arch.h +++ b/himbaechel/arch.h @@ -388,6 +388,7 @@ struct ArchArgs std::string uarch; std::string chipdb; std::string device; + std::string speed; dict options; }; @@ -426,6 +427,7 @@ struct Arch : BaseArch boost::iostreams::mapped_file_source blob_file; const ChipInfoPOD *chip_info; const PackageInfoPOD *package_info = nullptr; + const SpeedGradePOD *speed_grade = nullptr; // Unlike Viaduct, we are not -generic based and therefore uarch must be non-nullptr std::unique_ptr uarch; @@ -522,7 +524,27 @@ struct Arch : BaseArch { return normalise_wire(pip.tile, chip_pip_info(chip_info, pip).dst_wire); } - DelayQuad getPipDelay(PipId pip) const override { return DelayQuad(100); } + DelayQuad getPipDelay(PipId pip) const override + { + auto &pip_data = chip_pip_info(chip_info, pip); + auto pip_tmg = get_pip_timing(pip_data); + if (pip_tmg != nullptr) { + // TODO: multi corner analysis + WireId src = getPipSrcWire(pip); + uint64_t input_res = fast_pip_delays ? 0 : (drive_res.count(src) ? drive_res.at(src) : 0); + uint64_t input_cap = fast_pip_delays ? 0 : (load_cap.count(src) ? load_cap.at(src) : 0); + auto src_tmg = get_node_timing(src); + if (src_tmg != nullptr) + input_res += (src_tmg->res.slow_max / 2); + // Scale delay (fF * mOhm -> ps) + delay_t total_delay = (input_res * input_cap) / uint64_t(1e6); + total_delay += pip_tmg->int_delay.slow_max; + return DelayQuad(total_delay); + } else { + // Pip with no specified delay. Return a notional value so the router still has something to work with. 
+ return DelayQuad(100); + } + } DownhillPipRange getPipsDownhill(WireId wire) const override { return DownhillPipRange(chip_info, get_tile_wire_range(wire)); @@ -547,11 +569,29 @@ struct Arch : BaseArch } void bindPip(PipId pip, NetInfo *net, PlaceStrength strength) override { + if (!fast_pip_delays) { + auto &pip_data = chip_pip_info(chip_info, pip); + auto pip_tmg = get_pip_timing(pip_data); + if (pip_tmg != nullptr) { + WireId src = getPipSrcWire(pip); + load_cap[src] += pip_tmg->in_cap.slow_max; + drive_res[getPipDstWire(pip)] = + (((pip_tmg->flags & 1) || !drive_res.count(src)) ? 0 : drive_res.at(src)) + + pip_tmg->out_res.slow_max; + } + } uarch->notifyPipChange(pip, net); BaseArch::bindPip(pip, net, strength); } void unbindPip(PipId pip) override { + if (!fast_pip_delays) { + auto &pip_data = chip_pip_info(chip_info, pip); + auto pip_tmg = get_pip_timing(pip_data); + if (pip_tmg != nullptr) { + load_cap[getPipSrcWire(pip)] -= pip_tmg->in_cap.slow_max; + } + } uarch->notifyPipChange(pip, nullptr); BaseArch::unbindPip(pip); } @@ -667,10 +707,65 @@ struct Arch : BaseArch } } + // ------------------------------------------------- + + const PipTimingPOD *get_pip_timing(const PipDataPOD &pip_data) const + { + int32_t idx = pip_data.timing_idx; + if (speed_grade && idx >= 0 && idx < speed_grade->pip_classes.ssize()) + return &(speed_grade->pip_classes[idx]); + else + return nullptr; + } + + const NodeTimingPOD *get_node_timing(WireId wire) const + { + int idx = -1; + if (!speed_grade) + return nullptr; + if (is_nodal_wire(chip_info, wire.tile, wire.index)) { + auto &shape = chip_node_shape(chip_info, wire.tile, wire.index); + idx = shape.timing_idx; + } else { + auto &wire_data = chip_wire_info(chip_info, wire); + idx = wire_data.timing_idx; + } + if (idx >= 0 && idx < speed_grade->node_classes.ssize()) + return &(speed_grade->node_classes[idx]); + else + return nullptr; + } + + // ------------------------------------------------- + + // Given cell type and 
variant, get the index inside the speed grade timing data + int get_cell_timing_idx(IdString type_variant) const; + // Return true and set delay if a comb path exists in a given cell timing index + bool lookup_cell_delay(int type_idx, IdString from_port, IdString to_port, DelayQuad &delay) const; + // Get setup and hold time and associated clock for a given cell timing index and signal + const RelSlice *lookup_cell_seq_timings(int type_idx, IdString port) const; + // Attempt to look up port type based on timing database + TimingPortClass lookup_port_tmg_type(int type_idx, IdString port, PortType dir) const; + + // ------------------------------------------------- + + bool getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort, DelayQuad &delay) const override; + // Get the port class, also setting clockInfoCount to the number of TimingClockingInfos associated with a port + TimingPortClass getPortTimingClass(const CellInfo *cell, IdString port, int &clockInfoCount) const override; + // Get the TimingClockingInfo of a port + TimingClockingInfo getPortClockingInfo(const CellInfo *cell, IdString port, int index) const override; + // ------------------------------------------------- void init_tiles(); + void set_fast_pip_delays(bool fast_mode); std::vector tile_name; dict tile_name2idx; + + // Load capacitance and drive resistance for nodes + // TODO: does this `dict` hurt routing performance too much? 
+ bool fast_pip_delays = false; + dict drive_res; + dict load_cap; }; NEXTPNR_NAMESPACE_END diff --git a/himbaechel/archdefs.h b/himbaechel/archdefs.h index b3ede1e25f..14c4b11fdc 100644 --- a/himbaechel/archdefs.h +++ b/himbaechel/archdefs.h @@ -98,6 +98,7 @@ struct ArchNetInfo struct ArchCellInfo : BaseClusterInfo { int flat_index; + int timing_index = -1; dict> cell_bel_pins; }; diff --git a/himbaechel/chipdb.h b/himbaechel/chipdb.h index 07cf09ae23..622486247d 100644 --- a/himbaechel/chipdb.h +++ b/himbaechel/chipdb.h @@ -62,7 +62,8 @@ NPNR_PACKED_STRUCT(struct BelPinRefPOD { NPNR_PACKED_STRUCT(struct TileWireDataPOD { int32_t name; int32_t wire_type; - int32_t flags; // 32 bits of arbitrary data + int32_t flags; // 32 bits of arbitrary data + int32_t timing_idx; // used only when the wire is not part of a node, otherwise node idx applies RelSlice pips_uphill; RelSlice pips_downhill; RelSlice bel_pins; @@ -85,7 +86,7 @@ NPNR_PACKED_STRUCT(struct RelTileWireRefPOD { NPNR_PACKED_STRUCT(struct NodeShapePOD { RelSlice tile_wires; - int32_t timing_index; + int32_t timing_idx; }); NPNR_PACKED_STRUCT(struct TileTypePOD { @@ -151,12 +152,6 @@ NPNR_PACKED_STRUCT(struct TimingValue { int32_t slow_max; }); -NPNR_PACKED_STRUCT(struct BelPinTimingPOD { - TimingValue in_cap; - TimingValue drive_res; - TimingValue delay; -}); - NPNR_PACKED_STRUCT(struct PipTimingPOD { TimingValue int_delay; TimingValue in_cap; @@ -186,19 +181,19 @@ NPNR_PACKED_STRUCT(struct CellPinCombArcPOD { NPNR_PACKED_STRUCT(struct CellPinTimingPOD { int32_t pin; + int32_t flags; RelSlice comb_arcs; RelSlice reg_arcs; + static constexpr int32_t FLAG_CLK = 1; }); NPNR_PACKED_STRUCT(struct CellTimingPOD { - int32_t type; - int32_t variant; + int32_t type_variant; RelSlice pins; }); NPNR_PACKED_STRUCT(struct SpeedGradePOD { int32_t name; - RelSlice bel_pin_classes; RelSlice pip_classes; RelSlice node_classes; RelSlice cell_types; diff --git a/himbaechel/himbaechel_dbgen/bba.py 
b/himbaechel/himbaechel_dbgen/bba.py index de9dca31db..7a2de8265d 100644 --- a/himbaechel/himbaechel_dbgen/bba.py +++ b/himbaechel/himbaechel_dbgen/bba.py @@ -17,10 +17,13 @@ def str(self, s, comment=""): def label(self, s): print(f"label {s}", file=self.f) def u8(self, n, comment=""): + assert isinstance(n, int), n print(f"u8 {n} {comment}", file=self.f) def u16(self, n, comment=""): + assert isinstance(n, int), n print(f"u16 {n} {comment}", file=self.f) def u32(self, n, comment=""): + assert isinstance(n, int), n print(f"u32 {n} {comment}", file=self.f) def pop(self): print("pop", file=self.f) diff --git a/himbaechel/himbaechel_dbgen/chip.py b/himbaechel/himbaechel_dbgen/chip.py index 9e4c7c25e7..d10118592e 100644 --- a/himbaechel/himbaechel_dbgen/chip.py +++ b/himbaechel/himbaechel_dbgen/chip.py @@ -25,7 +25,7 @@ def serialise_lists(self, context: str, bba: BBAWriter): def serialise(self, context: str, bba: BBAWriter): pass -@dataclass(eq=True, frozen=True) +@dataclass(eq=True, order=True, frozen=True) class IdString: index: int = 0 @@ -144,6 +144,7 @@ class TileWireData: name: IdString wire_type: IdString flags: int = 0 + timing_idx: int = -1 # not serialised, but used to build the global constant networks const_val: int = -1 @@ -167,6 +168,7 @@ def serialise(self, context: str, bba: BBAWriter): bba.u32(self.name.index) bba.u32(self.wire_type.index) bba.u32(self.flags) + bba.u32(self.timing_idx) bba.slice(f"{context}_pips_uh", len(self.pips_uphill)) bba.slice(f"{context}_pips_dh", len(self.pips_downhill)) bba.slice(f"{context}_bel_pins", len(self.bel_pins)) @@ -191,6 +193,7 @@ def serialise(self, context: str, bba: BBAWriter): @dataclass class TileType(BBAStruct): strs: StringPool + tmg: "TimingPool" type_name: IdString bels: list[BelData] = field(default_factory=list) pips: list[PipData] = field(default_factory=list) @@ -223,11 +226,12 @@ def create_wire(self, name: str, type: str=""): self._wire2idx[wire.name] = wire.index self.wires.append(wire) return wire 
- def create_pip(self, src: str, dst: str): + def create_pip(self, src: str, dst: str, timing_class: str=""): # Create a pip between two tile wires in the tile type. Both wires should exist already. src_idx = self._wire2idx[self.strs.id(src)] dst_idx = self._wire2idx[self.strs.id(dst)] - pip = PipData(index=len(self.pips), src_wire=src_idx, dst_wire=dst_idx) + pip = PipData(index=len(self.pips), src_wire=src_idx, dst_wire=dst_idx, + timing_idx=self.tmg.pip_class_idx(timing_class)) self.wires[src_idx].pips_downhill.append(pip.index) self.wires[dst_idx].pips_uphill.append(pip.index) self.pips.append(pip) @@ -294,6 +298,8 @@ def serialise(self, context: str, bba: BBAWriter): @dataclass class NodeShape(BBAStruct): wires: list[TileWireRef] = field(default_factory=list) + timing_index: int = -1 + def key(self): m = hashlib.sha1() for wire in self.wires: @@ -310,7 +316,7 @@ def serialise_lists(self, context: str, bba: BBAWriter): bba.u16(0) # alignment def serialise(self, context: str, bba: BBAWriter): bba.slice(f"{context}_wires", len(self.wires)) - bba.u32(-1) # timing index (not yet used) + bba.u32(self.timing_index) # timing index (not yet used) MODE_TILE_WIRE = 0x7000 MODE_IS_ROOT = 0x7001 @@ -430,6 +436,247 @@ def serialise(self, context: str, bba: BBAWriter): bba.u32(self.name.index) bba.slice(f"{context}_pads", len(self.pads)) +class TimingValue(BBAStruct): + def __init__(self, fast_min=0, fast_max=None, slow_min=None, slow_max=None): + self.fast_min = fast_min + self.fast_max = fast_max or fast_min + self.slow_min = slow_min or self.fast_min + self.slow_max = slow_max or self.fast_max + + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.fast_min) + bba.u32(self.fast_max) + bba.u32(self.slow_min) + bba.u32(self.slow_max) + +@dataclass +class PipTiming(BBAStruct): + int_delay: TimingValue = field(default_factory=TimingValue) # internal fixed delay in ps + in_cap: TimingValue = 
field(default_factory=TimingValue) # internal capacitance in notional femtofarads + out_res: TimingValue = field(default_factory=TimingValue) # drive/output resistance in notional milliohms + flags: int = 0 # is_buffered etc + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + self.int_delay.serialise(context, bba) + self.in_cap.serialise(context, bba) + self.out_res.serialise(context, bba) + bba.u32(self.flags) + +@dataclass +class NodeTiming(BBAStruct): + res: TimingValue = field(default_factory=TimingValue) # wire resistance in notional milliohms + cap: TimingValue = field(default_factory=TimingValue) # wire capacitance in notional femtofarads + delay: TimingValue = field(default_factory=TimingValue) # fixed wire delay in ps + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + self.res.serialise(context, bba) + self.cap.serialise(context, bba) + self.delay.serialise(context, bba) + +@dataclass +class ClockEdge(Enum): + RISING = 0 + FALLING = 1 + +@dataclass +class CellPinRegArc(BBAStruct): + clock: int + edge: ClockEdge + setup: TimingValue = field(default_factory=TimingValue) # setup time in ps + hold: TimingValue = field(default_factory=TimingValue) # hold time in ps + clk_q: TimingValue = field(default_factory=TimingValue) # clock to output time in ps + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.clock.index) + bba.u32(self.edge.value) + self.setup.serialise(context, bba) + self.hold.serialise(context, bba) + self.clk_q.serialise(context, bba) + +@dataclass +class CellPinCombArc(BBAStruct): + from_pin: int + delay: TimingValue = field(default_factory=TimingValue) + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.from_pin.index) + 
self.delay.serialise(context, bba) + +@dataclass +class CellPinTiming(BBAStruct): + pin: int + flags: int = 0 + comb_arcs: list[CellPinCombArc] = field(default_factory=list) # sorted by from_pin ID index + reg_arcs: list[CellPinRegArc] = field(default_factory=list) # sorted by clock ID index + + def set_clock(self): + self.flags |= 1 + + def finalise(self): + self.comb_arcs.sort(key=lambda a: a.from_pin) + self.reg_arcs.sort(key=lambda a: a.clock) + + def serialise_lists(self, context: str, bba: BBAWriter): + bba.label(f"{context}_comb") + for i, a in enumerate(self.comb_arcs): + a.serialise(f"{context}_comb{i}", bba) + bba.label(f"{context}_reg") + for i, a in enumerate(self.reg_arcs): + a.serialise(f"{context}_reg{i}", bba) + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.pin.index) # pin idstring + bba.u32(self.flags) + bba.slice(f"{context}_comb", len(self.comb_arcs)) + bba.slice(f"{context}_reg", len(self.reg_arcs)) + +class CellTiming(BBAStruct): + def __init__(self, strs: StringPool, type_variant: str): + self.strs = strs + self.type_variant = strs.id(type_variant) + self.pin_data = {} + + # combinational timing through a cell (like a LUT delay) + def add_comb_arc(self, from_pin: str, to_pin: str, delay: TimingValue): + if to_pin not in self.pin_data: + self.pin_data[to_pin] = CellPinTiming(pin=self.strs.id(to_pin)) + self.pin_data[to_pin].comb_arcs.append(CellPinCombArc(from_pin=self.strs.id(from_pin), delay=delay)) + + # register input style timing (like a DFF input) + def add_setup_hold(self, clock: str, input_pin: str, edge: ClockEdge, setup: TimingValue, hold: TimingValue): + if input_pin not in self.pin_data: + self.pin_data[input_pin] = CellPinTiming(pin=self.strs.id(input_pin)) + if clock not in self.pin_data: + self.pin_data[clock] = CellPinTiming(pin=self.strs.id(clock)) + self.pin_data[input_pin].reg_arcs.append(CellPinRegArc(clock=self.strs.id(clock), edge=edge, setup=setup, hold=hold)) + self.pin_data[clock].set_clock() + + # 
register output style timing (like a DFF output) + def add_clock_out(self, clock: str, output_pin: str, edge: ClockEdge, delay: TimingValue): + if output_pin not in self.pin_data: + self.pin_data[output_pin] = CellPinTiming(pin=self.strs.id(output_pin)) + if clock not in self.pin_data: + self.pin_data[clock] = CellPinTiming(pin=self.strs.id(clock)) + self.pin_data[output_pin].reg_arcs.append(CellPinRegArc(clock=self.strs.id(clock), edge=edge, clk_q=delay)) + self.pin_data[clock].set_clock() + + def finalise(self): + self.pins = list(self.pin_data.values()) + self.pins.sort(key=lambda p: p.pin) + for pin in self.pins: + pin.finalise() + + def serialise_lists(self, context: str, bba: BBAWriter): + for i, p in enumerate(self.pins): + p.serialise_lists(f"{context}_pin{i}", bba) + bba.label(f"{context}_pins") + for i, p in enumerate(self.pins): + p.serialise(f"{context}_pin{i}", bba) + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.type_variant.index) # type idstring + bba.slice(f"{context}_pins", len(self.pins)) + +@dataclass +class SpeedGrade(BBAStruct): + name: int + pip_classes: list[PipTiming|None] = field(default_factory=list) + node_classes: list[NodeTiming|None] = field(default_factory=list) + cell_types: list[CellTiming] = field(default_factory=list) # sorted by (cell_type, variant) ID tuple + + def finalise(self): + self.cell_types.sort(key=lambda ty: ty.type_variant) + for ty in self.cell_types: + ty.finalise() + + def serialise_lists(self, context: str, bba: BBAWriter): + for i, t in enumerate(self.cell_types): + t.serialise_lists(f"{context}_cellty{i}", bba) + bba.label(f"{context}_pip_classes") + for i, p in enumerate(self.pip_classes): + p.serialise(f"{context}_pipc{i}", bba) + bba.label(f"{context}_node_classes") + for i, n in enumerate(self.node_classes): + n.serialise(f"{context}_nodec{i}", bba) + bba.label(f"{context}_cell_types") + for i, t in enumerate(self.cell_types): + t.serialise(f"{context}_cellty{i}", bba) + def 
serialise(self, context: str, bba: BBAWriter): + bba.u32(self.name.index) # speed grade idstring + bba.slice(f"{context}_pip_classes", len(self.pip_classes)) + bba.slice(f"{context}_node_classes", len(self.node_classes)) + bba.slice(f"{context}_cell_types", len(self.cell_types)) + +class TimingPool(BBAStruct): + def __init__(self, strs: StringPool): + self.strs = strs + self.speed_grades = [] + self.speed_grade_idx = {} + self.pip_classes = {} + self.node_classes = {} + + def set_speed_grades(self, speed_grades: list): + assert len(self.speed_grades) == 0 + self.speed_grades = [SpeedGrade(name=self.strs.id(g)) for g in speed_grades] + self.speed_grade_idx = {g: i for i, g in enumerate(speed_grades)} + + def pip_class_idx(self, name: str): + if name == "": + return -1 + elif name in self.pip_classes: + return self.pip_classes[name] + else: + idx = len(self.pip_classes) + self.pip_classes[name] = idx + return idx + + def node_class_idx(self, name: str): + if name == "": + return -1 + elif name in self.node_classes: + return self.node_classes[name] + else: + idx = len(self.node_classes) + self.node_classes[name] = idx + return idx + + def set_pip_class(self, grade: str, name: str, delay: TimingValue, + in_cap: Optional[TimingValue]=None, out_res: Optional[TimingValue]=None, + is_buffered=True): + idx = self.pip_class_idx(name) + sg = self.speed_grades[self.speed_grade_idx[grade]] + if idx >= len(sg.pip_classes): + sg.pip_classes += [None for i in range(1 + idx - len(sg.pip_classes))] + assert sg.pip_classes[idx] is None, f"attempting to set pip class {name} in speed grade {grade} twice" + sg.pip_classes[idx] = PipTiming(int_delay=delay, in_cap=in_cap, out_res=out_res, flags=(1 if is_buffered else 0)) + + def set_bel_pin_class(self, grade: str, name: str, delay: TimingValue, + in_cap: Optional[TimingValue]=None, out_res: Optional[TimingValue]=None): + # bel pin classes are shared with pip classes, but this alias adds a bit of extra clarity + set_pip_class(self, grade, 
name, delay, in_cap, out_res, is_buffered=True) + + def set_node_class(self, grade: str, name: str, delay: TimingValue, + res: Optional[TimingValue]=None, cap: Optional[TimingValue]=None): + idx = self.node_class_idx(name) + sg = self.speed_grades[self.speed_grade_idx[grade]] + if idx >= len(sg.node_classes): + sg.node_classes += [None for i in range(1 + idx - len(sg.node_classes))] + assert sg.node_classes[idx] is None, f"attempting to set node class {name} in speed grade {grade} twice" + sg.node_classes[idx] = NodeTiming(delay=delay, res=res, cap=cap) + + def add_cell_variant(self, speed_grade: str, name: str): + cell = CellTiming(self.strs, name) + self.speed_grades[self.speed_grade_idx[speed_grade]].cell_types.append(cell) + return cell + + def finalise(self): + for sg in self.speed_grades: + sg.finalise() + class Chip: def __init__(self, uarch: str, name: str, width: int, height: int): self.strs = StringPool() @@ -446,8 +693,9 @@ def __init__(self, uarch: str, name: str, width: int, height: int): self.tile_shapes_idx = dict() self.packages = [] self.extra_data = None + self.timing = TimingPool(self.strs) def create_tile_type(self, name: str): - tt = TileType(self.strs, self.strs.id(name)) + tt = TileType(self.strs, self.timing, self.strs.id(name)) self.tile_type_idx[name] = len(self.tile_types) self.tile_types.append(tt) return tt @@ -456,6 +704,9 @@ def set_tile_type(self, x: int, y: int, type: str): def tile_type_at(self, x: int, y: int): assert self.tiles[y][x].type_idx is not None, f"tile type at ({x}, {y}) must be set" return self.tile_types[self.tiles[y][x].type_idx] + def set_speed_grades(self, speed_grades: list): + self.timing.set_speed_grades(speed_grades) + return self.timing def add_node(self, wires: list[NodeWire]): # add a node - joining between multiple tile wires into a single connection (from nextpnr's point of view) # all the tile wires must exist, and the tile types must be set, first @@ -529,7 +780,8 @@ def serialise(self, bba: BBAWriter): 
for y, row in enumerate(self.tiles): for x, tinst in enumerate(row): tinst.serialise_lists(f"tinst_{x}_{y}", bba) - + for i, sg in enumerate(self.timing.speed_grades): + sg.serialise_lists(f"sg{i}", bba) self.strs.serialise_lists(f"constids", bba) if self.extra_data is not None: self.extra_data.serialise_lists("extra_data", bba) @@ -552,7 +804,9 @@ def serialise(self, bba: BBAWriter): for y, row in enumerate(self.tiles): for x, tinst in enumerate(row): tinst.serialise(f"tinst_{x}_{y}", bba) - + bba.label(f"speed_grades") + for i, sg in enumerate(self.timing.speed_grades): + sg.serialise(f"sg{i}", bba) bba.label(f"constids") self.strs.serialise(f"constids", bba) @@ -573,8 +827,7 @@ def serialise(self, bba: BBAWriter): # packages bba.slice("packages", len(self.packages)) # speed grades: not yet used - bba.u32(0) - bba.u32(0) + bba.slice("speed_grades", len(self.timing.speed_grades)) # db-defined constids bba.ref("constids") # extra data @@ -584,6 +837,7 @@ def serialise(self, bba: BBAWriter): bba.u32(0) def write_bba(self, filename): + self.timing.finalise() with open(filename, "w") as f: bba = BBAWriter(f) bba.pre('#include \"nextpnr.h\"') diff --git a/himbaechel/main.cc b/himbaechel/main.cc index 64c2c0f3f8..d66b3d9c8d 100644 --- a/himbaechel/main.cc +++ b/himbaechel/main.cc @@ -49,6 +49,7 @@ po::options_description HimbaechelCommandHandler::getArchOptions() po::options_description specific("Architecture specific options"); specific.add_options()("uarch", po::value(), uarch_help.c_str()); specific.add_options()("chipdb", po::value(), "path to chip database file"); + specific.add_options()("speed", po::value(), "device speed grade"); specific.add_options()("vopt,o", po::value>(), "options to pass to the himbächel uarch"); return specific; @@ -70,6 +71,8 @@ std::unique_ptr HimbaechelCommandHandler::createContext(dict(); chipArgs.chipdb = vm["chipdb"].as(); + if (vm.count("speed")) + chipArgs.speed = vm["speed"].as(); if (vm.count("vopt")) { std::vector options = 
vm["vopt"].as>(); for (const auto &opt : options) { diff --git a/himbaechel/uarch/example/example_arch_gen.py b/himbaechel/uarch/example/example_arch_gen.py index 026c2551aa..81d5c7dc35 100644 --- a/himbaechel/uarch/example/example_arch_gen.py +++ b/himbaechel/uarch/example/example_arch_gen.py @@ -40,15 +40,15 @@ def create_switch_matrix(tt: TileType, inputs: list[str], outputs: list[str]): # input pips for i, w in enumerate(inputs): for j in range((i % Si), Wl, Si): - tt.create_pip(f"SWITCH{j}", w) + tt.create_pip(f"SWITCH{j}", w, timing_class="SWINPUT") # output pips for i, w in enumerate(outputs): for j in range((i % Sq), Wl, Sq): - tt.create_pip(w, f"SWITCH{j}") + tt.create_pip(w, f"SWITCH{j}", timing_class="SWINPUT") # neighbour local pips for i in range(Wl): for j, (d, dx, dy) in enumerate(dirs): - tt.create_pip(f"{d}{(i + j) % Wl}", f"SWITCH{i}") + tt.create_pip(f"{d}{(i + j) % Wl}", f"SWITCH{i}", timing_class="SWNEIGH") # clock "ladder" if not tt.has_wire("CLK"): tt.create_wire(f"CLK", "TILE_CLK") @@ -185,7 +185,37 @@ def create_nodes(ch): clk_node.append(NodeWire(x, y+1, "CLK_PREV")) ch.add_node(clk_node) +def set_timings(ch): + speed = "DEFAULT" + tmg = ch.set_speed_grades([speed]) + # --- Routing Delays --- + # Notes: A simpler timing model could just use intrinsic delay and ignore R and Cs. + # R and C values don't have to be physically realistic, just in agreement with themselves to provide + # a meaningful scaling of delay with fanout. Units are subject to change. 
+ tmg.set_pip_class(grade=speed, name="SWINPUT", + delay=TimingValue(80), # 80ps intrinsic delay + in_cap=TimingValue(5000), # 5pF + out_res=TimingValue(1000), # 1ohm + ) + tmg.set_pip_class(grade=speed, name="SWOUTPUT", + delay=TimingValue(100), # 100ps intrinsic delay + in_cap=TimingValue(5000), # 5pF + out_res=TimingValue(800), # 0.8ohm + ) + tmg.set_pip_class(grade=speed, name="SWNEIGH", + delay=TimingValue(120), # 120ps intrinsic delay + in_cap=TimingValue(7000), # 7pF + out_res=TimingValue(1200), # 1.2ohm + ) + # TODO: also support node/wire delays and add an example of them + # --- Cell delays --- + lut = ch.timing.add_cell_variant(speed, "LUT4") + for j in range(K): + lut.add_comb_arc(f"I[{j}]", "F", TimingValue(150 + j * 15)) + dff = ch.timing.add_cell_variant(speed, "DFF") + dff.add_setup_hold("CLK", "D", ClockEdge.RISING, TimingValue(150), TimingValue(25)) + dff.add_clock_out("CLK", "Q", ClockEdge.RISING, TimingValue(200)) def main(): ch = Chip("example", "EX1", X, Y) @@ -211,6 +241,8 @@ def main(): ch.set_tile_type(x, y, "LOGIC") # Create nodes between tiles create_nodes(ch) + set_timings(ch) ch.write_bba(sys.argv[1]) + if __name__ == '__main__': main()