#![allow(non_camel_case_types)]

use crate::processor::elements::cheri::SafeTaggedCap;
use std::ops::Range;
use std::marker::PhantomData;
use crate::processor::isa_mods::*;
use crate::processor::exceptions::IllegalInstructionException::*;
use super::csrs::CSRProvider;
use std::cmp::min;
use anyhow::{Context, Result};

use crate::processor::decode::{Opcode,InstructionBits};

mod types;
pub use types::*;

mod conns;
pub use conns::*;

mod decode;
pub use decode::*;

mod registers;
pub use registers::*;

/// The Vector Unit for the processor.
/// Stores all vector state, including registers.
/// This requires a [VecMemInterface<uXLEN, TElem>] and [VecRegInterface<uXLEN>] to access other resources.
pub struct Rvv<uXLEN: PossibleXlen, TElem> {
    vreg: Box<dyn VectorRegisterFile<TElem>>,

    vtype: VType,
    vl: u32,

    /// This is used by the hardware to support resuming vector instructions after traps.
    /// e.g. if a vector load hits a page fault at element #N, set vstart to N before taking the trap,
    /// and the load will resume from vstart when you get back.
    /// Reset to zero at the end of every vector instruction.
    /// 
    /// This potentially impacts fast paths, 
    /// e.g. if a fast-path load pulls full lines from memory into a vector register, vstart must be 0.
    vstart: u32,

    _phantom_xlen: PhantomData<uXLEN>,
}
/// RISC-V Vector unit for RV32 ISAs
pub type Rv32v = Rvv<u32, u128>;
/// RISC-V Vector unit for RV64 non-CHERI ISAs
pub type Rv64v = Rvv<u64, u128>;
/// RISC-V Vector unit for RV64 + CHERI ISAs
pub type Rv64Cheriv = Rvv<u64, SafeTaggedCap>;

impl<uXLEN: PossibleXlen, TElem> Rvv<uXLEN, TElem> {
    /// Returns an initialized vector unit.
    pub fn new(vreg: Box<dyn VectorRegisterFile<TElem>>) -> Self {
        Rvv {
            vreg,

            vtype: VType::illegal(),
            vl: 0,
            vstart: 0,

            _phantom_xlen: PhantomData,
        }
    }

    /// Reset the vector unit's state
    pub fn reset(&mut self) {
        self.vreg.reset();
        self.vtype = VType::illegal();
        self.vl = 0;
        self.vstart = 0;
    }

    /// (Internal) Execute a configuration instruction, e.g. vsetvli family
    /// Requires a [VecRegInterface].
    /// 
    /// # Arguments
    /// 
    /// * `inst_kind` - Which kind of configuration instruction to execute
    /// * `inst` - Decoded instruction bits
    /// * `sreg` - Interface to the scalar register file
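    ///
    /// For example, `vsetvli rd, rs1, e32, m2` requests SEW=32 and LMUL=2; the new `vl`
    /// becomes `min(AVL, VLMAX)` for that configuration and is also written back to `rd`.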
    fn exec_config(&mut self, inst_kind: ConfigKind, inst: InstructionBits, sreg: &mut dyn VecRegInterface<uXLEN>) -> Result<()> {
        if let InstructionBits::VType{rd, funct3, rs1, rs2, zimm11, zimm10, ..} = inst {
            assert_eq!(funct3, 0b111);

            // avl = application vector length
            // e.g. the total number of elements to process
            // Either read it from a register, or from an immediate.
            // See Section 6.2 of the spec.
            let avl = match inst_kind {
                ConfigKind::vsetvli | ConfigKind::vsetvl => { // vsetvli, vsetvl
                    // Read AVL from a register
                    if rs1 != 0 {
                        // default case, just read it out
                        sreg.sreg_read_xlen(rs1)?.into()
                    } else {
                        if rd != 0 {
                            // rs1 == 0, rd != 0
                            // => set the AVL to the maximum possible value,
                            // use that to calculate the maximum number of elements in this configuration,
                            // which will get written out to rd.
                            u64::MAX
                        } else {
                            // Request the same vector length, even if the vtype is changing.
                            self.vl as u64
                        }
                    }
                } ,
                ConfigKind::vsetivli => { // vsetivli
                    // Read AVL from an immediate
                    // Use rs1 as a 5-bit immediate
                    rs1 as u64
                }
            };

            // Depending on the instruction, the vtype selection is different
            // See RISC-V V spec, section 6
            let vtype_bits = match inst_kind {
                ConfigKind::vsetvli => {
                    zimm11 as u64
                },
                ConfigKind::vsetivli => {
                    zimm10 as u64
                },
                ConfigKind::vsetvl => {
                    sreg.sreg_read_xlen(rs2)?.into()
                },
            };
            // Try to parse vtype bits
            let req_vtype = VType::decode(vtype_bits as u32)?;

            // Calculate the maximum number of elements per register group
            // (under some configurations, e.g. Sew=16,Lmul=1/4,Vlen=32, this could be < 1 which is illegal)
            let elems_per_group = req_vtype.elems_per_group();

            let vtype_supported = elems_per_group > 0;

            if vtype_supported {
                self.vtype = req_vtype;
                // TODO - section 6.3 shows more constraints on setting VL
                self.vl = min(elems_per_group, avl as u32);

                sreg.sreg_write_xlen(rd, self.vl.into())?;
            } else {
                self.vtype = VType::illegal();
                // TODO - move this bail to the next vector instruction that executes
                // Setting vtype to an illegal type is fine, but trying to do anything (other than reconfigure) with invalid vtype isn't
                bail!("Valid but unsupported vtype: {:b} -> {:?}, elems_per_group {}", vtype_bits, req_vtype, elems_per_group);
            }

            Ok(())
        } else {
            unreachable!("vector::exec_config instruction MUST be InstructionBits::VType, got {:?} instead", inst);
        }
    }

    /// Find the first and last segment indices that aren't masked out.
    /// Used to find a tight range of the segments/elements that will actually be processed.
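    /// e.g. with `vstart = 0`, `evl = 8`, and only segments 2..=5 active, this returns `Some(2..6)`.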
    fn get_active_segment_range(&mut self, vm: bool, evl: u32) -> Option<Range<u32>> {
        // Find the smallest not-masked-out segment >= vstart
        // Find the largest not-masked-out segment < evl

        // Take range vstart-evl
        // remove masked-out segments
        // take minimum
        // In theory, this could be replaced with a lowest-bit detection (with a shift to remove segments < vstart)
        let start = (self.vstart..evl)
            .filter_map(|i| match self.vreg.seg_masked_out(vm, i) {
                true => None,
                false => Some(i as u32)
            })
            .min();

        // Take range vstart-evl
        // remove masked-out segments
        // take maximum
        // In theory, this could be replaced with a highest-bit detection (with some kind of mask/shift to remove segments >= evl?)
        let final_accessed = (self.vstart..evl)
            .filter_map(|i| match self.vreg.seg_masked_out(vm, i) {
                true => None,
                false => Some(i as u32)
            })
            .max();

        // If at least one segment in vstart..evl is active, return the tight range covering the active segments.
        // Otherwise no segments will be accessed.
        match (start, final_accessed) {
            (Some(start), Some(final_accessed)) => Some(Range::<u32> {
                start,
                end: final_accessed + 1 // Exclusive range, needs to contain final_accessed
            }),
            _ => None
        }
    }

    /// Try doing fast-path capability checks for accesses for a vector load/store.
    /// Fast-paths exist for all accesses, although in hardware some may not be as fast as others.
    /// Return values:
    /// - Ok(true) if the fast-path check raised no capability exceptions
    ///   - Therefore the full access should not raise any capability exceptions
    /// - Ok(false) if the fast-path check failed in a tolerable manner 
    ///   - Therefore the full access *may* raise a capability exception
    ///   - A tolerable fast-path failure = fault-only-first, which might absorb the exception.
    /// - Err() if the fast-path check failed in a not-tolerable manner
    /// panics if all elements are masked out
    fn fast_check_load_store(&mut self, addr_provenance: (u64, Provenance), rs2: u8, vm: bool, op: DecodedMemOp, sreg: &mut dyn VecRegInterface<uXLEN>) -> (Result<bool>, Range<u64>) {
        let (base_addr, provenance) = addr_provenance;

        use DecodedMemOp::*;
        let mut is_fault_only_first = false;
        // Calculate an address range that tightly encompasses the access.
        let addr_range = match op {
            Strided{stride, eew, evl, nf, ..} => {
                // Calculate the range of not-masked-out segments
                // active_vstart = the smallest segment >= vstart that isn't masked out
                // active_evl = (the largest segment < evl that isn't masked out) + 1
                let Range{ start: active_vstart, end: active_evl } = self.get_active_segment_range(vm, evl).unwrap();

                // todo!("negative range");
                let offset_range = Range::<u64> {
                    start: active_vstart as u64 * stride,
                    // The index of the final active segment = (active_evl - 1)
                    // The start of the final segment = (active_evl - 1) * stride
                    // The end of the final segment = (active_evl - 1) * stride + (nf * eew)
                    end: (active_evl as u64 - 1) * stride + (nf as u64) * eew.width_in_bytes()
                };
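                // e.g. base_addr=0x100, stride=16, nf=2, eew=32 bits, active segments 1..4:
                // offset_range = 16..56, so the checked range is 0x110..0x138 (exclusive).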
                Range::<u64> {
                    start: base_addr + offset_range.start,
                    end: base_addr + offset_range.end,
                }
            },
            FaultOnlyFirst{evl, nf, eew, ..} => {
                is_fault_only_first = true;

                // Calculate the range of not-masked-out segments
                let Range{ start: active_vstart, end: active_evl } = self.get_active_segment_range(vm, evl).unwrap();

                let offset_range = Range::<u64> {
                    start: (active_vstart as u64) * (nf as u64) * eew.width_in_bytes(),
                    // The index of the final active segment = (active_evl - 1)
                    // The start of the final segment = (active_evl - 1) * stride
                    // The end of the final segment = (active_evl - 1) * stride + (nf * eew)
                    // stride = eew * nf
                    // => The end of the final segment = (active_evl - 1) * eew * nf + eew * nf
                    //                                 = active_evl * eew * nf
                    end:   (active_evl as u64)    * (nf as u64) * eew.width_in_bytes()
                };
                Range::<u64> {
                    start: base_addr + offset_range.start,
                    end: base_addr + offset_range.end,
                }
            },
            Indexed{evl, nf, eew, index_ew, ..} => {
                // Calculate the range of not-masked-out segments
                let Range{ start: active_vstart, end: active_evl } = self.get_active_segment_range(vm, evl).unwrap();

                let mut offsets = vec![];
                for i_segment in active_vstart..active_evl {
                    offsets.push(self.vreg.load_vreg_elem_int(index_ew, rs2, i_segment).unwrap());
                }

                let offset_range = Range::<u64> {
                    start: *offsets.iter().min().unwrap() as u64,
                    end: *offsets.iter().max().unwrap() as u64 + (nf as u64 * eew.width_in_bytes()),
                };
                Range::<u64> {
                    start: base_addr + offset_range.start,
                    end: base_addr + offset_range.end,
                }
            }
            WholeRegister{eew, ..} => {
                // Can't be masked out
                // op.evl() accounts for the number of registers
                let index_range = Range::<u64> {
                    start: 0,
                    end: (op.evl() as u64)
                };
                Range::<u64> {
                    start: base_addr + index_range.start * eew.width_in_bytes(),
                    end: base_addr + index_range.end * eew.width_in_bytes(),
                }
            }
            ByteMask{evl, ..} => {
                // Can't be masked out
                // bytemask does not have segment support
                let index_range = Range::<u64> {
                    start: self.vstart as u64,
                    end: (evl as u64)
                };
                Range::<u64> {
                    start: base_addr + index_range.start,
                    end: base_addr + index_range.end,
                }
            }
        };

        let check_result = sreg.check_addr_range_against_provenance(addr_range.clone(), provenance, op.dir());
        match check_result {
            Ok(()) => {
                // if that range check succeeded, we can return true
                return (Ok(true), addr_range);
            }
            Err(e) => {
                // the full range encountered a capability exception
                // if this is a fault-only-first operation, that's ok - it will handle that
                if is_fault_only_first {
                    return (Ok(false), addr_range);
                }
                // we aren't in a state that can tolerate errors.
                // this instruction will not succeed.
                // raise the exception.
                return (Err(e), addr_range);
            }
        }
    }

    /// Converts a decoded memory operation to the list of accesses it performs.
    fn get_load_store_accesses(&mut self, rd: u8, addr_p: (u64, Provenance), rs2: u8, vm: bool, op: DecodedMemOp) -> Result<Vec<(VectorElem, u64)>> {
        let mut map = vec![];

        let (base_addr, _) = addr_p;

        use DecodedMemOp::*;
        match op {
            Strided{stride, evl, nf, eew, emul, ..} => {
                // For each segment
                for i_segment in self.vstart..evl {
                    let seg_addr = base_addr + (i_segment as u64 * stride);
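                    // e.g. with stride=16, nf=3, eew=16 bits: segment i starts at base_addr + 16*i,
                    // and its three fields are accessed at seg_addr, seg_addr+2, seg_addr+4.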

                    // If we aren't masked out...
                    if !self.vreg.seg_masked_out(vm, i_segment) {
                        // For each field
                        let mut field_addr = seg_addr;
                        for i_field in 0..nf {
                            // ... perform the access
                            let vec_elem = VectorElem::check_with_lmul(
                                rd + (i_field * emul.num_registers_consumed()),
                                eew, emul,
                                i_segment
                            );
                            map.push((vec_elem, field_addr));
                            // and increment the address
                            field_addr += eew.width_in_bytes();
                        }
                    }
                }
            }
            FaultOnlyFirst{evl, nf, eew, emul} => {
                // We don't handle the exceptions here
                // This just lists the accesses that will be attempted
                // This is exactly the same code as for Strided, except the stride is derived from the element width and field count (eew * nf)
                let stride = eew.width_in_bytes() * (nf as u64);

                // For each segment
                for i_segment in self.vstart..evl {
                    let seg_addr = base_addr + (i_segment as u64 * stride);

                    // If we aren't masked out...
                    if !self.vreg.seg_masked_out(vm, i_segment) {
                        // For each field
                        let mut field_addr = seg_addr;
                        for i_field in 0..nf {
                            // ... perform the access
                            let vec_elem = VectorElem::check_with_lmul(
                                rd + (i_field * emul.num_registers_consumed()),
                                eew, emul,
                                i_segment
                            );
                            map.push((vec_elem, field_addr));
                            // and increment the address
                            field_addr += eew.width_in_bytes();
                        }
                    }
                }
            }
            Indexed{index_ew, evl, nf, eew, emul, ..} => {
                // i = element index in logical vector (which includes groups)
                for i_segment in self.vstart..evl {
                    // Get our index
                    let seg_offset = self.vreg.load_vreg_elem_int(index_ew, rs2, i_segment)?;
                    let seg_addr = base_addr + seg_offset as u64;

                    // If we aren't masked out...
                    if !self.vreg.seg_masked_out(vm, i_segment) {
                        // For each field
                        let mut field_addr = seg_addr;
                        for i_field in 0..nf {
                            // ... perform the access
                            let vec_elem = VectorElem::check_with_lmul(
                                rd + (i_field * emul.num_registers_consumed()),
                                eew, emul,
                                i_segment
                            );
                            map.push((vec_elem, field_addr));
                            // and increment the address
                            field_addr += eew.width_in_bytes();
                        }
                    }
                }
            }
            WholeRegister{num_regs, eew, ..} => {
                if !vm {
                    // There are no masked variants of this instruction
                    bail!("WholeRegister operations cannot be masked")
                }

                let mut addr = base_addr;
                let vl = op.evl();
                for i in 0..vl {
                    let vec_elem = VectorElem::check_with_num_regs(rd, eew, num_regs, i as u32);
                    map.push((vec_elem, addr));
                    addr += eew.width_in_bytes();
                }
            }
            ByteMask{evl, ..} => {
                if !vm {
                    // vlm, vsm cannot be masked out
                    bail!("ByteMask operations cannot be masked")
                }

                let mut addr = base_addr;
                for i in self.vstart..evl {
                    let vec_elem = VectorElem::check_with_lmul(
                        rd,
                        Sew::e8, Lmul::e1,
                        i
                    );
                    map.push((vec_elem, addr));
                    addr += 1;
                }
            }
        };

        Ok(map)
    }

    /// Execute a decoded memory access, assuming all access checks have already been performed.
    fn exec_load_store(&mut self, expected_addr_range: Range<u64>, rd: u8, rs1: u8, rs2: u8, vm: bool, op: DecodedMemOp, sreg: &mut dyn VecRegInterface<uXLEN>, mem: &mut dyn VecMemInterface<uXLEN, TElem>) -> Result<()> {
        // Determine which accesses we need to do
        let addr_p = sreg.get_addr_provenance(rs1)?;
        let accesses = self.get_load_store_accesses(rd, addr_p, rs2, vm, op)?;
        let (_, provenance) = addr_p;

        // Check the fast-path range is a tight range, equal to the min/max accessed addresses
        // Get minimum and maximum element access addresses
        let min_addr = accesses.iter().map(|(_, addr)| *addr).min().unwrap();
        // For the maximum, take the maximum of (address + width of element) to get the top of the exclusive range of accessed bytes
        let max_addr = accesses.iter().map(|(elem, addr)| addr + elem.eew.width_in_bytes()).max().unwrap();
        if expected_addr_range.start != min_addr || expected_addr_range.end != max_addr {
            bail!("Computed fast-path address range 0x{:x}-{:x} doesn't match the min/max accessed addresses 0x{:x}-{:x}",
                expected_addr_range.start, expected_addr_range.end,
                min_addr, max_addr
            );
        }

        use DecodedMemOp::*;
        match op {
            Strided{dir, ..} | Indexed{dir, ..} | WholeRegister{dir, ..} | ByteMask{dir, ..} => {
                // For each access...
                for (VectorElem{ base_reg, eew, elem_within_group, ..}, addr) in accesses {
                    let addr_p = (addr, provenance);
                    // Perform the access!
                    match dir {
                        MemOpDir::Load => self.load_to_vreg(mem, eew, addr_p, base_reg, elem_within_group)
                            .with_context(|| format!("Failure on element {}", elem_within_group))?,
                        MemOpDir::Store => self.store_to_mem(mem, eew, addr_p, base_reg, elem_within_group)
                            .with_context(|| format!("Failure on element {}", elem_within_group))?
                    }
                }
            }
            FaultOnlyFirst{..} => {
                // For each access...
                for (VectorElem{ base_reg, eew, elem_within_group, ..}, addr) in accesses {
                    let addr_p = (addr, provenance);
                    // Perform the access
                    let load_fault: Result<()> = 
                        self.load_to_vreg(mem, eew, addr_p, base_reg, elem_within_group);
                    
                    // Check for faults
                    if elem_within_group == 0 {
                        // A fault on the first element (index 0) is raised as normal
                        load_fault?;
                    } else if load_fault.is_err() {
                        use crate::processor::exceptions::{MemoryException, CapabilityException};
                        // There was *some* error from the load, check if it was a memory fault
                        let load_err = load_fault.unwrap_err();
                        // Only shrink vl if it's a MemoryException related to an invalid address...
                        let error_reduces_vlen = matches!(
                            load_err.downcast_ref::<MemoryException>(),
                            Some(MemoryException::AddressUnmapped{..})
                        )
                        // ... or a CapabilityException
                        || load_err.downcast_ref::<CapabilityException>().is_some();
                        if error_reduces_vlen {
                            // "vector length vl is reduced to the index of the 
                            // element that would have raised an exception"
                            self.vl = elem_within_group;
                            // exception received, finish instruction
                            break;
                        } else {
                            // Re-raise exception
                            return Err(load_err)
                        }
                    }
                }
            }
        };
        Ok(())
    }

    /// Load a value of width `eew` from a given address `addr` 
    /// into a specific element `idx_from_base` of a vector register group starting at `vd_base`
    fn load_to_vreg(&mut self, mem: &mut dyn VecMemInterface<uXLEN, TElem>, eew: Sew, addr_provenance: (u64, Provenance), vd_base: u8, idx_from_base: u32) -> Result<()> {
        let val = mem.load_from_memory(eew, addr_provenance)?;
        self.vreg.store_vreg_elem(eew, vd_base, idx_from_base, val)?;
        Ok(())
    }
    /// Stores a value of width `eew` from a specific element `idx_from_base` of a 
    /// vector register group starting at `vd_base` into a given address `addr` 
    fn store_to_mem(&mut self, mem: &mut dyn VecMemInterface<uXLEN, TElem>, eew: Sew, addr_provenance: (u64, Provenance), vd_base: u8, idx_from_base: u32) -> Result<()> {
        let val = self.vreg.load_vreg_elem(eew, vd_base, idx_from_base)?;
        mem.store_to_memory(eew, val, addr_provenance)?;
        Ok(())
    }

    /// Dump vector unit state to standard output.
    pub fn dump(&self) {
        self.vreg.dump();
        println!("vl: {}\nvtype: {:?}", self.vl, self.vtype);
    }
}

/// Helper type combining [VecRegInterface] and [VecMemInterface]
pub type VecInterface<'a, uXLEN, TElem> = (
    &'a mut dyn VecRegInterface<uXLEN>,
    &'a mut dyn VecMemInterface<uXLEN, TElem>
);

impl<uXLEN: PossibleXlen, TElem> IsaMod<VecInterface<'_, uXLEN, TElem>> for Rvv<uXLEN, TElem> {
    type Pc = ();
    fn will_handle(&self, opcode: Opcode, inst: InstructionBits) -> bool {
        use crate::processor::decode::Opcode::*;
        match (opcode, inst) {
            // Delegate all instructions under the Vector opcode to the vector unit
            (Vector, _) => true,

            (LoadFP | StoreFP, InstructionBits::FLdStType{width, ..}) => {
                // Check the access width
                match width {
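                    // Widths 0b0001-0b0100 are the scalar FP widths (FLH/FLW/FLD/FLQ),
                    // and 0b1000-0b1111 are reserved, so neither belongs to the vector unit.
                    // The remaining encodings (0b0000, 0b0101-0b0111) select vector element
                    // widths of 8/16/32/64 bits.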
                    0b0001 | 0b0010 | 0b0011 | 0b0100 => false,
                    0b1000..=0b1111 => false,

                    // This width corresponds to a vector, delegate this instruction to the vector unit
                    _ => true
                }
            },

            _ => false
        }
    }
    
    /// Execute a vector-specific instruction, e.g. vector arithmetic, loads, configuration
    /// Requires a [VecRegInterface] and a [VecMemInterface].
    /// 
    /// # Arguments
    /// 
    /// * `opcode` - The major opcode of the instruction
    /// * `inst` - Decoded instruction bits
    /// * `inst_bits` - Raw instruction bits (TODO - we shouldn't need this)
    /// * `conn` - Connection to external resources
    fn execute(&mut self, opcode: Opcode, inst: InstructionBits, inst_bits: u32, conn: VecInterface<'_, uXLEN, TElem>) -> ProcessorResult<Option<()>> {
        let (sreg, mem) = conn;
        use Opcode::*;
        match (opcode, inst) {
            (Vector, InstructionBits::VType{funct3, funct6, rs1, rs2, rd, vm, ..}) => {
                match funct3 {
                    0b111 => {
                        // Configuration family - vsetvli etc.
                        let inst_kind = match bits!(inst_bits, 30:31) {
                            0b00 | 0b01 => ConfigKind::vsetvli,
                            0b11 => ConfigKind::vsetivli,
                            0b10 => ConfigKind::vsetvl,

                            invalid => panic!("impossible top 2 bits {:2b}", invalid)
                        };
                        self.exec_config(inst_kind, inst, sreg)?
                    }

                    0b000 => {
                        // Vector-Vector int
                        let vs1 = rs1;
                        let vd = rd;

                        match funct6 {
                            0b010111 => {
                                // vmv.v.v
                                if !vm {
                                    bail!("vector-vector move can't be masked");
                                }

                                for i in self.vstart..self.vl {
                                    let val = self.vreg.load_vreg_elem(self.vtype.vsew, vs1, i)?;
                                    self.vreg.store_vreg_elem(self.vtype.vsew, vd, i, val)?;
                                }
                            }
                            _ => bail!("Unsupported OPIVV funct6 {:b}", funct6)
                        }
                    }

                    // 0b010 => {
                    //     // Vector-Vector Move
                    // }

                    0b011 => {
                        // Vector-immediate
                        // TODO - this assumes no sign extending?
                        let imm = rs1 as u128;
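                        // (The spec sign-extends the 5-bit immediate for most OPIVI
                        // instructions, so zero-extending here is only correct for
                        // non-negative immediates.)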

                        match funct6 {
                            0b011000 => {
                                // VMSEQ
                                // Mask Set-if-EQual
                                // This cannot itself be masked
                                let mut val: uVLEN = 0;
                                for i in self.vstart..self.vl {
                                    let reg_val = self.vreg.load_vreg_elem_int(self.vtype.vsew, rs2, i)?;
                                    if reg_val == imm {
                                        val |= (1 as uVLEN) << i;
                                    }
                                }
                                self.vreg.store_vreg_int(rd, val)?;
                            }
                            0b011001 => {
                                // VMSNE
                                // Mask Set-if-Not-Equal
                                // This cannot itself be masked
                                let mut val: uVLEN = 0;
                                for i in self.vstart..self.vl {
                                    if self.vreg.load_vreg_elem_int(self.vtype.vsew, rs2, i)? != imm {
                                        val |= (1 as uVLEN) << i;
                                    }
                                }
                                self.vreg.store_vreg_int(rd, val)?;
                            }

                            0b010111 => {
                                if (!vm) && rd == 0 {
                                    bail!(UnsupportedParam("Can't handle vmerge on the mask register, because it uses the mask register :)".to_string()));
                                }

                                // vmerge or vmv
                                // if masked, vmerge, else vmv
                                for i in self.vstart..self.vl {
                                    let val = if self.vreg.seg_masked_out(vm, i) {
                                        // if masked out, this must be vmerge; take the element from vs2
                                        self.vreg.load_vreg_elem_int(self.vtype.vsew, rs2, i)?
                                    } else {
                                        // either vmerge + not masked, or vmv
                                        // either way, write immediate
                                        imm
                                    };
                                    self.vreg.store_vreg_elem_int(self.vtype.vsew, rd, i, val)?;
                                }
                            }

                            0b100111 => {
                                if vm {
                                    // vmv<nr>r.v (section 16.6)
                                    // copy whole registers/register groups

                                    // The spec says nr = simm5, but there's no such field in this encoding;
                                    // section 11.8 mentions simm5, so it's probably a leftover from an earlier draft.
                                    // Using rs1 looks right for this case, but needs double-checking.
                                    let nr = rs1 + 1;
                                    let emul = match nr {
                                        1 => Lmul::e1,
                                        2 => Lmul::e2,
                                        4 => Lmul::e4,
                                        8 => Lmul::e8,

                                        _ => bail!(UnsupportedParam(format!("Invalid nr encoding in vmv<nr>r.v: nr = {}", nr)))
                                    };

                                    let eew = self.vtype.vsew;

                                    let evl = val_times_lmul_over_sew(VLEN as u32, eew, emul);
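                                    // evl = VLEN * nr / SEW, e.g. VLEN=128, SEW=32, nr=2 => evl = 8 elements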
                                    if self.vstart >= evl {
                                        bail!(UnsupportedParam(format!("evl {} <= vstart {} therefore vector move is no op", evl, self.vstart)))
                                    }
                                    if rd == rs2 {
                                        // architectural no-op
                                        return Ok(None)
                                    }

                                    for vx in 0..nr {
                                        let val = self.vreg.load_vreg(rs2 + vx)?;
                                        self.vreg.store_vreg(rd + vx, val)?;
                                    }
                                } else {
                                    bail!(UnimplementedInstruction("vsmul"));
                                }
                            }

                            0b000000 => {
                                // vadd
                                if (!vm) && rd == 0 {
                                    bail!(UnsupportedParam("Can't handle vadd on the mask register, because it uses the mask register :)".to_string()));
                                }

                                for i in self.vstart..self.vl {
                                    if !self.vreg.seg_masked_out(vm, i) {
                                        let val = self.vreg.load_vreg_elem_int(self.vtype.vsew, rs2, i)?;
                                        // Cast the value down to the element type, do the wrapping addition, then cast it back up
                                        let val = match self.vtype.vsew {
                                            Sew::e8 => {
                                                (val as u8).wrapping_add(imm as u8) as u128
                                            }
                                            Sew::e16 => {
                                                (val as u16).wrapping_add(imm as u16) as u128
                                            }
                                            Sew::e32 => {
                                                (val as u32).wrapping_add(imm as u32) as u128
                                            }
                                            Sew::e64 => {
                                                (val as u64).wrapping_add(imm as u64) as u128
                                            }
                                            Sew::e128 => {
                                                (val as u128).wrapping_add(imm as u128) as u128
                                            }
                                        };
                                        self.vreg.store_vreg_elem_int(self.vtype.vsew, rd, i, val)?;
                                    }
                                }
                            }

                            _ => bail!(MiscDecodeException(format!(
                                    "Vector arithmetic funct3 {:03b} funct6 {:06b} not yet handled", funct3, funct6)
                            ))
                        }
                    }

                    _ => bail!(UnsupportedParam(format!("Vector arithmetic funct3 {:03b} currently not supported", funct3)))
                }
            }

            (LoadFP | StoreFP, InstructionBits::FLdStType{rd, rs1, rs2, vm, ..}) => {
                let op = DecodedMemOp::decode_load_store(opcode, inst, self.vtype, self.vl, sreg)?;

                // Pre-check that the mem-op doesn't do anything dumb
                if op.dir() == MemOpDir::Load && (!vm) && rd == 0 {
                    // If we're masked, we can't load over v0 as that's the mask register
                    bail!("Masked instruction cannot load into v0");
                }
                // Check for no-op
                if op.evl() <= self.vstart {
                    println!("EVL {} <= vstart {} => vector {:?} is no-op", op.evl(), self.vstart, op.dir());
                    return Ok(None)
                }

                let addr_provenance = sreg.get_addr_provenance(rs1)?;

                // Any exception at this point does not set the vstart CSR
                // In the fast-path Success or Indeterminate cases the access is still executed,
                // and the element index is reported to the user through the context string.

                // Pre-check capability access
                let (fast_check_result, addr_range) = self.fast_check_load_store(addr_provenance, rs2, vm, op, sreg);
                match fast_check_result {
                    // There was a fast path that didn't raise an exception
                    Ok(true) => {
                        self.exec_load_store(addr_range, rd, rs1, rs2, vm, op, sreg, mem)
                            .context("Executing pre-checked vector access - shouldn't throw CapabilityExceptions under any circumstances")
                    },
                    // There was a fast path that raised an exception, re-raise it
                    Err(e) => {
                        // This assumes imprecise error handling
                        Err(e)
                    }
                    // There was no fast path, or it was uncertain if a CapabilityException would actually be raised
                    Ok(false) => {
                        self.exec_load_store(addr_range, rd, rs1, rs2, vm, op, sreg, mem)
                            .context("Executing not-pre-checked vector access - may throw CapabilityException")
                    },
                }.context(format!("Executing vector access {:?}", op))?;
            }

            _ => bail!("Unexpected opcode/InstructionBits pair at vector unit")
        }

        // If we get this far, the vector instruction has completed
        // As per RVVspec 3.7, we "reset the vstart CSR to zero at the end of execution"
        self.vstart = 0;

        Ok(None)
    }
}

impl<uXLEN: PossibleXlen, TElem> CSRProvider<uXLEN> for Rvv<uXLEN, TElem> {
    fn has_csr(&self, csr: u32) -> bool {
        match csr {
            // Should be implemented, aren't yet
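            // (vstart, vxsat, vxrm, vcsr)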
            0x008 | 0x009 | 0x00A | 0x00F => todo!(),

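            // vl, vtype, vlenb (read-only)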
            0xC20 | 0xC21 | 0xC22 => true,

            _ => false
        }
    }

    fn csr_atomic_read_write(&mut self, csr: u32, _need_read: bool, _write_val: uXLEN) -> Result<Option<uXLEN>> {
        match csr {
            0xC20 | 0xC21 | 0xC22 => bail!("CSR 0x{:04x} is read-only, cannot atomic read/write", csr),
            _ => todo!()
        }
    }

    fn csr_atomic_read_set(&mut self, csr: u32, set_bits: Option<uXLEN>) -> Result<uXLEN> {
        if set_bits.is_some() {
            match csr {
                0xC20 | 0xC21 | 0xC22 => bail!("CSR 0x{:04x} is read-only, cannot atomic set", csr),
                _ => todo!()
            }
        } else {
            match csr {
                0xC20 => Ok(self.vl.into()),
                0xC21 => Ok(self.vtype.encode().into()),
                0xC22 => Ok(((VLEN/8) as u32).into()),

                _ => todo!()
            }
        }
    }
    fn csr_atomic_read_clear(&mut self, _csr: u32, _clear_bits: Option<uXLEN>) -> Result<uXLEN> {
        todo!()
    }
}