From b91271ad11a71ced1e7eaaab144070813c3c164f Mon Sep 17 00:00:00 2001
From: Dongli Zhang <[email protected]>
Date: Sun, 25 Sep 2022 18:27:48 -0700
Subject: [PATCH 1/1] linux v5.19
Signed-off-by: Dongli Zhang <[email protected]>
---
arch/x86/events/core.c | 92 ++++++++++++++++++
arch/x86/events/intel/core.c | 64 ++++++++++++
arch/x86/events/perf_event.h | 50 ++++++++++
arch/x86/include/asm/kvm_host.h | 37 +++++++
arch/x86/include/asm/perf_event.h | 23 +++++
arch/x86/include/asm/spec-ctrl.h | 4 +
arch/x86/kernel/cpu/bugs.c | 5 +
arch/x86/kernel/kvm.c | 4 +
arch/x86/kvm/lapic.c | 5 +
arch/x86/kvm/mmu/mmu.c | 28 ++++++
arch/x86/kvm/mmu/spte.c | 14 +++
arch/x86/kvm/mmu/spte.h | 7 ++
arch/x86/kvm/mmu/tdp_iter.c | 150 ++++++++++++++++++++++++++++-
arch/x86/kvm/mmu/tdp_iter.h | 79 +++++++++++++++
arch/x86/kvm/mmu/tdp_mmu.c | 109 +++++++++++++++++++++
arch/x86/kvm/pmu.c | 77 +++++++++++++++
arch/x86/kvm/pmu.h | 12 +++
arch/x86/kvm/svm/svm.c | 23 +++++
arch/x86/kvm/vmx/nested.c | 19 ++++
arch/x86/kvm/vmx/pmu_intel.c | 4 +
arch/x86/kvm/vmx/vmcs12.h | 9 ++
arch/x86/kvm/vmx/vmx.c | 13 +++
arch/x86/kvm/x86.c | 37 +++++++
drivers/acpi/cppc_acpi.c | 6 ++
drivers/net/virtio_net.c | 4 +
drivers/virtio/virtio_ring.c | 114 ++++++++++++++++++++++
include/linux/kvm_para.h | 48 +++++++++
kernel/events/core.c | 98 +++++++++++++++++++
tools/perf/arch/x86/util/evlist.c | 8 ++
tools/perf/arch/x86/util/topdown.c | 8 ++
tools/perf/builtin-stat.c | 39 ++++++++
tools/perf/util/evlist.c | 5 +
tools/perf/util/evsel.c | 7 ++
tools/perf/util/parse-events.c | 9 ++
tools/perf/util/pmu.c | 4 +
virt/kvm/kvm_main.c | 21 ++++
36 files changed, 1235 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 30788894124f..d64ff2280223 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -54,6 +54,19 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+/*
+ * perf_is_hybrid is used in the following places:
+ * - arch/x86/events/core.c|57| <<global>> DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);
+ * - arch/x86/events/intel/core.c|6257| <<intel_pmu_init>> static_branch_enable(&perf_is_hybrid);
+ * - arch/x86/events/perf_event.h|679| <<is_hybrid>> #define is_hybrid() static_branch_unlikely(&perf_is_hybrid)
+ *
+ * intel_pmu_init()-->static_branch_enable(&perf_is_hybrid) happens for the following CPU models:
+ * case INTEL_FAM6_ALDERLAKE:
+ * case INTEL_FAM6_ALDERLAKE_L:
+ * case INTEL_FAM6_ALDERLAKE_N:
+ * case INTEL_FAM6_RAPTORLAKE:
+ * case INTEL_FAM6_RAPTORLAKE_P:
+ */
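+/*
+ * Illustrative sketch (not part of this patch) of the static-key pattern
+ * referenced above; hybrid_pmu() is the real helper from
+ * arch/x86/events/perf_event.h, the surrounding lines are hypothetical:
+ *
+ *   DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);        // branch defaults to off
+ *
+ *   if (static_branch_unlikely(&perf_is_hybrid))    // i.e. is_hybrid()
+ *           pmu = hybrid_pmu(event->pmu);           // hybrid-only path
+ */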
DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);
/*
@@ -470,6 +483,11 @@ void x86_del_exclusive(unsigned int what)
atomic_dec(&x86_pmu.lbr_exclusive[what]);
}
+/*
+ * called by:
+ * - arch/x86/events/core.c|657| <<x86_pmu_hw_config>> return x86_setup_perfctr(event);
+ * - arch/x86/events/intel/p4.c|850| <<p4_hw_config>> rc = x86_setup_perfctr(event);
+ */
int x86_setup_perfctr(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
@@ -496,6 +514,9 @@ int x86_setup_perfctr(struct perf_event *event)
/*
* The generic map:
*/
+ /*
+ * intel_pmu_event_map()
+ */
config = x86_pmu.event_map(attr->config);
if (config == 0)
@@ -557,6 +578,15 @@ int x86_pmu_max_precise(void)
return precise;
}
+/*
+ * x86_pmu_hw_config() is used in the following places:
+ * - arch/x86/events/intel/knc.c|297| <<global>> .hw_config = x86_pmu_hw_config,
+ * - arch/x86/events/intel/p6.c|208| <<global>> .hw_config = x86_pmu_hw_config,
+ * - arch/x86/events/zhaoxin/core.c|466| <<global>> .hw_config = x86_pmu_hw_config,
+ * - arch/x86/events/amd/core.c|444| <<amd_pmu_hw_config>> ret = x86_pmu_hw_config(event);
+ * - arch/x86/events/intel/core.c|3778| <<core_pmu_hw_config>> int ret = x86_pmu_hw_config(event);
+ * - arch/x86/events/intel/core.c|3825| <<intel_pmu_hw_config>> int ret = x86_pmu_hw_config(event);
+ */
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -647,6 +677,10 @@ int x86_pmu_hw_config(struct perf_event *event)
/*
* Setup the hardware configuration for a given attr_type
*/
+/*
+ * called by:
+ * - arch/x86/events/core.c|2483| <<x86_pmu_event_init>> err = __x86_pmu_event_init(event);
+ */
static int __x86_pmu_event_init(struct perf_event *event)
{
int err;
@@ -669,6 +703,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
+ /*
+ * intel_pmu_hw_config()
+ */
return x86_pmu.hw_config(event);
}
@@ -1434,6 +1471,11 @@ int x86_perf_event_set_period(struct perf_event *event)
return ret;
}
+/*
+ * called by:
+ * - arch/x86/events/amd/core.c|768| <<amd_pmu_enable_event>> x86_pmu_enable_event(event);
+ * - arch/x86/events/intel/core.c|3997| <<core_pmu_enable_event>> x86_pmu_enable_event(event);
+ */
void x86_pmu_enable_event(struct perf_event *event)
{
if (__this_cpu_read(cpu_hw_events.enabled))
@@ -1529,6 +1571,12 @@ static void x86_pmu_start(struct perf_event *event, int flags)
perf_event_update_userpage(event);
}
+/*
+ * called by:
+ * - arch/x86/events/intel/core.c|3036| <<intel_pmu_handle_irq>> perf_event_print_debug();
+ * - arch/x86/events/intel/knc.c|236| <<knc_pmu_handle_irq>> perf_event_print_debug();
+ * - drivers/tty/sysrq.c|294| <<sysrq_handle_showregs>> perf_event_print_debug();
+ */
void perf_event_print_debug(void)
{
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
@@ -1762,6 +1810,40 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(perf_event_nmi_handler);
+/*
+ * An example from v5.4:
+ * crash> unconstrained
+ * unconstrained = $1 = {
+ * {
+ * idxmsk = {255},
+ * idxmsk64 = 255
+ * },
+ * code = 0,
+ * cmask = 0,
+ * weight = 8,
+ * overlap = 0,
+ * flags = 0,
+ * size = 0
+ * }
+ * crash> emptyconstraint
+ * emptyconstraint = $2 = {
+ * {
+ * idxmsk = {0},
+ * idxmsk64 = 0
+ * },
+ * code = 0,
+ * cmask = 0,
+ * weight = 0,
+ * overlap = 0,
+ * flags = 0,
+ * size = 0
+ * }
+ *
+ * unconstrained is set in the following places:
+ * - arch/x86/events/core.c|2159| <<init_hw_perf_events>> unconstrained = (struct event_constraint)
+ * - arch/x86/events/intel/core.c|6331| <<intel_pmu_init>> pmu->unconstrained = (struct event_constraint)
+ * - arch/x86/events/intel/core.c|6352| <<intel_pmu_init>> pmu->unconstrained = (struct event_constraint)
+ */
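+/*
+ * Rough sketch of where the v5.4 "unconstrained" values above come from
+ * (assuming x86_pmu.num_counters == 8); see init_hw_perf_events():
+ *
+ *   unconstrained = (struct event_constraint)
+ *           __EVENT_CONSTRAINT(0, (1ULL << 8) - 1, 0, 8, 0, 0);
+ *   // -> idxmsk64 = 0xff (255), weight = 8, code = cmask = 0
+ */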
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
@@ -2367,6 +2449,10 @@ static struct cpu_hw_events *allocate_fake_cpuc(struct pmu *event_pmu)
/*
* validate that we can schedule this event
*/
+/*
+ * called by:
+ * - arch/x86/events/core.c|2488| <<x86_pmu_event_init>> err = validate_event(event);
+ */
static int validate_event(struct perf_event *event)
{
struct cpu_hw_events *fake_cpuc;
@@ -2377,6 +2463,9 @@ static int validate_event(struct perf_event *event)
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
+ /*
+ * hsw_get_event_constraints()
+ */
c = x86_pmu.get_event_constraints(fake_cpuc, 0, event);
if (!c || !c->weight)
@@ -2453,6 +2542,9 @@ static int validate_group(struct perf_event *event)
return ret;
}
+/*
+ * struct pmu pmu.event_init = x86_pmu_event_init()
+ */
static int x86_pmu_event_init(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = NULL;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 45024abd929f..f5a35f0e6dc6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -39,6 +39,10 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
+/*
+ * This is used by the "core" PMU:
+ * struct x86_pmu core_pmu.event_constraints = intel_core_event_constraints
+ */
static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
@@ -2404,6 +2408,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
__clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
}
+/*
+ * called by:
+ * - arch/x86/events/intel/core.c|2449| <<intel_pmu_disable_event>> intel_pmu_disable_fixed(event);
+ */
static void intel_pmu_disable_fixed(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -2748,8 +2756,33 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
wrmsrl(hwc->config_base, ctrl_val);
}
+/*
+ * An example call stack from v4.14:
+ * intel_pmu_enable_event
+ * x86_pmu_enable
+ * perf_pmu_enable
+ * ctx_resched
+ * __perf_event_enable
+ * event_function
+ * remote_function
+ * flush_smp_call_function_queue
+ * generic_smp_call_function_single_interrupt
+ * smp_call_function_single_interrupt
+ * call_function_single_interrupt
+ * cpuidle_enter_state
+ * cpuidle_enter
+ * call_cpuidle
+ * do_idle
+ * cpu_startup_entry
+ * start_secondary
+ * secondary_startup_64
+ */
static void intel_pmu_enable_event(struct perf_event *event)
{
+ /*
+ * struct perf_event *event:
+ * -> struct hw_perf_event hw;
+ */
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -3245,11 +3278,19 @@ struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
+ /*
+ * If not hybrid, this returns x86_pmu.event_constraints.
+ */
struct event_constraint *event_constraints = hybrid(cpuc->pmu, event_constraints);
struct event_constraint *c;
if (event_constraints) {
for_each_event_constraint(c, event_constraints) {
+ /*
+ * struct perf_event *event:
+ * -> struct hw_perf_event hw;
+ * -> (union) u64 config;
+ */
if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
@@ -5502,6 +5543,11 @@ static void intel_pmu_check_num_counters(int *num_counters,
*intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
}
+/*
+ * called by:
+ * - arch/x86/events/intel/core.c|5630| <<intel_pmu_check_hybrid_pmus>> intel_pmu_check_event_constraints(pmu->event_constraints,
+ * - arch/x86/events/intel/core.c|6432| <<intel_pmu_init>> intel_pmu_check_event_constraints(x86_pmu.event_constraints,
+ */
static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
int num_counters,
int num_counters_fixed,
@@ -5526,6 +5572,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
* Disable topdown slots and metrics events,
* if slots event is not in CPUID.
*/
+ /*
+ * 1 << 35
+ */
if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl))
c->idxmsk64 = 0;
c->weight = hweight64(c->idxmsk64);
@@ -5569,6 +5618,10 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
}
}
+/*
+ * called by:
+ * - arch/x86/events/intel/core.c|6480| <<intel_pmu_init>> intel_pmu_check_hybrid_pmus((u64)fixed_mask);
+ */
static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
{
struct x86_hybrid_pmu *pmu;
@@ -5631,6 +5684,13 @@ __init int intel_pmu_init(void)
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
+ /*
+ * An example:
+ * # cpuid -l 0xa -1 -r
+ * Disclaimer: cpuid may not support decoding of all cpuid registers.
+ * CPU:
+ * 0x0000000a 0x00: eax=0x08300802 ebx=0x00000000 ecx=0x00000000 edx=0x00000603
+ */
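+ /*
+ * Decoding the leaf 0xa example above (architectural perfmon, sketch):
+ *   eax = 0x08300802: version 2, 8 GP counters, 48 bits wide,
+ *                     events-mask length 8
+ *   edx = 0x00000603: 3 fixed counters, 48 bits wide
+ */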
cpuid(10, &eax.full, &ebx.full, &fixed_mask, &edx.full);
if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
return -ENODEV;
@@ -5661,6 +5721,10 @@ __init int intel_pmu_init(void)
x86_pmu.num_counters_fixed =
max((int)edx.split.num_counters_fixed, assume);
+ /*
+ * 1 << 3 is 0b1000
+ * (1 << 3) - 1 is 0b111
+ */
fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
} else if (version >= 5)
x86_pmu.num_counters_fixed = fls(fixed_mask);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 21a5482bcf84..d298fd698d2c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -246,6 +246,9 @@ struct cpu_hw_events {
u64 tags[X86_PMC_IDX_MAX];
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+ /*
+ * #define X86_PMC_IDX_MAX 64
+ */
struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
int n_excl; /* the number of exclusive events */
@@ -672,6 +675,19 @@ static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
return container_of(pmu, struct x86_hybrid_pmu, pmu);
}
+/*
+ * perf_is_hybrid is used in the following places:
+ * - arch/x86/events/core.c|57| <<global>> DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);
+ * - arch/x86/events/intel/core.c|6257| <<intel_pmu_init>> static_branch_enable(&perf_is_hybrid);
+ * - arch/x86/events/perf_event.h|679| <<is_hybrid>> #define is_hybrid() static_branch_unlikely(&perf_is_hybrid)
+ *
+ * intel_pmu_init()-->static_branch_enable(&perf_is_hybrid) happens for the following CPU models:
+ * case INTEL_FAM6_ALDERLAKE:
+ * case INTEL_FAM6_ALDERLAKE_L:
+ * case INTEL_FAM6_ALDERLAKE_N:
+ * case INTEL_FAM6_RAPTORLAKE:
+ * case INTEL_FAM6_RAPTORLAKE_P:
+ */
extern struct static_key_false perf_is_hybrid;
#define is_hybrid() static_branch_unlikely(&perf_is_hybrid)
@@ -1070,6 +1086,23 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
+/*
+ * called by:
+ * - arch/x86/events/core.c|214| <<reserve_pmc_hardware>> if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
+ * - arch/x86/events/core.c|222| <<reserve_pmc_hardware>> release_evntsel_nmi(x86_pmu_config_addr(i));
+ * - arch/x86/events/core.c|239| <<release_pmc_hardware>> release_evntsel_nmi(x86_pmu_config_addr(i));
+ * - arch/x86/events/core.c|262| <<check_hw_exists>> reg = x86_pmu_config_addr(i);
+ * - arch/x86/events/core.c|686| <<x86_pmu_disable_all>> rdmsrl(x86_pmu_config_addr(idx), val);
+ * - arch/x86/events/core.c|690| <<x86_pmu_disable_all>> wrmsrl(x86_pmu_config_addr(idx), val);
+ * - arch/x86/events/core.c|692| <<x86_pmu_disable_all>> wrmsrl(x86_pmu_config_addr(idx + 1), 0);
+ * - arch/x86/events/core.c|1242| <<x86_assign_hw_event>> hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ * - arch/x86/events/core.c|1572| <<perf_event_print_debug>> rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ * - arch/x86/events/intel/core.c|2828| <<intel_pmu_reset>> wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+ * - arch/x86/events/intel/core.c|3975| <<core_guest_get_msrs>> arr[idx].msr = x86_pmu_config_addr(idx);
+ * - arch/x86/events/intel/p4.c|1382| <<p4_pmu_init>> reg = x86_pmu_config_addr(i);
+ * - arch/x86/events/perf_event.h|1134| <<__x86_pmu_enable_event>> wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
+ * - arch/x86/events/perf_event.h|1155| <<x86_pmu_disable_event>> wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
+ */
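+/*
+ * Hedged example (assuming Intel with no addr_offset callback installed):
+ * x86_pmu.eventsel is MSR_ARCH_PERFMON_EVENTSEL0 (0x186), so
+ *   x86_pmu_config_addr(0) == 0x186
+ *   x86_pmu_config_addr(2) == 0x188
+ */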
static inline unsigned int x86_pmu_config_addr(int index)
{
return x86_pmu.eventsel + (x86_pmu.addr_offset ?
@@ -1118,6 +1151,16 @@ static inline bool is_counter_pair(struct hw_perf_event *hwc)
return hwc->flags & PERF_X86_EVENT_PAIR;
}
+/*
+ * called by:
+ * - arch/x86/events/amd/core.c|799| <<amd_pmu_v2_enable_event>> __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ * - arch/x86/events/core.c|743| <<x86_pmu_enable_all>> __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ * - arch/x86/events/core.c|1440| <<x86_pmu_enable_event>> __x86_pmu_enable_event(&event->hw,
+ * - arch/x86/events/intel/core.c|2320| <<intel_pmu_nhm_workaround>> __x86_pmu_enable_event(&event->hw,
+ * - arch/x86/events/intel/core.c|2762| <<intel_pmu_enable_event>> __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ * - arch/x86/events/intel/core.c|4012| <<core_pmu_enable_all>> __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ * - arch/x86/events/zhaoxin/core.c|347| <<zhaoxin_pmu_enable_event>> __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ */
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
@@ -1144,6 +1187,13 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
void x86_pmu_stop(struct perf_event *event, int flags);
+/*
+ * x86_pmu_disable_event() is used in the following places:
+ * - struct x86_pmu core_pmu.disable = x86_pmu_disable_event()
+ * - arch/x86/events/amd/core.c|809| <<amd_pmu_disable_event>> x86_pmu_disable_event(event);
+ * - arch/x86/events/intel/core.c|2441| <<intel_pmu_disable_event>> x86_pmu_disable_event(event);
+ * - arch/x86/events/zhaoxin/core.c|310| <<zhaoxin_pmu_disable_event>> x86_pmu_disable_event(event);
+ */
static inline void x86_pmu_disable_event(struct perf_event *event)
{
u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9217bd6cf0d1..751ebef8d4bf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -494,6 +494,18 @@ struct kvm_pmc {
* eventsel value for general purpose counters,
* ctrl value for fixed counters.
*/
+ /*
+ * kvm_pmc->current_config is used in the following places:
+ * - arch/x86/kvm/pmu.c|268| <<reprogram_gp_counter>> if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
+ * - arch/x86/kvm/pmu.c|273| <<reprogram_gp_counter>> pmc->current_config = eventsel;
+ * - arch/x86/kvm/pmu.c|308| <<reprogram_fixed_counter>> if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
+ * - arch/x86/kvm/pmu.c|313| <<reprogram_fixed_counter>> pmc->current_config = (u64)ctrl;
+ * - arch/x86/kvm/pmu.c|568| <<cpl_is_matched>> u64 config = pmc->current_config;
+ * - arch/x86/kvm/pmu.h|74| <<pmc_release_perf_event>> pmc->current_config = 0;
+ * - arch/x86/kvm/svm/pmu.c|328| <<amd_pmu_init>> pmu->gp_counters[i].current_config = 0;
+ * - arch/x86/kvm/vmx/pmu_intel.c|570| <<intel_pmu_init>> pmu->gp_counters[i].current_config = 0;
+ * - arch/x86/kvm/vmx/pmu_intel.c|577| <<intel_pmu_init>> pmu->fixed_counters[i].current_config = 0;
+ */
u64 current_config;
bool is_paused;
bool intr;
@@ -507,6 +519,20 @@ struct kvm_pmu {
u64 fixed_ctr_ctrl;
u64 global_ctrl;
u64 global_status;
+ /*
+ * kvm_pmu->counter_bitmask[2] is used in the following places:
+ * - arch/x86/kvm/pmu.h|48| <<pmc_bitmask>> return pmu->counter_bitmask[pmc->type];
+ * - arch/x86/kvm/svm/pmu.c|306| <<amd_pmu_refresh>> pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
+ * - arch/x86/kvm/svm/pmu.c|311| <<amd_pmu_refresh>> pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+ * - arch/x86/kvm/vmx/pmu_intel.c|149| <<intel_rdpmc_ecx_to_pmc>> *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
+ * - arch/x86/kvm/vmx/pmu_intel.c|373| <<intel_pmu_get_msr>> val & pmu->counter_bitmask[KVM_PMC_GP];
+ * - arch/x86/kvm/vmx/pmu_intel.c|378| <<intel_pmu_get_msr>> val & pmu->counter_bitmask[KVM_PMC_FIXED];
+ * - arch/x86/kvm/vmx/pmu_intel.c|432| <<intel_pmu_set_msr>> (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
+ * - arch/x86/kvm/vmx/pmu_intel.c|489| <<intel_pmu_refresh>> pmu->counter_bitmask[KVM_PMC_GP] = 0;
+ * - arch/x86/kvm/vmx/pmu_intel.c|490| <<intel_pmu_refresh>> pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+ * - arch/x86/kvm/vmx/pmu_intel.c|510| <<intel_pmu_refresh>> pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ * - arch/x86/kvm/vmx/pmu_intel.c|524| <<intel_pmu_refresh>> pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1;
+ */
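+ /*
+ * Worked example (sketch): with 48-bit wide counters the refresh code
+ * listed above sets
+ *   counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1 = 0x0000ffffffffffff
+ * and pmc_bitmask() masks counter values with exactly this value.
+ */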
u64 counter_bitmask[2];
u64 global_ctrl_mask;
u64 global_ovf_ctrl_mask;
@@ -1271,6 +1297,17 @@ struct kvm_arch {
* count to zero should removed the root from the list and clean
* it up, freeing the root after an RCU grace period.
*/
+ /*
+ * kvm_arch->tdp_mmu_roots is used in the following places:
+ * - arch/x86/kvm/mmu/tdp_mmu.c|30| <<kvm_mmu_init_tdp_mmu>> INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
+ * - arch/x86/kvm/mmu/tdp_mmu.c|58| <<kvm_mmu_uninit_tdp_mmu>> WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
+ * - arch/x86/kvm/mmu/tdp_mmu.c|213| <<tdp_mmu_next_root>> next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ * - arch/x86/kvm/mmu/tdp_mmu.c|217| <<tdp_mmu_next_root>> next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ * - arch/x86/kvm/mmu/tdp_mmu.c|225| <<tdp_mmu_next_root>> next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots,
+ * - arch/x86/kvm/mmu/tdp_mmu.c|269| <<for_each_tdp_mmu_root>> list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) \
+ * - arch/x86/kvm/mmu/tdp_mmu.c|335| <<kvm_tdp_mmu_get_vcpu_root_hpa>> list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);
+ * - arch/x86/kvm/mmu/tdp_mmu.c|1052| <<kvm_tdp_mmu_invalidate_all_roots>> list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
+ */
struct list_head tdp_mmu_roots;
/*
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 409725e86f42..ce1c5b5af527 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -252,7 +252,20 @@ struct x86_pmu_capability {
/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c
+/*
+ * INTEL_PMC_IDX_FIXED = 32
+ * 32 + 3 = 35
+ */
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
+/*
+ * INTEL_PMC_MSK_FIXED_SLOTS is used in the following places:
+ * - arch/x86/events/intel/core.c|5571| <<intel_pmu_check_event_constraints>> if (!(INTEL_PMC_MSK_FIXED_SLOTS & intel_ctrl))
+ * - arch/x86/events/intel/core.c|5633| <<intel_pmu_check_hybrid_pmus>> pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS;
+ * - arch/x86/include/asm/perf_event.h|294| <<INTEL_PMC_MSK_TOPDOWN>> #define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | INTEL_PMC_MSK_FIXED_SLOTS)
+ */
+/*
+ * 1 << 35
+ */
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
static inline bool use_fixed_pseudo_encoding(u64 code)
@@ -275,6 +288,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
*
* Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
*/
+/*
+ * 32 + 16 = 48
+ */
#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16)
#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0)
#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
@@ -285,6 +301,13 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
#define INTEL_PMC_IDX_TD_FETCH_LAT (INTEL_PMC_IDX_METRIC_BASE + 6)
#define INTEL_PMC_IDX_TD_MEM_BOUND (INTEL_PMC_IDX_METRIC_BASE + 7)
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_MEM_BOUND
+/*
+ * INTEL_PMC_MSK_TOPDOWN is used in the following places:
+ * - arch/x86/events/intel/core.c|5566| <<intel_pmu_check_event_constraints>> if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
+ * - arch/x86/include/asm/perf_event.h|326| <<INTEL_PMC_OTHER_TOPDOWN_BITS>> (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
+ *
+ * (0xff << 48) | (1 << 35) = 0xff000000000000 | 0x800000000 = 0xff000800000000
+ */
#define INTEL_PMC_MSK_TOPDOWN ((0xffull << INTEL_PMC_IDX_METRIC_BASE) | \
INTEL_PMC_MSK_FIXED_SLOTS)
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 5393babc0598..afca3741b702 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -23,6 +23,10 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo
*
* Avoids writing to the MSR if the content/bits are the same
*/
+/*
+ * called by:
+ * - arch/x86/kvm/svm/svm.c|3854| <<svm_vcpu_run>> x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ */
static inline
void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
{
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 6761668100b9..f9f0fb5a1ca4 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -199,6 +199,11 @@ void __init check_bugs(void)
* NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
* done in vmenter.S.
*/
+/*
+ * called by:
+ * - arch/x86/include/asm/spec-ctrl.h|29| <<x86_spec_ctrl_set_guest>> x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+ * - arch/x86/include/asm/spec-ctrl.h|43| <<x86_spec_ctrl_restore_host>> x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+ */
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1a3658f7e6d9..4e8c068b7924 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -645,6 +645,10 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
}
}
+/*
+ * kvm_flush_tlb_multi() is used in the following places:
+ * - arch/x86/kernel/kvm.c|844| <<kvm_guest_init>> pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
+ */
static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0e68b4c937fc..fcd75018d0a3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1095,6 +1095,11 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
* Add a pending IRQ into lapic.
* Return 1 if successfully added and 0 if discarded.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/lapic.c|634| <<kvm_apic_set_irq>> return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, irq->level, irq->trig_mode, dest_map);
+ * - arch/x86/kvm/lapic.c|2481| <<kvm_apic_local_deliver>> return __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
+ */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
struct dest_map *dest_map)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 17252f39bd7c..3df1f157e9e0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4050,6 +4050,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/mmu.c|4120| <<nonpaging_page_fault>> return direct_page_fault(vcpu, fault);
+ * - arch/x86/kvm/mmu/mmu.c|4168| <<kvm_tdp_page_fault>> return direct_page_fault(vcpu, fault);
+ */
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
@@ -4153,6 +4158,29 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+/*
+ * 235 struct kvm_page_fault fault = {
+ * 236 .addr = cr2_or_gpa,
+ * 237 .error_code = err,
+ * 238 .exec = err & PFERR_FETCH_MASK,
+ * 239 .write = err & PFERR_WRITE_MASK,
+ * 240 .present = err & PFERR_PRESENT_MASK,
+ * 241 .rsvd = err & PFERR_RSVD_MASK,
+ * 242 .user = err & PFERR_USER_MASK,
+ * 243 .prefetch = prefetch,
+ * 244 .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
+ * 245 .nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(),
+ * 246
+ * 247 .max_level = KVM_MAX_HUGEPAGE_LEVEL,
+ * 248 .req_level = PG_LEVEL_4K,
+ * 249 .goal_level = PG_LEVEL_4K,
+ * 250 };
+ *
+ * kvm_tdp_page_fault() is used in the following places:
+ * - arch/x86/kvm/mmu/mmu.c|4860| <<init_kvm_tdp_mmu>> context->page_fault = kvm_tdp_page_fault;
+ * - arch/x86/kvm/mmu/mmu_internal.h|244| <<kvm_mmu_do_page_fault>> .is_tdp = likely(vcpu->arch.mmu->page_fault == kvm_tdp_page_fault),
+ * - arch/x86/kvm/mmu/mmu_internal.h|262| <<kvm_mmu_do_page_fault>> r = kvm_tdp_page_fault(vcpu, &fault);
+ */
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
while (fault->max_level > PG_LEVEL_4K) {
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index b5960bbde7f7..578a6bb50e44 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -126,6 +126,13 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
bool host_writable, u64 *new_spte)
{
int level = sp->role.level;
+ /*
+ * A MMU present SPTE is backed by actual memory and may or may not be present
+ * in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it
+ * is ignored by all flavors of SPTEs and checking a low bit often generates
+ * better code than for a high bit, e.g. 56+. MMU present checks are pervasive
+ * enough that the improved code generation is noticeable in KVM's footprint.
+ */
u64 spte = SPTE_MMU_PRESENT_MASK;
bool wrprot = false;
@@ -282,6 +289,13 @@ u64 make_huge_page_split_spte(u64 huge_spte, int huge_level, int index)
u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
{
+ /*
+ * A MMU present SPTE is backed by actual memory and may or may not be present
+ * in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it
+ * is ignored by all flavors of SPTEs and checking a low bit often generates
+ * better code than for a high bit, e.g. 56+. MMU present checks are pervasive
+ * enough that the improved code generation is noticeable in KVM's footprint.
+ */
u64 spte = SPTE_MMU_PRESENT_MASK;
spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 0127bb6e3c7d..61fb4a004cf1 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -212,6 +212,13 @@ static inline bool is_mmio_spte(u64 spte)
static inline bool is_shadow_present_pte(u64 pte)
{
+ /*
+ * A MMU present SPTE is backed by actual memory and may or may not be present
+ * in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it
+ * is ignored by all flavors of SPTEs and checking a low bit often generates
+ * better code than for a high bit, e.g. 56+. MMU present checks are pervasive
+ * enough that the improved code generation is noticeable in KVM's footprint.
+ */
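+ /*
+ * Concretely (sketch): SPTE_MMU_PRESENT_MASK is BIT_ULL(11) = 0x800, so
+ *   is_shadow_present_pte(0x800) -> true
+ *   is_shadow_present_pte(0x0)   -> false
+ */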
return !!(pte & SPTE_MMU_PRESENT_MASK);
}
diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index ee4802d7b36c..f3fc68a75ac9 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// spdx-license-identifier: gpl-2.0
#include "mmu_internal.h"
#include "tdp_iter.h"
@@ -8,13 +8,37 @@
* Recalculates the pointer to the SPTE for the current GFN and level and
* reread the SPTE.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|34| <<tdp_iter_restart>> tdp_iter_refresh_sptep(iter);
+ * - arch/x86/kvm/mmu/tdp_iter.c|101| <<try_step_down>> tdp_iter_refresh_sptep(iter);
+ * - arch/x86/kvm/mmu/tdp_iter.c|143| <<try_step_up>> tdp_iter_refresh_sptep(iter);
+ *
+ * The core idea is to update tdp_iter->sptep, the pointer to the current SPTE.
+ */
static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
{
+ /*
+ * tdp_iter->pt_path[PT64_ROOT_MAX_LEVEL] is used in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|19| <<tdp_iter_refresh_sptep>> iter->sptep = iter->pt_path[iter->level - 1] +
+ * - arch/x86/kvm/mmu/tdp_iter.c|79| <<tdp_iter_start>> iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
+ * - arch/x86/kvm/mmu/tdp_iter.c|133| <<try_step_down>> iter->pt_path[iter->level - 1] = child_pt;
+ *
+ * tdp_iter->sptep is modified in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|19| <<tdp_iter_refresh_sptep>> iter->sptep = iter->pt_path[iter->level - 1] + SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|166| <<try_step_side>> iter->sptep++;
+ */
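+ /*
+ * Sketch of the pointer arithmetic below: pt_path[level - 1] points at the
+ * page table for the current level and SHADOW_PT_INDEX() extracts the
+ * 9-bit index for that level, e.g. at level 1 (4KiB):
+ *   index = ((gfn << PAGE_SHIFT) >> 12) & 0x1ff = gfn & 0x1ff
+ */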
iter->sptep = iter->pt_path[iter->level - 1] +
SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep);
}
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|33| <<tdp_iter_restart>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|100| <<try_step_down>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|142| <<try_step_up>> iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
+ */
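+/*
+ * Worked example (sketch): at the 2MiB level KVM_PAGES_PER_HPAGE() is 512,
+ * so "gfn & -512" clears the low 9 bits, i.e. rounds the GFN down to a
+ * 512-page boundary:
+ *   round_gfn_for_level(0x12345, PG_LEVEL_2M) == 0x12200
+ */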
static gfn_t round_gfn_for_level(gfn_t gfn, int level)
{
return gfn & -KVM_PAGES_PER_HPAGE(level);
@@ -24,12 +48,51 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
* Return the TDP iterator to the root PT and allow it to continue its
* traversal over the paging structure from there.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|57| <<tdp_iter_start>> tdp_iter_restart(iter);
+ * - arch/x86/kvm/mmu/tdp_iter.c|176| <<tdp_iter_next>> tdp_iter_restart(iter);
+ */
void tdp_iter_restart(struct tdp_iter *iter)
{
+ /*
+ * tdp_iter->yielded is set in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|46| <<tdp_iter_restart>> iter->yielded = false;
+ * - arch/x86/kvm/mmu/tdp_mmu.c|836| <<tdp_mmu_iter_cond_resched>> iter->yielded = true;
+ * - arch/x86/kvm/mmu/tdp_mmu.c|1464| <<tdp_mmu_alloc_sp_for_split>> iter->yielded = true;
+ *
+ * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
+ * which case tdp_iter_next() needs to restart the walk at the root
+ * level instead of advancing to the next entry.
+ */
iter->yielded = false;
+ /*
+ *
+ * tdp_iter->yielded_gfn is used in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|47| <<tdp_iter_restart>> iter->yielded_gfn = iter->next_last_level_gfn;
+ * - arch/x86/kvm/mmu/tdp_mmu.c|818| <<tdp_mmu_iter_cond_resched>> if (iter->next_last_level_gfn == iter->yielded_gfn)
+ *
+ * The next_last_level_gfn at the time when the thread last
+ * yielded. Only yielding when the next_last_level_gfn !=
+ * yielded_gfn helps ensure forward progress.
+ */
iter->yielded_gfn = iter->next_last_level_gfn;
iter->level = iter->root_level;
+ /*
+ * tdp_iter->gfn is modified in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|88| <<tdp_iter_restart>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|193| <<try_step_down>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|220| <<try_step_side>> iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|249| <<try_step_up>> iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
+ *
+ * tdp_iter->next_last_level_gfn is set in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|76| <<tdp_iter_start>> iter->next_last_level_gfn = next_last_level_gfn;
+ * - arch/x86/kvm/mmu/tdp_iter.c|162| <<try_step_side>> iter->next_last_level_gfn = iter->gfn;
+ *
+ * The iterator will traverse the paging structure towards the mapping
+ * for this GFN.
+ */
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
tdp_iter_refresh_sptep(iter);
@@ -40,6 +103,10 @@ void tdp_iter_restart(struct tdp_iter *iter)
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.h|128| <<for_each_tdp_pte_min_level>> for (tdp_iter_start(&iter, root, min_level, start); \
+ */
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
int min_level, gfn_t next_last_level_gfn)
{
@@ -48,9 +115,23 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
WARN_ON(root_level < 1);
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+ /*
+ * tdp_iter->next_last_level_gfn is set in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|76| <<tdp_iter_start>> iter->next_last_level_gfn = next_last_level_gfn;
+ * - arch/x86/kvm/mmu/tdp_iter.c|162| <<try_step_side>> iter->next_last_level_gfn = iter->gfn;
+ *
+ * The iterator will traverse the paging structure towards the mapping
+ * for this GFN.
+ */
iter->next_last_level_gfn = next_last_level_gfn;
iter->root_level = root_level;
iter->min_level = min_level;
+ /*
+ * tdp_iter->pt_path[PT64_ROOT_MAX_LEVEL] is used in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|19| <<tdp_iter_refresh_sptep>> iter->sptep = iter->pt_path[iter->level - 1] +
+ * - arch/x86/kvm/mmu/tdp_iter.c|79| <<tdp_iter_start>> iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
+ * - arch/x86/kvm/mmu/tdp_iter.c|133| <<try_step_down>> iter->pt_path[iter->level - 1] = child_pt;
+ */
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
iter->as_id = kvm_mmu_page_as_id(root);
@@ -62,6 +143,11 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
* address of the child page table referenced by the SPTE. Returns null if
* there is no such entry.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|94| <<try_step_down>> child_pt = spte_to_child_pt(iter->old_spte, iter->level);
+ * - arch/x86/kvm/mmu/tdp_mmu.c|600| <<__handle_changed_spte>> handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared);
+ */
tdp_ptep_t spte_to_child_pt(u64 spte, int level)
{
/*
@@ -78,6 +164,10 @@ tdp_ptep_t spte_to_child_pt(u64 spte, int level)
* Steps down one level in the paging structure towards the goal GFN. Returns
* true if the iterator was able to step down a level, false otherwise.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|180| <<tdp_iter_next>> if (try_step_down(iter))
+ */
static bool try_step_down(struct tdp_iter *iter)
{
tdp_ptep_t child_pt;
@@ -85,6 +175,11 @@ static bool try_step_down(struct tdp_iter *iter)
if (iter->level == iter->min_level)
return false;
+ /*
+ * tdp_iter->sptep is modified in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|19| <<tdp_iter_refresh_sptep>> iter->sptep = iter->pt_path[iter->level - 1] + SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|166| <<try_step_side>> iter->sptep++;
+ */
/*
* Reread the SPTE before stepping down to avoid traversing into page
* tables that are no longer linked from this entry.
@@ -96,7 +191,20 @@ static bool try_step_down(struct tdp_iter *iter)
return false;
iter->level--;
+ /*
+ * tdp_iter->pt_path[PT64_ROOT_MAX_LEVEL] is used in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|19| <<tdp_iter_refresh_sptep>> iter->sptep = iter->pt_path[iter->level - 1] +
+ * - arch/x86/kvm/mmu/tdp_iter.c|79| <<tdp_iter_start>> iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
+ * - arch/x86/kvm/mmu/tdp_iter.c|133| <<try_step_down>> iter->pt_path[iter->level - 1] = child_pt;
+ */
iter->pt_path[iter->level - 1] = child_pt;
+ /*
+ * tdp_iter->gfn is modified in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|88| <<tdp_iter_restart>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|193| <<try_step_down>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|220| <<try_step_side>> iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|249| <<try_step_up>> iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
+ */
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
tdp_iter_refresh_sptep(iter);
@@ -110,6 +218,10 @@ static bool try_step_down(struct tdp_iter *iter)
* able to step to the next entry in the page table, false if the iterator was
* already at the end of the current page table.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|184| <<tdp_iter_next>> if (try_step_side(iter))
+ */
static bool try_step_side(struct tdp_iter *iter)
{
/*
@@ -122,6 +234,11 @@ static bool try_step_side(struct tdp_iter *iter)
iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
iter->next_last_level_gfn = iter->gfn;
+ /*
+ * tdp_iter->sptep is modified in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|19| <<tdp_iter_refresh_sptep>> iter->sptep = iter->pt_path[iter->level - 1] + SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|166| <<try_step_side>> iter->sptep++;
+ */
iter->sptep++;
iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep);
@@ -133,12 +250,24 @@ static bool try_step_side(struct tdp_iter *iter)
* can continue from the next entry in the parent page table. Returns true on a
* successful step up, false if already in the root page.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.c|154| <<tdp_iter_step_up>> WARN_ON(!try_step_up(iter));
+ * - arch/x86/kvm/mmu/tdp_iter.c|186| <<tdp_iter_next>> } while (try_step_up(iter));
+ */
static bool try_step_up(struct tdp_iter *iter)
{
if (iter->level == iter->root_level)
return false;
iter->level++;
+ /*
+ * tdp_iter->gfn is modified in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|88| <<tdp_iter_restart>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|193| <<try_step_down>> iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|220| <<try_step_side>> iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
+ * - arch/x86/kvm/mmu/tdp_iter.c|249| <<try_step_up>> iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
+ */
iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
tdp_iter_refresh_sptep(iter);
@@ -149,6 +278,10 @@ static bool try_step_up(struct tdp_iter *iter)
* Step the iterator back up a level in the paging structure. Should only be
* used when the iterator is below the root level.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_mmu.c|1784| <<zap_collapsible_spte_range>> tdp_iter_step_up(&iter);
+ */
void tdp_iter_step_up(struct tdp_iter *iter)
{
WARN_ON(!try_step_up(iter));
@@ -170,8 +303,23 @@ void tdp_iter_step_up(struct tdp_iter *iter)
* SPTE will have already been visited, and so the iterator must also step
* to the side again.
*/
+/*
+ * called by:
+ * - arch/x86/kvm/mmu/tdp_iter.h|130| <<for_each_tdp_pte_min_level>> tdp_iter_next(&iter))
+ */
void tdp_iter_next(struct tdp_iter *iter)
{
+ /*
+ * tdp_iter->yielded is set in the following places:
+ * - arch/x86/kvm/mmu/tdp_iter.c|46| <<tdp_iter_restart>> iter->yielded = false;
+ * - arch/x86/kvm/mmu/tdp_mmu.c|836| <<tdp_mmu_iter_cond_resched>> iter->yielded = true;
+ * - arch/x86/kvm/mmu/tdp_mmu.c|1464| <<tdp_mmu_alloc_sp_for_split>> iter->yielded = true;
+ *
+ * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
+ * which case tdp_iter_next() needs to restart the walk at the root
+ * level instead of advancing to the next entry.
+ *
+ */
if (iter->yielded) {
tdp_iter_restart(iter);
return;
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index adfca0cf94d3..5cd25a1956b7 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -14,21 +14,45 @@