-
Notifications
You must be signed in to change notification settings - Fork 4
/
linux-mm-for-linux-5.5.patch
714 lines (672 loc) · 24.7 KB
/
linux-mm-for-linux-5.5.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
From e0749ca78c07ca888d0f608b896bbe5aa2863e8d Mon Sep 17 00:00:00 2001
From: Dongli Zhang <[email protected]>
Date: Mon, 30 Mar 2020 08:13:59 -0700
Subject: [PATCH 1/1] linux mm for linux-5.5
Signed-off-by: Dongli Zhang <[email protected]>
---
mm/slab.h | 4 +
mm/slub.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 351 insertions(+)
diff --git a/mm/slab.h b/mm/slab.h
index 7e94700aa78c..c49e2b037c4d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -625,6 +625,10 @@ struct kmem_cache_node {
};
+/*
+ * struct kmem_cache_node *node[MAX_NUMNODES];
+ * 返回s->node[node]
+ */
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
return s->node[node];
diff --git a/mm/slub.c b/mm/slub.c
index 8eafccf75940..a2463584b6f8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -131,6 +131,19 @@ void *fixup_red_left(struct kmem_cache *s, void *p)
return p;
}
+/*
+ * called by:
+ * - mm/slub.c|1883| <<get_partial_node>> if (!kmem_cache_has_cpu_partial(s)
+ * - mm/slub.c|2889| <<__slab_free>> if (kmem_cache_has_cpu_partial(s) && !prior) {
+ * - mm/slub.c|2946| <<__slab_free>> if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
+ * - mm/slub.c|3505| <<set_cpu_partial>> if (!kmem_cache_has_cpu_partial(s))
+ * - mm/slub.c|5030| <<cpu_partial_store>> if (objects && !kmem_cache_has_cpu_partial(s))
+ *
+ * 使用了CONFIG_SLUB_CPU_PARTIAL的情况下, 当kmem_cache->flags设置了debug的任何flag的时候返回false
+ * 没有CONFIG_SLUB_CPU_PARTIAL直接返回false
+ * ol支持CONFIG_SLUB_CPU_PARTIAL
+ * 说明用debug的时候不支持kmem_cache_has_cpu_partial
+ */
static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -360,6 +373,11 @@ static __always_inline void slab_unlock(struct page *page)
}
/* Interrupts must be disabled (for the fallback code to work right) */
+/*
+ * 我们期待__cmpxchg_double_slab()返回true, 不希望false
+ * 核心思想是判断page->freelist和page->counters是否和old的相等
+ * 如果相等,则吧page->freelist和page->counters都更新成新的
+ */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
@@ -397,6 +415,11 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
return false;
}
+/*
+ * 我们期待__cmpxchg_double_slab()返回true, 不希望false
+ * 核心思想是判断page->freelist和page->counters是否和old的相等
+ * 如果相等,则吧page->freelist和page->counters都更新成新的
+ */
static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
@@ -1009,6 +1032,13 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
/*
* Tracking of fully allocated slabs for debugging purposes.
*/
+/*
+ * called by:
+ * - mm/slub.c|2251| <<deactivate_slab>> add_full(s, n, page);
+ *
+ * add_full()只在设置了SLAB_STORE_USER的情况下有用
+ * 把page->slab_list加入kmem_cache_node->full
+ */
static void add_full(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page)
{
@@ -1019,6 +1049,15 @@ static void add_full(struct kmem_cache *s,
list_add(&page->slab_list, &n->full);
}
+/*
+ * called by:
+ * - mm/slub.c|2246| <<deactivate_slab>> remove_full(s, n, page);
+ * - mm/slub.c|3024| <<__slab_free>> remove_full(s, n, page);
+ * - mm/slub.c|3040| <<__slab_free>> remove_full(s, n, page);
+ *
+ * remove_full()只在设置了SLAB_STORE_USER的情况下有用
+ * 把page->slab_list从kmem_cache_node->full中移除
+ */
static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
{
if (!(s->flags & SLAB_STORE_USER))
@@ -1056,6 +1095,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
atomic_long_add(objects, &n->total_objects);
}
}
+/*
+ * called by:
+ * - mm/slub.c|1813| <<discard_slab>> dec_slabs_node(s, page_to_nid(page), page->objects);
+ *
+ * 对应的kmem_cache_node->nr_slabs减少1
+ * kmem_cache_node->total_objects减少参数的objects
+ */
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
struct kmem_cache_node *n = get_node(s, node);
@@ -1103,6 +1149,10 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
return 1;
}
+/*
+ * called by:
+ * - mm/slub.c|2856| <<___slab_alloc>> !alloc_debug_processing(s, page, freelist, addr))
+ */
static noinline int alloc_debug_processing(struct kmem_cache *s,
struct page *page,
void *object, unsigned long addr)
@@ -1614,6 +1664,10 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
+/*
+ * called by:
+ * - mm/slub.c|1762| <<new_slab>> return allocate_slab(s,
+ */
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
@@ -1694,6 +1748,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
return page;
}
+/*
+ * called by:
+ * - mm/slub.c|2686| <<new_slab_objects>> page = new_slab(s, flags, node);
+ * - mm/slub.c|3620| <<early_kmem_cache_node_alloc>> page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
+ */
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
@@ -1708,6 +1767,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
}
+/*
+ * called by:
+ * - mm/slub.c|1778| <<rcu_free_slab>> __free_slab(page->slab_cache, page);
+ * - mm/slub.c|1786| <<free_slab>> __free_slab(s, page);
+ *
+ * 核心思想就是让一个page(也许是多个order的compound)和slab完全脱离关系
+ */
static void __free_slab(struct kmem_cache *s, struct page *page)
{
int order = compound_order(page);
@@ -1732,6 +1798,9 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_pages(page, order);
}
+/*
+ * 核心思想就是让一个page(也许是多个order的compound)和slab完全脱离关系
+ */
static void rcu_free_slab(struct rcu_head *h)
{
struct page *page = container_of(h, struct page, rcu_head);
@@ -1739,6 +1808,13 @@ static void rcu_free_slab(struct rcu_head *h)
__free_slab(page->slab_cache, page);
}
+/*
+ * called by:
+ * - mm/slub.c|1814| <<discard_slab>> free_slab(s, page);
+ *
+ * 核心思想就是让一个page(也许是多个order的compound)和slab完全脱离关系
+ * 不管是直接释放还是通过rcu
+ */
static void free_slab(struct kmem_cache *s, struct page *page)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
@@ -1747,15 +1823,45 @@ static void free_slab(struct kmem_cache *s, struct page *page)
__free_slab(s, page);
}
+/*
+ * called by:
+ * - mm/slub.c|2275| <<deactivate_slab>> discard_slab(s, page);
+ * - mm/slub.c|2345| <<unfreeze_partials>> discard_slab(s, page);
+ * - mm/slub.c|3045| <<__slab_free>> discard_slab(s, page);
+ * - mm/slub.c|3827| <<free_partial>> discard_slab(s, page);
+ * - mm/slub.c|4131| <<__kmem_cache_shrink>> discard_slab(s, page);
+ *
+ * 核心思想就是让一个page(也许是多个order的compound)和slab完全脱离关系, 不管是直接释放还是通过rcu
+ * 对应的kmem_cache_node->nr_slabs减少1
+ * kmem_cache_node->total_objects减少参数的objects
+ */
static void discard_slab(struct kmem_cache *s, struct page *page)
{
+ /*
+ * 对应的kmem_cache_node->nr_slabs减少1
+ * kmem_cache_node->total_objects减少参数的objects
+ */
dec_slabs_node(s, page_to_nid(page), page->objects);
+ /*
+ * 核心思想就是让一个page(也许是多个order的compound)和slab完全脱离关系
+ * 不管是直接释放还是通过rcu
+ */
free_slab(s, page);
}
/*
* Management of partially allocated slabs.
*/
+/*
+ * called by:
+ * - mm/slub.c|1796| <<add_partial>> __add_partial(n, page, tail);
+ * - mm/slub.c|3476| <<early_kmem_cache_node_alloc>> __add_partial(n, page, DEACTIVATE_TO_HEAD);
+ *
+ * 把page通过page->slab_list加入kmem_cache_node->partial
+ * 如果tail==DEACTIVATE_TO_TAIL就放入尾部
+ * 否则放入头部
+ * 增加n->nr_partial++
+ */
static inline void
__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
{
@@ -1766,6 +1872,17 @@ __add_partial(struct kmem_cache_node *n, struct page *page, int tail)
list_add(&page->slab_list, &n->partial);
}
+/*
+ * called by:
+ * - mm/slub.c|2218| <<deactivate_slab>> add_partial(n, page, tail);
+ * - mm/slub.c|2301| <<unfreeze_partials>> add_partial(n, page, DEACTIVATE_TO_TAIL);
+ * - mm/slub.c|2994| <<__slab_free>> add_partial(n, page, DEACTIVATE_TO_TAIL);
+ *
+ * 把page通过page->slab_list加入kmem_cache_node->partial
+ * 如果tail==DEACTIVATE_TO_TAIL就放入尾部
+ * 否则放入头部
+ * 增加n->nr_partial++
+ */
static inline void add_partial(struct kmem_cache_node *n,
struct page *page, int tail)
{
@@ -1773,6 +1890,16 @@ static inline void add_partial(struct kmem_cache_node *n,
__add_partial(n, page, tail);
}
+/*
+ * called by:
+ * - mm/slub.c|1848| <<acquire_slab>> remove_partial(n, page);
+ * - mm/slub.c|2213| <<deactivate_slab>> remove_partial(n, page);
+ * - mm/slub.c|3005| <<__slab_free>> remove_partial(n, page);
+ * - mm/slub.c|3786| <<free_partial>> remove_partial(n, page);
+ *
+ * 把page->slab_list从kmem_cache_node->partial移走
+ * 减少n->nr_partial--
+ */
static inline void remove_partial(struct kmem_cache_node *n,
struct page *page)
{
@@ -1787,6 +1914,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
*
* Returns a list of objects or NULL if it fails.
*/
+/*
+ * called by:
+ * - mm/slub.c|1998| <<get_partial_node>> t = acquire_slab(s, n, page, object == NULL, &objects);
+ *
+ * 参数的page是一个slab page
+ */
static inline void *acquire_slab(struct kmem_cache *s,
struct kmem_cache_node *n, struct page *page,
int mode, int *objects)
@@ -1805,8 +1938,13 @@ static inline void *acquire_slab(struct kmem_cache *s,
freelist = page->freelist;
counters = page->counters;
new.counters = counters;
+ /* 哪怕没在用, 只要不在page->freelist中, 也算是inuse */
*objects = new.objects - new.inuse;
if (mode) {
+ /*
+ * 这种情况下下面会把page->freelist清空
+ * 然后返回freelist
+ */
new.inuse = page->objects;
new.freelist = NULL;
} else {
@@ -1816,12 +1954,21 @@ static inline void *acquire_slab(struct kmem_cache *s,
VM_BUG_ON(new.frozen);
new.frozen = 1;
+ /*
+ * 我们期待__cmpxchg_double_slab()返回true, 不希望false
+ * 核心思想是判断page->freelist和page->counters是否和old的相等
+ * 如果相等,则吧page->freelist和page->counters都更新成新的
+ */
if (!__cmpxchg_double_slab(s, page,
freelist, counters,
new.freelist, new.counters,
"acquire_slab"))
return NULL;
+ /*
+ * 把page->slab_list从kmem_cache_node->partial移走
+ * 减少n->nr_partial--
+ */
remove_partial(n, page);
WARN_ON(!freelist);
return freelist;
@@ -1847,6 +1994,17 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
* partial slab and there is none available then get_partials()
* will return NULL.
*/
+ /*
+ * struct kmem_cache_node:
+ * #ifdef CONFIG_SLUB
+ * unsigned long nr_partial;
+ * struct list_head partial;
+ * #ifdef CONFIG_SLUB_DEBUG
+ * atomic_long_t nr_slabs;
+ * atomic_long_t total_objects;
+ * struct list_head full;
+ * #endif
+ */
if (!n || !n->nr_partial)
return NULL;
@@ -1857,6 +2015,14 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!pfmemalloc_match(page, flags))
continue;
+ /*
+ * Remove slab from the partial list, freeze it and
+ * return the pointer to the freelist.
+ *
+ * Returns a list of objects or NULL if it fails.
+ *
+ * 函数结束后, c->page->frozen变成了1
+ */
t = acquire_slab(s, n, page, object == NULL, &objects);
if (!t)
break;
@@ -1865,11 +2031,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!object) {
c->page = page;
stat(s, ALLOC_FROM_PARTIAL);
+ /*
+ * !!!唯一修改object的地方!!!
+ */
object = t;
} else {
put_cpu_partial(s, page, 0);
stat(s, CPU_PARTIAL_NODE);
}
+ /*
+ * kmem_cache_has_cpu_partial():
+ * 使用了CONFIG_SLUB_CPU_PARTIAL的情况下, 当kmem_cache->flags设置了debug的任何flag的时候返回false
+ * 没有CONFIG_SLUB_CPU_PARTIAL直接返回false
+ * ol支持CONFIG_SLUB_CPU_PARTIAL
+ * 说明用debug的时候不支持kmem_cache_has_cpu_partial
+ */
if (!kmem_cache_has_cpu_partial(s)
|| available > slub_cpu_partial(s) / 2)
break;
@@ -1946,6 +2122,10 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
/*
* Get a partial page, lock it and return it.
*/
+/*
+ * called by:
+ * - mm/slub.c|2681| <<new_slab_objects>> freelist = get_partial(s, flags, node, c);
+ */
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
struct kmem_cache_cpu *c)
{
@@ -1957,6 +2137,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
else if (!node_present_pages(node))
searchnode = node_to_mem_node(node);
+ /*
+ * get_node():
+ * struct kmem_cache_node *node[MAX_NUMNODES];
+ * 返回s->node[node]
+ */
object = get_partial_node(s, get_node(s, searchnode), c, flags);
if (object || node != NUMA_NO_NODE)
return object;
@@ -2036,20 +2221,48 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
/*
* Remove the cpu slab
*/
+/*
+ * called by:
+ * - mm/slub.c|2307| <<flush_slab>> deactivate_slab(s, c->page, c->freelist, c);
+ * - mm/slub.c|2559| <<___slab_alloc>> deactivate_slab(s, page, c->freelist, c);
+ * - mm/slub.c|2570| <<___slab_alloc>> deactivate_slab(s, page, c->freelist, c);
+ * - mm/slub.c|2625| <<___slab_alloc>> deactivate_slab(s, page, get_freepointer(s, freelist), c);
+ *
+ * Remove the cpu slab (看了几次注释很准确)
+ * 该函数会将CPU缓存对象的freelist中的对象,还给slab对象(page)
+ * 然后把这个page放入partial或者根据情况释放
+ * 关键最后会进行以下:
+ * c->page = NULL; ---->>>>>>>>>>>>>>>>>!!!!
+ * c->freelist = NULL; ---->>>>>>>>>>>>>>>>>!!!!
+ *
+ * 假设从___slab_alloc()第一次进入这里, page->freelist=NULL, c->page=page,
+ * freelist是第一个(要分配出去的)object
+ * 第3个参数是get_freepointer(s, freelist), 也就是分配出去的下一个
+ */
static void deactivate_slab(struct kmem_cache *s, struct page *page,
void *freelist, struct kmem_cache_cpu *c)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
+ /*
+ * struct kmem_cache_node *node[MAX_NUMNODES];
+ * 返回s->node[node]
+ */
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
int lock = 0;
+ /*
+ * l和m什么关系???
+ */
enum slab_modes l = M_NONE, m = M_NONE;
void *nextfree;
+ /* DEACTIVATE_TO_HEAD: Cpu slab was moved to the head of partials */
int tail = DEACTIVATE_TO_HEAD;
struct page new;
struct page old;
if (page->freelist) {
+ /* DEACTIVATE_REMOTE_FREES: Slab contained remotely freed objects */
stat(s, DEACTIVATE_REMOTE_FREES);
+ /* DEACTIVATE_TO_TAIL: Cpu slab was moved to the tail of partials */
tail = DEACTIVATE_TO_TAIL;
}
@@ -2061,18 +2274,53 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
* There is no need to take the list->lock because the page
* is still frozen.
*/
+ /*
+ * 把参数freelist指向的entry除了最后一个全部放入page->freelist
+ * 并修改page->inuse (减少, 因为还回来了)
+ * 只保留最后一个不还
+ */
while (freelist && (nextfree = get_freepointer(s, freelist))) {
void *prior;
unsigned long counters;
+ /*
+ * 每一次while()获得的nextfree可能是invalid的地址
+ * 说明freelist被corrupt了, 所以应该隔离freelist
+ * 然后break
+ */
+
do {
prior = page->freelist;
counters = page->counters;
+ /*
+ * freelist的下一个指向prior
+ */
set_freepointer(s, freelist, prior);
+ /*
+ * struct kmem_cache *slab_cache; // not slob
+ * // Double-word boundary
+ * void *freelist; // first free object
+ * union {
+ * void *s_mem; // slab: first object
+ * unsigned long counters; // SLUB
+ * struct { // SLUB
+ * unsigned inuse:16;
+ * unsigned objects:15;
+ * unsigned frozen:1;
+ * };
+ * };
+ *
+ * 下面的inuse是counters的一部分
+ */
new.counters = counters;
new.inuse--;
VM_BUG_ON(!new.frozen);
+ /*
+ * 我们期待__cmpxchg_double_slab()返回true, 不希望false
+ * 核心思想是判断page->freelist和page->counters是否和old的相等
+ * 如果相等,则吧page->freelist和page->counters都更新成新的
+ */
} while (!__cmpxchg_double_slab(s, page,
prior, counters,
freelist, new.counters,
@@ -2112,6 +2360,13 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
new.frozen = 0;
+ /*
+ * 如果new.inuse为0了并且partial的page很多了, 可以M_FREE --> discard_slab()
+ * 否则如果page还有entry可用, 可以M_PARTIAL --> add_partial()
+ * 其他条件说明M_FULL --> add_full()
+ *
+ * m一开始是M_NONE, 下面的三个总会有一个设置m为: M_PARTIAL, M_FULL, M_FREE
+ */
if (!new.inuse && n->nr_partial >= s->min_partial)
m = M_FREE;
else if (new.freelist) {
@@ -2138,12 +2393,35 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
}
+ /*
+ * 最最开始的时候l = M_NONE
+ */
if (l != m) {
+ /*
+ * remove_partial():
+ * 把page->slab_list从kmem_cache_node->partial移走
+ * 减少n->nr_partial--
+ *
+ * remove_full():
+ * remove_full()只在设置了SLAB_STORE_USER的情况下有用
+ * 把page->slab_list从kmem_cache_node->full中移除
+ */
if (l == M_PARTIAL)
remove_partial(n, page);
else if (l == M_FULL)
remove_full(s, n, page);
+ /*
+ * add_partial():
+ * 把page通过page->slab_list加入kmem_cache_node->partial
+ * 如果tail==DEACTIVATE_TO_TAIL就放入尾部
+ * 否则放入头部
+ * 增加n->nr_partial++
+ *
+ * add_full():
+ * add_full()只在设置了SLAB_STORE_USER的情况下有用
+ * 把page->slab_list加入kmem_cache_node->full
+ */
if (m == M_PARTIAL)
add_partial(n, page, tail);
else if (m == M_FULL)
@@ -2151,6 +2429,11 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
l = m;
+ /*
+ * 我们期待__cmpxchg_double_slab()返回true, 不希望false
+ * 核心思想是判断page->freelist和page->counters是否和old的相等
+ * 如果相等,则吧page->freelist和page->counters都更新成新的
+ */
if (!__cmpxchg_double_slab(s, page,
old.freelist, old.counters,
new.freelist, new.counters,
@@ -2166,6 +2449,11 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
stat(s, DEACTIVATE_FULL);
else if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
+ /*
+ * 核心思想就是让一个page(也许是多个order的compound)和slab完全脱离关系, 不管是直接释放还是通过rcu
+ * 对应的kmem_cache_node->nr_slabs减少1
+ * kmem_cache_node->total_objects减少参数的objects
+ */
discard_slab(s, page);
stat(s, FREE_SLAB);
}
@@ -2181,6 +2469,12 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
* for the cpu using c (or some other guarantee must be there
* to guarantee no concurrent accesses).
*/
+/*
+ * called by:
+ * - mm/slub.c|2492| <<put_cpu_partial>> unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ * - mm/slub.c|2514| <<put_cpu_partial>> unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ * - mm/slub.c|2546| <<__flush_cpu_slab>> unfreeze_partials(s, c);
+ */
static void unfreeze_partials(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
@@ -2249,6 +2543,11 @@ static void unfreeze_partials(struct kmem_cache *s,
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
+/*
+ * called by:
+ * - mm/slub.c|2022| <<get_partial_node>> put_cpu_partial(s, page, 0);
+ * - mm/slub.c|3203| <<__slab_free>> put_cpu_partial(s, page, 1);
+ */
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
@@ -2288,6 +2587,11 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
page->pobjects = pobjects;
page->next = oldpage;
+ /*
+ * struct kmem_cache:
+ * -> struct kmem_cache_cpu __percpu *cpu_slab:
+ * -> struct page *partial;
+ */
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
!= oldpage);
if (unlikely(!s->cpu_partial)) {
@@ -2301,6 +2605,11 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
#endif /* CONFIG_SLUB_CPU_PARTIAL */
}
+/*
+ * called by:
+ * - mm/slub.c|2531| <<__flush_cpu_slab>> flush_slab(s, c);
+ * - mm/slub.c|2671| <<new_slab_objects>> flush_slab(s, c);
+ */
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
stat(s, CPUSLAB_FLUSH);
@@ -2441,6 +2750,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
#endif
}
+/*
+ * called by:
+ * - mm/slub.c|2843| <<___slab_alloc>> freelist = new_slab_objects(s, gfpflags, node, &c);
+ */
static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
int node, struct kmem_cache_cpu **pc)
{
@@ -2450,6 +2763,9 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
+ /*
+ * Get a partial page, lock it and return it.
+ */
freelist = get_partial(s, flags, node, c);
if (freelist)
@@ -2537,6 +2853,11 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
* Version of __slab_alloc to use when we know that interrupts are
* already disabled (which is the case for bulk allocation).
*/
+/*
+ * called by:
+ * - mm/slub.c|2858| <<__slab_alloc>> p = ___slab_alloc(s, gfpflags, node, addr, c);
+ * - mm/slub.c|3371| <<kmem_cache_alloc_bulk>> p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
+ */
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
@@ -2599,6 +2920,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
new_slab:
+ /*
+ * 检查c->partial
+ * 在过去4.14的linux只设置slub_debug=U就不会执行下面的了
+ * 看来只要激活了slub_debug就不会走percpu cache了
+ */
if (slub_percpu_partial(c)) {
page = c->page = slub_percpu_partial(c);
slub_set_percpu_partial(c, page);
@@ -2622,6 +2948,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
!alloc_debug_processing(s, page, freelist, addr))
goto new_slab; /* Slab failed checks. Next slab needed */
+ /*
+ * Remove the cpu slab (看了几次注释很准确)
+ * 该函数会将CPU缓存对象的get_freepointer(s, freelist)中的
+ * 对象(也就是freelist的下一个代表的),还给slab对象(page)
+ * 然后把这个page放入partial或者根据情况释放
+ * 关键最后会进行以下:
+ * c->page = NULL; ---->>>>>>>>>>>>>>>>>!!!!
+ * c->freelist = NULL; ---->>>>>>>>>>>>>>>>>!!!!
+ */
deactivate_slab(s, page, get_freepointer(s, freelist), c);
return freelist;
}
@@ -2630,6 +2965,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* Another one that disabled interrupt and compensates for possible
* cpu changes by refetching the per cpu area pointer.
*/
+/*
+ * called by:
+ * - mm/slub.c|2932| <<slab_alloc_node>> object = __slab_alloc(s, gfpflags, node, addr, c);
+ */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
@@ -2672,6 +3011,14 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
*
* Otherwise we can simply pick the next object from the lockless free list.
*/
+/*
+ * called by:
+ * - mm/slub.c|2999| <<slab_alloc>> return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
+ * - mm/slub.c|3027| <<kmem_cache_alloc_node>> void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+ * - mm/slub.c|3041| <<kmem_cache_alloc_node_trace>> void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+ * - mm/slub.c|4079| <<__kmalloc_node>> ret = slab_alloc_node(s, flags, node, _RET_IP_);
+ * - mm/slub.c|4597| <<__kmalloc_node_track_caller>> ret = slab_alloc_node(s, gfpflags, node, caller);
+ */
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr)
{
--
2.17.1