block-comment-for-block-and-drivers-for-4.17.14.patch
From f6563d22d15265ccb1fd49d7001ac3ea80be0758 Mon Sep 17 00:00:00 2001
From: Dongli Zhang <[email protected]>
Date: Wed, 3 Oct 2018 21:27:52 +0800
Subject: [PATCH 1/1] block: comments for block layer and drivers (4.17.14)
Signed-off-by: Dongli Zhang <[email protected]>
---
block/bio.c | 1 +
block/blk-cgroup.c | 23 ++
block/blk-core.c | 210 +++++++++-
block/blk-flush.c | 21 +
block/blk-integrity.c | 15 +
block/blk-merge.c | 58 +++
block/blk-mq-cpumap.c | 21 +
block/blk-mq-pci.c | 9 +
block/blk-mq-sched.c | 268 ++++++++++++-
block/blk-mq-sched.h | 76 ++++
block/blk-mq-tag.c | 172 ++++++++
block/blk-mq-tag.h | 62 ++-
block/blk-mq-virtio.c | 6 +
block/blk-mq.c | 992 +++++++++++++++++++++++++++++++++++++++++++++-
block/blk-mq.h | 61 ++-
block/blk-settings.c | 12 +
block/blk-softirq.c | 73 +++-
block/blk-stat.c | 259 +++++++++++-
block/blk-stat.h | 129 +++++-
block/blk-throttle.c | 3 +
block/blk-timeout.c | 43 +-
block/blk.h | 4 +
block/elevator.c | 402 ++++++++++++++++++-
block/kyber-iosched.c | 514 +++++++++++++++++++++++-
block/mq-deadline.c | 331 +++++++++++++++-
block/noop-iosched.c | 27 ++
block/partition-generic.c | 10 +
include/linux/blk-mq.h | 136 ++++++-
include/linux/blk_types.h | 1 +
include/linux/blkdev.h | 246 +++++++++++-
include/linux/elevator.h | 5 +
include/linux/sbitmap.h | 147 ++++++-
lib/sbitmap.c | 208 +++++++++-
33 files changed, 4446 insertions(+), 99 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 9f7fa24..d3e6e3c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -838,6 +838,7 @@ int bio_add_page(struct bio *bio, struct page *page,
/*
* cloned bio must not modify vec list
*/
+ /* BIO_CLONED: doesn't own data */
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index eb85cb8..6544a98 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -89,9 +89,16 @@ static void blkg_free(struct blkcg_gq *blkg)
*
* Allocate a new blkg assocating @blkcg and @q.
*/
+/*
+ * called by:
+ * - blkg_create()
+ * - __acquires()
+ * - blkcg_init_queue()
+ */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
gfp_t gfp_mask)
{
+ /* association between a blk cgroup and a request queue */
struct blkcg_gq *blkg;
int i;
@@ -168,6 +175,12 @@ EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
* If @new_blkg is %NULL, this function tries to allocate a new one as
* necessary using %GFP_NOWAIT. @new_blkg is always consumed on return.
*/
+/*
+ * called by:
+ * - blkg_lookup_create()
+ * - __acquires()
+ * - blkcg_init_queue()
+ */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct request_queue *q,
struct blkcg_gq *new_blkg)
@@ -1165,8 +1178,12 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
* RETURNS:
* 0 on success, -errno on failure.
*/
+/*
+ * called only by blk_alloc_queue_node()
+ */
int blkcg_init_queue(struct request_queue *q)
{
+ /* association between a blk cgroup and a request queue */
struct blkcg_gq *new_blkg, *blkg;
bool preloaded;
int ret;
@@ -1438,6 +1455,12 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
* Register @pol with blkcg core. Might sleep and @pol may be modified on
* successful registration. Returns 0 on success and -errno on failure.
*/
+/*
+ * called by:
+ * - bfq_init() -- blkcg_policy_register(&blkcg_policy_bfq);
+ * - throtl_init() -- blkcg_policy_register(&blkcg_policy_throtl);
+ * - cfq_init() -- blkcg_policy_register(&blkcg_policy_cfq);
+ */
int blkcg_policy_register(struct blkcg_policy *pol)
{
struct blkcg *blkcg;
diff --git a/block/blk-core.c b/block/blk-core.c
index 77938b5..67f132e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -410,6 +410,11 @@ EXPORT_SYMBOL(blk_stop_queue);
* and blkcg_exit_queue() to be called with queue lock initialized.
*
*/
+/*
+ * called only by:
+ * - block/blk-core.c|783| <<blk_cleanup_queue>> blk_sync_queue(q);
+ * - drivers/md/md.c|5865| <<mddev_detach>> blk_sync_queue(mddev->queue);
+ */
void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->timeout);
@@ -972,10 +977,15 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
wake_up_all(&q->mq_freeze_wq);
}
+/*
+ * set as the request_queue's q->timeout handler in only one place:
+ * - block/blk-core.c|1036| <<blk_alloc_queue_node>> timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
+ */
static void blk_rq_timed_out_timer(struct timer_list *t)
{
struct request_queue *q = from_timer(q, t, timeout);
+ /* depending on the queue type, this runs blk_timeout_work() or blk_mq_timeout_work() */
kblockd_schedule_work(&q->timeout_work);
}
@@ -992,6 +1002,11 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
* crash in the blkcg code. This function namely calls blkcg_init_queue() and
* the queue lock pointer must be set before blkcg_init_queue() is called.
*/
+/*
+ * allocate a request_queue
+ *
+ * for mq the lock can be ignored
+ */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
spinlock_t *lock)
{
@@ -1014,6 +1029,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
if (!q->backing_dev_info)
goto fail_split;
+ /* allocate the struct blk_queue_stats */
q->stats = blk_alloc_queue_stats();
if (!q->stats)
goto fail_stats;
@@ -1062,11 +1078,15 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
*/
+ /*
+ * release = blk_queue_usage_counter_release() is called when the refcount drops to 0
+ */
if (percpu_ref_init(&q->q_usage_counter,
blk_queue_usage_counter_release,
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
goto fail_bdi;
+ /* initialize blkcg part of request queue */
if (blkcg_init_queue(q))
goto fail_ref;
@@ -1148,7 +1168,14 @@ EXPORT_SYMBOL(blk_init_queue_node);
static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
-
+/*
+ * For mq devices:
+ *   if there is more than one queue, the default scheduler is none (not noop!)
+ *   if there is exactly one queue, the default scheduler is mq-deadline
+ *
+ * For sq devices:
+ *   the default is CONFIG_DEFAULT_IOSCHED
+ */
int blk_init_allocated_queue(struct request_queue *q)
{
WARN_ON_ONCE(q->mq_ops);
@@ -1804,6 +1831,13 @@ void blk_put_request(struct request *req)
}
EXPORT_SYMBOL(blk_put_request);
+/*
+ * called by:
+ * - blk_attempt_plug_merge()
+ * - blk_queue_bio()
+ * - blk_mq_sched_try_merge()
+ * - blk_mq_attempt_merge()
+ */
bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
@@ -1826,6 +1860,13 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
return true;
}
+/*
+ * called by:
+ * - blk_attempt_plug_merge()
+ * - blk_queue_bio()
+ * - blk_mq_sched_try_merge()
+ * - blk_mq_attempt_merge()
+ */
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
@@ -1850,6 +1891,12 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
return true;
}
+/*
+ * called by:
+ * - blk_attempt_plug_merge()
+ * - blk_mq_sched_try_merge()
+ * - blk_mq_attempt_merge()
+ */
bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
@@ -1896,10 +1943,30 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
*
* Caller must ensure !blk_queue_nomerges(q) beforehand.
*/
+/*
+ * called by:
+ * - blk_queue_bio()
+ * - blk_mq_make_request()
+ *
+ * Try to merge the bio with the requests already sitting on current->plug.
+ * If q->mq_ops is set, use mq_list; otherwise use list.
+ */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count,
struct request **same_queue_rq)
{
+ /*
+ * blk_plug permits building a queue of related requests by holding the I/O
+ * fragments for a short period. This allows merging of sequential requests
+ * into single larger request. As the requests are moved from a per-task list to
+ * the device's request_queue in a batch, this results in improved scalability
+ * as the lock contention for request_queue lock is reduced.
+ *
+ * It is ok not to disable preemption when adding the request to the plug list
+ * or when attempting a merge, because blk_schedule_flush_plug() will only flush
+ * the plug list when the task sleeps by itself. For details, please see
+ * schedule() where blk_schedule_flush_plug() is called.
+ */
struct blk_plug *plug;
struct request *rq;
struct list_head *plug_list;
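A condensed sketch of the walk this comment describes, using the locals declared in the hunk above and omitting the request_count/same_queue_rq bookkeeping (a sketch, not a drop-in replacement):

	plug = current->plug;
	if (!plug)
		return false;

	plug_list = q->mq_ops ? &plug->mq_list : &plug->list;

	list_for_each_entry_reverse(rq, plug_list, queuelist) {
		bool merged = false;

		if (rq->q != q)
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			merged = bio_attempt_back_merge(q, rq, bio);
			break;
		case ELEVATOR_FRONT_MERGE:
			merged = bio_attempt_front_merge(q, rq, bio);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			break;
		}
		if (merged)
			return true;
	}
	return false;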
@@ -2026,6 +2093,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
* any locks.
*/
if (!blk_queue_nomerges(q)) {
+ /*
+ * Try to merge the bio with the requests already sitting on current->plug.
+ * If q->mq_ops is set, use mq_list; otherwise use list.
+ */
if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
return BLK_QC_T_NONE;
} else
@@ -2572,6 +2643,15 @@ blk_qc_t submit_bio(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio);
+/*
+ * called by:
+ * - drivers/nvme/target/io-cmd.c|89| <<nvmet_execute_rw>> blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+ * - fs/block_dev.c|240| <<__blkdev_direct_IO_simple>> !blk_poll(bdev_get_queue(bdev), qc))
+ * - fs/block_dev.c|406| <<__blkdev_direct_IO>> !blk_poll(bdev_get_queue(bdev), qc))
+ * - fs/direct-io.c|521| <<dio_await_one>> !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
+ * - fs/iomap.c|1076| <<iomap_dio_rw>> !blk_poll(dio->submit.last_queue,
+ * - mm/page_io.c|411| <<swap_readpage>> if (!blk_poll(disk->queue, qc))
+ */
bool blk_poll(struct request_queue *q, blk_qc_t cookie)
{
if (!q->poll_fn || !blk_qc_t_valid(cookie))
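The callers listed above share roughly the same pattern: submit a bio, keep the returned cookie, and spin on blk_poll() instead of sleeping. A rough sketch (simplified; "done", "bio" and "bdev" are hypothetical, and "done" would be set by the bio's end_io callback):

	blk_qc_t qc = submit_bio(bio);

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(done))
			break;
		if (!blk_poll(bdev_get_queue(bdev), qc))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);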
@@ -2628,6 +2708,10 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
* @q: the queue to submit the request
* @rq: the request being queued
*/
+/*
+ * called only by:
+ * - drivers/md/dm-rq.c|410| <<dm_dispatch_clone_request>> r = blk_insert_cloned_request(clone->q, clone);
+ */
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
unsigned long flags;
@@ -2816,6 +2900,10 @@ void blk_account_io_start(struct request *rq, bool new_io)
part_stat_unlock();
}
+/*
+ * called only by:
+ * - blk_peek_request()
+ */
static struct request *elv_next_request(struct request_queue *q)
{
struct request *rq;
@@ -2959,6 +3047,11 @@ struct request *blk_peek_request(struct request_queue *q)
}
EXPORT_SYMBOL(blk_peek_request);
+/*
+ * called only by blk_start_request()
+ *
+ * remove the request from rq->queuelist
+ */
static void blk_dequeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -2966,6 +3059,7 @@ static void blk_dequeue_request(struct request *rq)
BUG_ON(list_empty(&rq->queuelist));
BUG_ON(ELV_ON_HASH(rq));
+ /* remove the request from rq->queuelist */
list_del_init(&rq->queuelist);
/*
@@ -2994,8 +3088,27 @@ void blk_start_request(struct request *req)
blk_dequeue_request(req);
+ /*
+ * QUEUE_FLAG_STATS is set in:
+ * - block/blk-stat.c|197| <<blk_stat_add_callback>> blk_queue_flag_set(QUEUE_FLAG_STATS, q);
+ * - block/blk-stat.c|259| <<blk_stat_enable_accounting>> blk_queue_flag_set(QUEUE_FLAG_STATS, q);
+ *
+ * QUEUE_FLAG_STATS is cleared in:
+ * - block/blk-stat.c|216| <<blk_stat_remove_callback>> blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
+ *
+ * QUEUE_FLAG_STATS is tested in:
+ * - block/blk-core.c|3091| <<blk_start_request>> if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
+ * - block/blk-mq.c|925| <<blk_mq_start_request>> if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
+ */
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
+ /*
+ * where RQF_STATS is set and tested:
+ * - block/blk-core.c|3105| <<blk_start_request>> req->rq_flags |= RQF_STATS;
+ * - block/blk-core.c|3306| <<blk_finish_request>> if (req->rq_flags & RQF_STATS)
+ * - block/blk-mq.c|745| <<__blk_mq_complete_request>> if (rq->rq_flags & RQF_STATS) {
+ * - block/blk-mq.c|939| <<blk_mq_start_request>> rq->rq_flags |= RQF_STATS;
+ */
req->rq_flags |= RQF_STATS;
wbt_issue(req->q->rq_wb, &req->issue_stat);
}
@@ -3197,6 +3310,13 @@ void blk_finish_request(struct request *req, blk_status_t error)
lockdep_assert_held(req->q->queue_lock);
WARN_ON_ONCE(q->mq_ops);
+ /*
+ * where RQF_STATS is set and tested:
+ * - block/blk-core.c|3105| <<blk_start_request>> req->rq_flags |= RQF_STATS;
+ * - block/blk-core.c|3306| <<blk_finish_request>> if (req->rq_flags & RQF_STATS)
+ * - block/blk-mq.c|745| <<__blk_mq_complete_request>> if (rq->rq_flags & RQF_STATS) {
+ * - block/blk-mq.c|939| <<blk_mq_start_request>> rq->rq_flags |= RQF_STATS;
+ */
if (req->rq_flags & RQF_STATS)
blk_stat_add(req);
@@ -3556,12 +3676,14 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+/* queue a work item on kblockd_workqueue */
int kblockd_schedule_work(struct work_struct *work)
{
return queue_work(kblockd_workqueue, work);
}
EXPORT_SYMBOL(kblockd_schedule_work);
+/* queue a work item on kblockd_workqueue, bound to the given cpu */
int kblockd_schedule_work_on(int cpu, struct work_struct *work)
{
return queue_work_on(cpu, kblockd_workqueue, work);
@@ -3571,6 +3693,7 @@ EXPORT_SYMBOL(kblockd_schedule_work_on);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
unsigned long delay)
{
+ /* delay: number of jiffies to wait before queueing */
return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
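Since the delay is expressed in jiffies, callers typically convert from milliseconds first; a hypothetical example (my_dwork is a made-up delayed_work):

	/* run my_dwork on CPU 0 roughly 5 ms from now */
	kblockd_mod_delayed_work_on(0, &my_dwork, msecs_to_jiffies(5));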
@@ -3589,6 +3712,12 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
* plug. By flushing the pending I/O when the process goes to sleep, we avoid
* this kind of deadlock.
*/
+/*
+ * The plug flow appears to be:
+ * 1. blk_start_plug() first; it is called from many places, and the plug argument is usually a local variable of the caller
+ * 2. do a batch of operations (submit I/O)
+ * 3. finally blk_finish_plug()
+ */
void blk_start_plug(struct blk_plug *plug)
{
struct task_struct *tsk = current;
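A minimal sketch of the submitter-side pattern described in the comment above (illustrative only; bios[] and nr_bios are hypothetical):

	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr_bios; i++)
		submit_bio(bios[i]);	/* requests pile up on current->plug */
	blk_finish_plug(&plug);		/* flushes the plug list to the device queues */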
@@ -3610,11 +3739,15 @@ void blk_start_plug(struct blk_plug *plug)
}
EXPORT_SYMBOL(blk_start_plug);
+/*
+ * used only by blk_flush_plug_list() as the sort comparison callback
+ */
static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct request *rqa = container_of(a, struct request, queuelist);
struct request *rqb = container_of(b, struct request, queuelist);
+ /* blk_rq_pos(): the current sector */
return !(rqa->q < rqb->q ||
(rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
}
@@ -3625,6 +3758,11 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
* additional stack usage in driver dispatch, in places where the originally
* plugger did not intend it.
*/
+/*
+ * called only by blk_flush_plug_list() (two call sites)
+ *
+ * from_schedule decides whether to use blk_run_queue_async() or __blk_run_queue()
+ */
static void queue_unplugged(struct request_queue *q, unsigned int depth,
bool from_schedule)
__releases(q->queue_lock)
@@ -3640,11 +3778,17 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
spin_unlock(q->queue_lock);
}
+/*
+ * called only by blk_flush_plug_list()
+ *
+ * detach the entries from plug->cb_list and invoke the callback of each entry (struct blk_plug_cb)
+ */
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
LIST_HEAD(callbacks);
while (!list_empty(&plug->cb_list)) {
+ /* splice plug->cb_list onto callbacks */
list_splice_init(&plug->cb_list, &callbacks);
while (!list_empty(&callbacks)) {
@@ -3657,6 +3801,18 @@ static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
}
}
+/*
+ * called by:
+ * - drivers/block/drbd/drbd_req.c|1313| <<drbd_check_plugged>>
+ * - drivers/block/umem.c|522| <<mm_check_plugged>>
+ * - drivers/md/raid1.c|1513| <<raid1_write_request>>
+ * - drivers/md/raid10.c|1290| <<raid10_write_one_disk>>
+ * - drivers/md/raid5.c|5442| <<release_stripe_plug>>
+ * - fs/btrfs/raid56.c|1786| <<raid56_parity_write>>
+ *
+ * Walk current's plug->cb_list looking for an existing struct blk_plug_cb matching unplug and data.
+ * If one exists, return it; otherwise allocate a new one and add it to plug->cb_list.
+ */
struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
int size)
{
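Drivers such as raid1/raid10 use this by embedding a struct blk_plug_cb in a private structure; a hedged sketch of that pattern (my_plug_cb, my_unplug, my_dev and my_queue_bio are made-up names):

struct my_plug_cb {
	struct blk_plug_cb cb;		/* embedded callback handed to the block layer */
	struct bio_list pending;
};

static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct my_plug_cb *mcb = container_of(cb, struct my_plug_cb, cb);

	/* drain mcb->pending to the hardware here */
	kfree(mcb);			/* the callback owns the allocation */
}

static bool my_queue_bio(struct my_dev *dev, struct bio *bio)
{
	struct blk_plug_cb *cb;

	cb = blk_check_plugged(my_unplug, dev, sizeof(struct my_plug_cb));
	if (!cb)
		return false;		/* no plug active: the caller submits directly */

	bio_list_add(&container_of(cb, struct my_plug_cb, cb)->pending, bio);
	return true;
}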
@@ -3682,6 +3838,18 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
}
EXPORT_SYMBOL(blk_check_plugged);
+/*
+ * called by:
+ * - blk_queue_bio()
+ * - blk_poll()
+ * - blk_finish_plug()
+ * - blk_mq_make_request() -- only when there is a single hw queue
+ * - blk_flush_plug()
+ * - blk_schedule_flush_plug()
+ *
+ * The core idea: walk the requests on plug->list, add each to its queue via __elv_add_request(),
+ * and leave plug->list empty.
+ */
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
@@ -3690,6 +3858,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
LIST_HEAD(list);
unsigned int depth;
+ /* detach the entries from plug->cb_list and invoke the callback of each entry (struct blk_plug_cb) */
flush_plug_callbacks(plug, from_schedule);
if (!list_empty(&plug->mq_list))
@@ -3698,6 +3867,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
if (list_empty(&plug->list))
return;
+ /*
+ * blk_plug has three list_heads: list, mq_list and cb_list
+ *
+ * list holds requests
+ */
list_splice_init(&plug->list, &list);
list_sort(NULL, &list, plug_rq_cmp);
@@ -3709,6 +3883,12 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
* Save and disable interrupts here, to avoid doing it for every
* queue lock we have to take.
*/
+ /*
+ * list_splice_init(&plug->list, &list) was done above,
+ *
+ * so this is equivalent to walking the requests on plug->list, adding each to its queue
+ * via __elv_add_request(), and leaving plug->list empty
+ */
local_irq_save(flags);
while (!list_empty(&list)) {
rq = list_entry_rq(list.next);
@@ -3718,6 +3898,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* This drops the queue lock
*/
+ /* from_schedule decides whether to use blk_run_queue_async() or __blk_run_queue() */
if (q)
queue_unplugged(q, depth, from_schedule);
q = rq->q;
@@ -3736,6 +3917,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* rq is already accounted, so use raw insert
*/
+ /*
+ * This is the core of the function:
+ *
+ * insert the request into the queue (linked via rq->queuelist)
+ */
if (op_is_flush(rq->cmd_flags))
__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
else
@@ -3747,6 +3933,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* This drops the queue lock
*/
+ /* from_schedule decides whether to use blk_run_queue_async() or __blk_run_queue() */
if (q)
queue_unplugged(q, depth, from_schedule);
@@ -3785,6 +3972,11 @@ EXPORT_SYMBOL(blk_finish_plug);
* The block layer runtime PM is request based, so only works for drivers
* that use request as their IO unit instead of those directly use bio's.
*/
+/*
+ * called by:
+ * - sd_probe_async() drivers/scsi/sd.c
+ * - sr_probe() drivers/scsi/sr.c
+ */
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
/* not support for RQF_PM and ->rpm_status in blk-mq yet */
@@ -3819,6 +4011,9 @@ EXPORT_SYMBOL(blk_pm_runtime_init);
* 0 - OK to runtime suspend the device
* -EBUSY - Device should not be runtime suspended
*/
+/*
+ * called only by sdev_runtime_suspend() drivers/scsi/scsi_pm.c
+ */
int blk_pre_runtime_suspend(struct request_queue *q)
{
int ret = 0;
@@ -3851,6 +4046,9 @@ EXPORT_SYMBOL(blk_pre_runtime_suspend);
* This function should be called near the end of the device's
* runtime_suspend callback.
*/
+/*
+ * called only by sdev_runtime_suspend() drivers/scsi/scsi_pm.c
+ */
void blk_post_runtime_suspend(struct request_queue *q, int err)
{
if (!q->dev)
@@ -3878,6 +4076,9 @@ EXPORT_SYMBOL(blk_post_runtime_suspend);
* This function should be called near the start of the device's
* runtime_resume callback.
*/
+/*
+ * called only by sdev_runtime_resume() drivers/scsi/scsi_pm.c
+ */
void blk_pre_runtime_resume(struct request_queue *q)
{
if (!q->dev)
@@ -3903,6 +4104,9 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
* This function should be called near the end of the device's
* runtime_resume callback.
*/
+/*
+ * called only by sdev_runtime_resume() drivers/scsi/scsi_pm.c
+ */
void blk_post_runtime_resume(struct request_queue *q, int err)
{
if (!q->dev)
@@ -3935,6 +4139,9 @@ EXPORT_SYMBOL(blk_post_runtime_resume);
* runtime PM status and re-enable peeking requests from the queue. It
* should be called before first request is added to the queue.
*/
+/*
+ * called only by scsi_bus_resume_common()
+ */
void blk_set_runtime_active(struct request_queue *q)
{
spin_lock_irq(q->queue_lock);
@@ -3946,6 +4153,7 @@ void blk_set_runtime_active(struct request_queue *q)
EXPORT_SYMBOL(blk_set_runtime_active);
#endif
+/* called only by genhd_device_init() */
int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
diff --git a/block/blk-flush.c b/block/blk-flush.c
index f171706..124c202 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -132,6 +132,11 @@ static void blk_flush_restore_request(struct request *rq)
rq->end_io = rq->flush.saved_end_io;
}
+/*
+ * called by:
+ * - blk_flush_complete_seq()
+ * - blk_kick_flush()
+ */
static bool blk_flush_queue_rq(struct request *rq, bool add_front)
{
if (rq->q->mq_ops) {
@@ -431,6 +436,12 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
* @rq is being submitted. Analyze what needs to be done and put it on the
* right queue.
*/
+/*
+ * called by:
+ * - blk_mq_sched_insert_request()
+ * - blk_mq_make_request()
+ * - __elv_add_request()
+ */
void blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -518,6 +529,9 @@ void blk_insert_flush(struct request *rq)
* room for storing the error offset in case of a flush error, if they
* wish to.
*/
+/*
+ * called by many filesystems
+ */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
sector_t *error_sector)
{
@@ -560,6 +574,11 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
}
EXPORT_SYMBOL(blkdev_issue_flush);
+/*
+ * called by:
+ * - blk_init_allocated_queue()
+ * - blk_mq_init_hctx()
+ */
struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
int node, int cmd_size)
{
@@ -574,6 +593,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
spin_lock_init(&fq->mq_flush_lock);
rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
+ /* only a single request is allocated */
fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
if (!fq->flush_rq)
goto fail_rq;
@@ -596,6 +616,7 @@ void blk_free_flush_queue(struct blk_flush_queue *fq)
if (!fq)
return;
+ /* flush_rq is just a single element */
kfree(fq->flush_rq);
kfree(fq);
}
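A typical filesystem-side call, as the comment on blkdev_issue_flush() above suggests (sketch only; sb stands for a hypothetical super_block):

	int err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);

	if (err)
		pr_warn("cache flush failed: %d\n", err);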
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index feb3057..d23db2f 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -406,6 +406,15 @@ static const struct blk_integrity_profile nop_profile = {
* struct with values appropriate for the underlying hardware. See
* Documentation/block/data-integrity.txt.
*/
+/*
+ * called by:
+ * - dm_integrity_set() -- drivers/md/dm-integrity.c
+ * - dm_table_register_integrity() -- drivers/md/dm-table.c
+ * - md_integrity_register() -- drivers/md/md.c
+ * - nd_integrity_init() -- drivers/nvdimm/core.c
+ * - nvme_init_integrity() -- drivers/nvme/host/core.c
+ * - sd_dif_config_host() -- drivers/scsi/sd_dif.c
+ */
void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
{
struct blk_integrity *bi = &disk->queue->integrity;
@@ -436,6 +445,9 @@ void blk_integrity_unregister(struct gendisk *disk)
}
EXPORT_SYMBOL(blk_integrity_unregister);
+/*
+ * called only by __device_add_disk()
+ */
void blk_integrity_add(struct gendisk *disk)
{
if (kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype,
@@ -445,6 +457,9 @@ void blk_integrity_add(struct gendisk *disk)
kobject_uevent(&disk->integrity_kobj, KOBJ_ADD);
}
+/*
+ * called only by del_gendisk()
+ */
void blk_integrity_del(struct gendisk *disk)
{
kobject_uevent(&disk->integrity_kobj, KOBJ_REMOVE);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 481dc02..2eba4d9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -673,24 +673,48 @@ static void blk_account_io_merge(struct request *req)
* For non-mq, this has to be called with the request spinlock acquired.
* For mq with scheduling, the appropriate queue wide lock should be held.
*/
+/*
+ * called by:
+ * - attempt_back_merge()
+ * - attempt_front_merge()
+ * - blk_attempt_req_merge()
+ *
+ * merge req and next; return next
+ */
static struct request *attempt_merge(struct request_queue *q,
struct request *req, struct request *next)
{
if (!q->mq_ops)
lockdep_assert_held(q->queue_lock);
+ /*
+ * Check whether a request may be merged at all;
+ * e.g. scsi passthrough or driver-private requests cannot be merged.
+ *
+ * Both req and next must be mergeable.
+ */
if (!rq_mergeable(req) || !rq_mergeable(next))
return NULL;
+ /* the requests must have the same op */
if (req_op(req) != req_op(next))
return NULL;
/*
* not contiguous
*/
+ /*
+ * blk_rq_pos(req) ----> rq->__sector: the current sector
+ * blk_rq_sectors(req) ----> rq->__data_len >> SECTOR_SHIFT
+ *
+ * if the two requests are not back-to-back contiguous, they cannot be merged
+ */
if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
return NULL;
+ /*
+ * rq_disk is a struct gendisk
+ */
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk
|| req_no_special_merge(next))
@@ -767,26 +791,51 @@ static struct request *attempt_merge(struct request_queue *q,
return next;
}
+/*
+ * called by:
+ * - blk_queue_bio()
+ * - blk_mq_sched_try_merge()
+ *
+ * Look up the request that follows rq in the request_queue (next),
+ * merge rq and next, and return next.
+ */
struct request *attempt_back_merge(struct request_queue *q, struct request *rq)
{
+ /* look up the request that follows rq in the request_queue */
struct request *next = elv_latter_request(q, rq);
+ /* merge rq and next; return next */
if (next)
return attempt_merge(q, rq, next);
return NULL;
}
+/*
+ * called by:
+ * - blk_queue_bio()
+ * - blk_mq_sched_try_merge()
+ *
+ * Look up the request that precedes rq in the request_queue (prev),
+ * merge rq and prev, and return prev.
+ */
struct request *attempt_front_merge(struct request_queue *q, struct request *rq)
{
+ /* look up the request that precedes rq in the request_queue */
struct request *prev = elv_former_request(q, rq);
+ /* merge rq and prev; return prev */
if (prev)
return attempt_merge(q, prev, rq);
return NULL;
}
+/*
+ * called from two places in elv_attempt_insert_merge()
+ *
+ * merge req and next; free next
+ */
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next)
{
@@ -797,6 +846,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0;
+ /* merge rq and next; return next */
free = attempt_merge(q, rq, next);
if (free) {
__blk_put_request(q, free);
@@ -806,6 +856,14 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
return 0;
}
+/*
+ * called by:
+ * - blk_attempt_plug_merge()
+ * - blk_mq_attempt_merge()
+ * - elv_bio_merge_ok()
+ *
+ * check, based on the attributes of the request and the bio, whether they can be merged
+ */
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!rq_mergeable(rq) || !bio_mergeable(bio))
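The merge rules annotated above boil down to a short list of preconditions; a condensed sketch of what attempt_merge() checks before folding next into req (summary only, not the full function):

	if (!rq_mergeable(req) || !rq_mergeable(next))
		return NULL;	/* e.g. passthrough / driver-private requests never merge */
	if (req_op(req) != req_op(next))
		return NULL;	/* must be the same operation (READ/WRITE/DISCARD/...) */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return NULL;	/* the end of req must touch the start of next */
	if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk)
		return NULL;	/* same data direction and same gendisk */
	/* ... merge the bio chains, fold accounting into req, and return next ... */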
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 3eb169f..dda55f2 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,11 +14,13 @@
#include "blk.h"
#include "blk-mq.h"
+/* called twice by blk_mq_map_queues() */
static int cpu_to_queue_index(unsigned int nr_queues, const int cpu)
{
return cpu % nr_queues;
}
+/* called only by blk_mq_map_queues() */
static int get_first_sibling(unsigned int cpu)
{
unsigned int ret;
@@ -30,8 +32,19 @@ static int get_first_sibling(unsigned int cpu)
return cpu;
}
+/*
+ * called by:
+ * - blk_mq_rdma_map_queues() -- block/blk-mq-rdma.c
+ * - blk_mq_virtio_map_queues() -- block/blk-mq-virtio.c
+ * - blk_mq_update_queue_map() -- block/blk-mq.c
+ * - qla2xxx_map_queues() -- drivers/scsi/qla2xxx/qla_os.c
+ * - scsi_map_queues() (twice) -- drivers/scsi/scsi_lib.c
+ *
+ * Set up set->mq_map, initializing every element in it.
+ */
int blk_mq_map_queues(struct blk_mq_tag_set *set)
{
+ /* mq_map is an array with one element per possible cpu */
unsigned int *map = set->mq_map;
unsigned int nr_queues = set->nr_hw_queues;
unsigned int cpu, first_sibling;
@@ -62,6 +75,14 @@ EXPORT_SYMBOL_GPL(blk_mq_map_queues);
* We have no quick way of doing reverse lookups. This is only used at
* queue init time, so runtime isn't important.
*/
+/*
+ * called by:
+ * - blk_mq_alloc_rq_map()
+ * - blk_mq_alloc_rqs()
+ * - blk_mq_realloc_hw_ctxs()
+ *
+ * index is the hw queue index; return the NUMA node that this hw queue maps to
+ */
int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
{
int i;
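The default map built by blk_mq_map_queues() above is essentially cpu % nr_hw_queues, adjusted so that hyper-thread siblings land on the same queue; a simplified sketch of its loop, reusing the helpers shown earlier in this file:

	for_each_possible_cpu(cpu) {
		unsigned int first_sibling = get_first_sibling(cpu);

		if (first_sibling == cpu)
			map[cpu] = cpu_to_queue_index(nr_queues, cpu);	/* cpu % nr_queues */
		else
			map[cpu] = map[first_sibling];	/* share the sibling's queue */
	}
	return 0;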
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index e233996..1f1b3b0 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -29,17 +29,26 @@
* that maps a queue to the CPUs that have irq affinity for the corresponding
* vector.
*/
+/*
+ * called by:
+ * - nvme_pci_map_queues() -- drivers/nvme/host/pci.c
+ * - qla2xxx_map_queues() -- drivers/scsi/qla2xxx/qla_os.c
+ * - pqi_map_queues() -- drivers/scsi/smartpqi/smartpqi_init.c
+ */
int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
int offset)
{
+ /* blk_mq_tag_set contains an array of struct blk_mq_tags pointers */
const struct cpumask *mask;
unsigned int queue, cpu;
for (queue = 0; queue < set->nr_hw_queues; queue++) {
+ /* return the affinity mask of vector (queue + offset) */
mask = pci_irq_get_affinity(pdev, queue + offset);
if (!mask)
goto fallback;
+ /* what happens if one interrupt maps to multiple cpus?? */
for_each_cpu(cpu, mask)
set->mq_map[cpu] = queue;
}
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 25c14c5..f33da05 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -16,12 +16,16 @@
#include "blk-mq-tag.h"
#include "blk-wbt.h"
+/*
+ * currently has no callers
+ */
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *))
{
struct blk_mq_hw_ctx *hctx;
int i;
+ /* (q)->queue_hw_ctx[i]; */
queue_for_each_hw_ctx(q, hctx, i) {
if (exit && hctx->sched_data)
exit(hctx);
@@ -31,6 +35,9 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
+/*
+ * called only by blk_mq_get_request()
+ */
void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
struct request_queue *q = rq->q;
@@ -54,6 +61,9 @@ void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
+/*
+ * called only by blk_mq_sched_dispatch_requests()
+ */
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
@@ -68,6 +78,9 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
+/*
+ * called only by blk_mq_sched_dispatch_requests()
+ */
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)