diff --git a/MAINTAINERS b/MAINTAINERS index 379387e20a96d..07a9c274c0e29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6027,7 +6027,7 @@ S: Supported F: Documentation/networking/devlink F: include/net/devlink.h F: include/uapi/linux/devlink.h -F: net/core/devlink.c +F: net/devlink/ DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT M: Christoph Niedermaier diff --git a/Makefile b/Makefile index 8a1be042d807f..4ba914c3f81f5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 49 +SUBLEVEL = 50 NAME = Curry Ramen ifndef EXTRAVERSION diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index c0983130a44c9..e0a4da4cfd8bc 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -121,7 +121,24 @@ #define cpu_has_4k_cache __isa_ge_or_opt(1, MIPS_CPU_4K_CACHE) #endif #ifndef cpu_has_octeon_cache -#define cpu_has_octeon_cache 0 +#define cpu_has_octeon_cache \ +({ \ + int __res; \ + \ + switch (boot_cpu_type()) { \ + case CPU_CAVIUM_OCTEON: \ + case CPU_CAVIUM_OCTEON_PLUS: \ + case CPU_CAVIUM_OCTEON2: \ + case CPU_CAVIUM_OCTEON3: \ + __res = 1; \ + break; \ + \ + default: \ + __res = 0; \ + } \ + \ + __res; \ +}) #endif /* Don't override `cpu_has_fpu' to 1 or the "nofpu" option won't work. */ #ifndef cpu_has_fpu @@ -351,7 +368,7 @@ ({ \ int __res; \ \ - switch (current_cpu_type()) { \ + switch (boot_cpu_type()) { \ case CPU_M14KC: \ case CPU_74K: \ case CPU_1074K: \ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6bf8dc0b8f935..d702359f8ab5e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -447,24 +447,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI def_bool y # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc - depends on AS_IS_GNU && AS_VERSION >= 23800 + # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd + depends on AS_IS_GNU && AS_VERSION >= 23600 help - Newer binutils versions default to ISA spec version 20191213 which - moves some instructions from the I extension to the Zicsr and Zifencei - extensions. + Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer + 20191213 version, which moves some instructions from the I extension to + the Zicsr and Zifencei extensions. This requires explicitly specifying + Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr + and Zifencei are supported in binutils from version 2.36 onwards. + To make life easier, and avoid forcing toolchains that default to a + newer ISA spec to version 2.2, relax the check to binutils >= 2.36. + For clang < 17 or GCC < 11.3.0, for which this is not possible or need + special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC. config TOOLCHAIN_NEEDS_OLD_ISA_SPEC def_bool y depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 - depends on CC_IS_CLANG && CLANG_VERSION < 170000 + # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671 + depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300) help - Certain versions of clang do not support zicsr and zifencei via -march - but newer versions of binutils require it for the reasons noted in the - help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This - option causes an older ISA spec compatible with these older versions - of clang to be passed to GAS, which has the same result as passing zicsr - and zifencei to -march. + Certain versions of clang and GCC do not support zicsr and zifencei via + -march. This option causes an older ISA spec compatible with these older + versions of clang and GCC to be passed to GAS, which has the same result + as passing zicsr and zifencei to -march. config FPU bool "FPU support" diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 7f34f3c7c8827..737c0857b14cd 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache COMPAT_CC := $(CC) COMPAT_LD := $(LD) -COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +# binutils 2.35 does not support the zifencei extension, but in the ISA +# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI. +ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +else + COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32 +endif COMPAT_LD_FLAGS := -melf32lriscv # Disable attributes, as they're useless and break the build. diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index 9fcfa5c4dad79..71b5059e092ab 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -19,8 +19,7 @@ * FPU state for a task MUST let the rest of the kernel know that the * FPU registers are no longer valid for this task. * - * Either one of these invalidation functions is enough. Invalidate - * a resource you control: CPU if using the CPU for something else + * Invalidate a resource you control: CPU if using the CPU for something else * (with preemption disabled), FPU for the current task, or a task that * is prevented from running by the current task. */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index caf33486dc5ee..a083f9ac9e4f6 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void) struct fpu *fpu = ¤t->thread.fpu; fpregs_lock(); - fpu__drop(fpu); + __fpu_invalidate_fpregs_state(fpu); /* * This does not change the actual hardware registers. It just * resets the memory image and sets TIF_NEED_FPU_LOAD so a diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0bab497c94369..1afbc4866b100 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) goto out_disable; } + /* + * CPU capabilities initialization runs before FPU init. So + * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely + * functional, set the feature bit so depending code works. + */ + setup_force_cpu_cap(X86_FEATURE_OSXSAVE); + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", fpu_kernel_cfg.max_features, diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 230108a90cf39..beca03556379d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4212,7 +4212,8 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) * root was invalidated by a memslot update or a relevant mmu_notifier fired. */ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault, int mmu_seq) + struct kvm_page_fault *fault, + unsigned long mmu_seq) { struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 672f0432d7777..70945f00ec412 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -51,7 +51,17 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; - /* Also waits for any queued work items. */ + /* + * Invalidate all roots, which besides the obvious, schedules all roots + * for zapping and thus puts the TDP MMU's reference to each root, i.e. + * ultimately frees all roots. + */ + kvm_tdp_mmu_invalidate_all_roots(kvm); + + /* + * Destroying a workqueue also first flushes the workqueue, i.e. no + * need to invoke kvm_tdp_mmu_zap_invalidated_roots(). + */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); @@ -127,16 +137,6 @@ static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work); } -static inline bool kvm_tdp_root_mark_invalid(struct kvm_mmu_page *page) -{ - union kvm_mmu_page_role role = page->role; - role.invalid = true; - - /* No need to use cmpxchg, only the invalid bit can change. */ - role.word = xchg(&page->role.word, role.word); - return role.invalid; -} - void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { @@ -145,45 +145,12 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) return; - WARN_ON(!root->tdp_mmu_page); - /* - * The root now has refcount=0. It is valid, but readers already - * cannot acquire a reference to it because kvm_tdp_mmu_get_root() - * rejects it. This remains true for the rest of the execution - * of this function, because readers visit valid roots only - * (except for tdp_mmu_zap_root_work(), which however - * does not acquire any reference itself). - * - * Even though there are flows that need to visit all roots for - * correctness, they all take mmu_lock for write, so they cannot yet - * run concurrently. The same is true after kvm_tdp_root_mark_invalid, - * since the root still has refcount=0. - * - * However, tdp_mmu_zap_root can yield, and writers do not expect to - * see refcount=0 (see for example kvm_tdp_mmu_invalidate_all_roots()). - * So the root temporarily gets an extra reference, going to refcount=1 - * while staying invalid. Readers still cannot acquire any reference; - * but writers are now allowed to run if tdp_mmu_zap_root yields and - * they might take an extra reference if they themselves yield. - * Therefore, when the reference is given back by the worker, - * there is no guarantee that the refcount is still 1. If not, whoever - * puts the last reference will free the page, but they will not have to - * zap the root because a root cannot go from invalid to valid. + * The TDP MMU itself holds a reference to each root until the root is + * explicitly invalidated, i.e. the final reference should be never be + * put for a valid root. */ - if (!kvm_tdp_root_mark_invalid(root)) { - refcount_set(&root->tdp_mmu_root_count, 1); - - /* - * Zapping the root in a worker is not just "nice to have"; - * it is required because kvm_tdp_mmu_invalidate_all_roots() - * skips already-invalid roots. If kvm_tdp_mmu_put_root() did - * not add the root to the workqueue, kvm_tdp_mmu_zap_all_fast() - * might return with some roots not zapped yet. - */ - tdp_mmu_schedule_zap_root(kvm, root); - return; - } + KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm); spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_del_rcu(&root->link); @@ -329,7 +296,14 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) root = tdp_mmu_alloc_sp(vcpu); tdp_mmu_init_sp(root, NULL, 0, role); - refcount_set(&root->tdp_mmu_root_count, 1); + /* + * TDP MMU roots are kept until they are explicitly invalidated, either + * by a memslot update or by the destruction of the VM. Initialize the + * refcount to two; one reference for the vCPU, and one reference for + * the TDP MMU itself, which is held until the root is invalidated and + * is ultimately put by tdp_mmu_zap_root_work(). + */ + refcount_set(&root->tdp_mmu_root_count, 2); spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots); @@ -1027,32 +1001,49 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) /* * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that * is about to be zapped, e.g. in response to a memslots update. The actual - * zapping is performed asynchronously, so a reference is taken on all roots. - * Using a separate workqueue makes it easy to ensure that the destruction is - * performed before the "fast zap" completes, without keeping a separate list - * of invalidated roots; the list is effectively the list of work items in - * the workqueue. - * - * Get a reference even if the root is already invalid, the asynchronous worker - * assumes it was gifted a reference to the root it processes. Because mmu_lock - * is held for write, it should be impossible to observe a root with zero refcount, - * i.e. the list of roots cannot be stale. + * zapping is performed asynchronously. Using a separate workqueue makes it + * easy to ensure that the destruction is performed before the "fast zap" + * completes, without keeping a separate list of invalidated roots; the list is + * effectively the list of work items in the workqueue. * - * This has essentially the same effect for the TDP MMU - * as updating mmu_valid_gen does for the shadow MMU. + * Note, the asynchronous worker is gifted the TDP MMU's reference. + * See kvm_tdp_mmu_get_vcpu_root_hpa(). */ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) { struct kvm_mmu_page *root; - lockdep_assert_held_write(&kvm->mmu_lock); - list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { - if (!root->role.invalid && - !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root))) { + /* + * mmu_lock must be held for write to ensure that a root doesn't become + * invalid while there are active readers (invalidating a root while + * there are active readers may or may not be problematic in practice, + * but it's uncharted territory and not supported). + * + * Waive the assertion if there are no users of @kvm, i.e. the VM is + * being destroyed after all references have been put, or if no vCPUs + * have been created (which means there are no roots), i.e. the VM is + * being destroyed in an error path of KVM_CREATE_VM. + */ + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && + refcount_read(&kvm->users_count) && kvm->created_vcpus) + lockdep_assert_held_write(&kvm->mmu_lock); + + /* + * As above, mmu_lock isn't held when destroying the VM! There can't + * be other references to @kvm, i.e. nothing else can invalidate roots + * or be consuming roots, but walking the list of roots does need to be + * guarded against roots being deleted by the asynchronous zap worker. + */ + rcu_read_lock(); + + list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) { + if (!root->role.invalid) { root->role.invalid = true; tdp_mmu_schedule_zap_root(kvm, root); } } + + rcu_read_unlock(); } /* diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 4459cfbdbcb18..c2f0f74193f0e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1223,9 +1223,6 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, __func__, cmd->cmd_op, ub_cmd->q_id, tag, ub_cmd->result); - if (!(issue_flags & IO_URING_F_SQE128)) - goto out; - if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues) goto out; diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index 4fb4fd4b06bda..737aa70e2cb3d 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -205,18 +205,19 @@ EXPORT_SYMBOL(devm_clk_put); struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { - struct clk **ptr, *clk; + struct devm_clk_state *state; + struct clk *clk; - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) + state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL); + if (!state) return ERR_PTR(-ENOMEM); clk = of_clk_get_by_name(np, con_id); if (!IS_ERR(clk)) { - *ptr = clk; - devres_add(dev, ptr); + state->clk = clk; + devres_add(dev, state); } else { - devres_free(ptr); + devres_free(state); } return clk; diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 348b3a9170fa4..7f5ed1aa7a9f8 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = { */ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) { + LIST_HEAD(signalled); struct sync_pt *pt, *next; trace_sync_timeline(obj); @@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) if (!timeline_fence_signaled(&pt->base)) break; - list_del_init(&pt->link); + dma_fence_get(&pt->base); + + list_move_tail(&pt->link, &signalled); rb_erase(&pt->node, &obj->pt_tree); - /* - * A signal callback may release the last reference to this - * fence, causing it to be freed. That operation has to be - * last to avoid a use after free inside this loop, and must - * be after we remove the fence from the timeline in order to - * prevent deadlocking on timeline->lock inside - * timeline_fence_release(). - */ dma_fence_signal_locked(&pt->base); } spin_unlock_irq(&obj->lock); + + list_for_each_entry_safe(pt, next, &signalled, link) { + list_del_init(&pt->link); + dma_fence_put(&pt->base); + } } /** diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index fef12e57b1f13..b352775e5e0b8 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -290,6 +290,15 @@ static void gpio_sim_mutex_destroy(void *data) mutex_destroy(lock); } +static void gpio_sim_dispose_mappings(void *data) +{ + struct gpio_sim_chip *chip = data; + unsigned int i; + + for (i = 0; i < chip->gc.ngpio; i++) + irq_dispose_mapping(irq_find_mapping(chip->irq_sim, i)); +} + static void gpio_sim_sysfs_remove(void *data) { struct gpio_sim_chip *chip = data; @@ -398,10 +407,14 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) if (!chip->pull_map) return -ENOMEM; - chip->irq_sim = devm_irq_domain_create_sim(dev, NULL, num_lines); + chip->irq_sim = devm_irq_domain_create_sim(dev, swnode, num_lines); if (IS_ERR(chip->irq_sim)) return PTR_ERR(chip->irq_sim); + ret = devm_add_action_or_reset(dev, gpio_sim_dispose_mappings, chip); + if (ret) + return ret; + mutex_init(&chip->lock); ret = devm_add_action_or_reset(dev, gpio_sim_mutex_destroy, &chip->lock); diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index a032003c340cc..d6ea47873627f 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -290,7 +290,7 @@ static int hdlcd_drm_bind(struct device *dev) */ if (hdlcd_read(hdlcd, HDLCD_REG_COMMAND)) { hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 0); - drm_aperture_remove_framebuffers(false, &hdlcd_driver); + drm_aperture_remove_framebuffers(&hdlcd_driver); } drm_mode_config_reset(drm); diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 142668cd6d7cd..688ba358f5319 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -95,7 +95,7 @@ static int armada_drm_bind(struct device *dev) } /* Remove early framebuffers */ - ret = drm_aperture_remove_framebuffers(false, &armada_drm_driver); + ret = drm_aperture_remove_framebuffers(&armada_drm_driver); if (ret) { dev_err(dev, "[" DRM_NAME ":%s] can't kick out simple-fb: %d\n", __func__, ret); diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index b9392f31e6291..800471f2a2037 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -89,27 +89,13 @@ static const struct pci_device_id ast_pciidlist[] = { MODULE_DEVICE_TABLE(pci, ast_pciidlist); -static int ast_remove_conflicting_framebuffers(struct pci_dev *pdev) -{ - bool primary = false; - resource_size_t base, size; - - base = pci_resource_start(pdev, 0); - size = pci_resource_len(pdev, 0); -#ifdef CONFIG_X86 - primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; -#endif - - return drm_aperture_remove_conflicting_framebuffers(base, size, primary, &ast_driver); -} - static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct ast_private *ast; struct drm_device *dev; int ret; - ret = ast_remove_conflicting_framebuffers(pdev); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &ast_driver); if (ret) return ret; diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 3b8fdeeafd53a..697cffbfd6037 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -32,17 +32,13 @@ * * static int remove_conflicting_framebuffers(struct pci_dev *pdev) * { - * bool primary = false; * resource_size_t base, size; * int ret; * * base = pci_resource_start(pdev, 0); * size = pci_resource_len(pdev, 0); - * #ifdef CONFIG_X86 - * primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; - * #endif * - * return drm_aperture_remove_conflicting_framebuffers(base, size, primary, + * return drm_aperture_remove_conflicting_framebuffers(base, size, * &example_driver); * } * @@ -161,7 +157,6 @@ EXPORT_SYMBOL(devm_aperture_acquire_from_firmware); * drm_aperture_remove_conflicting_framebuffers - remove existing framebuffers in the given range * @base: the aperture's base address in physical memory * @size: aperture size in bytes - * @primary: also kick vga16fb if present * @req_driver: requesting DRM driver * * This function removes graphics device drivers which use the memory range described by @@ -171,9 +166,9 @@ EXPORT_SYMBOL(devm_aperture_acquire_from_firmware); * 0 on success, or a negative errno code otherwise */ int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size, - bool primary, const struct drm_driver *req_driver) + const struct drm_driver *req_driver) { - return aperture_remove_conflicting_devices(base, size, primary, req_driver->name); + return aperture_remove_conflicting_devices(base, size, false, req_driver->name); } EXPORT_SYMBOL(drm_aperture_remove_conflicting_framebuffers); diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index cd9c73f5a64ab..738eb558a97e9 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -424,12 +424,17 @@ static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* * We cannot yet easily find the framebuffer's location in memory. So - * remove all framebuffers here. + * remove all framebuffers here. Note that we still want the pci special + * handling to kick out vgacon. * * TODO: Refactor psb_driver_load() to map vdc_reg earlier. Then we * might be able to read the framebuffer range from the device. */ - ret = drm_aperture_remove_framebuffers(true, &driver); + ret = drm_aperture_remove_framebuffers(&driver); + if (ret) + return ret; + + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); if (ret) return ret; diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index ca127ff797f75..29ee0814bccc8 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -74,7 +74,6 @@ static int hyperv_setup_vram(struct hyperv_drm_device *hv, drm_aperture_remove_conflicting_framebuffers(screen_info.lfb_base, screen_info.lfb_size, - false, &hyperv_driver); hv->fb_size = (unsigned long)hv->mmio_megabytes * 1024 * 1024; diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index b2838732ac936..cc84685368715 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -165,14 +165,60 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg) +static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine) { - u32 gsi_offset = gt->uncore->gsi_offset; + switch (engine->id) { + case RCS0: + return GEN12_CCS_AUX_INV; + case BCS0: + return GEN12_BCS0_AUX_INV; + case VCS0: + return GEN12_VD0_AUX_INV; + case VCS2: + return GEN12_VD2_AUX_INV; + case VECS0: + return GEN12_VE0_AUX_INV; + case CCS0: + return GEN12_CCS0_AUX_INV; + default: + return INVALID_MMIO_REG; + } +} + +static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) +{ + i915_reg_t reg = gen12_get_aux_inv_reg(engine); + + if (IS_PONTEVECCHIO(engine->i915)) + return false; + + /* + * So far platforms supported by i915 having flat ccs do not require + * AUX invalidation. Check also whether the engine requires it. + */ + return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915); +} + +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) +{ + i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine); + u32 gsi_offset = engine->gt->uncore->gsi_offset; + + if (!gen12_needs_ccs_aux_inv(engine)) + return cs; *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; *cs++ = AUX_INV; - *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; + *cs++ = 0; + *cs++ = 0; return cs; } @@ -181,7 +227,11 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine; - if (mode & EMIT_FLUSH) { + /* + * On Aux CCS platforms the invalidation of the Aux + * table requires quiescing memory traffic beforehand + */ + if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { u32 flags = 0; u32 *cs; @@ -236,10 +286,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) else if (engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; - if (!HAS_FLAT_CCS(rq->engine->i915)) - count = 8 + 4; - else - count = 8; + count = 8; + if (gen12_needs_ccs_aux_inv(rq->engine)) + count += 8; cs = intel_ring_begin(rq, count); if (IS_ERR(cs)) @@ -254,11 +303,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - if (!HAS_FLAT_CCS(rq->engine->i915)) { - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(rq->engine->gt, cs, - GEN12_CCS_AUX_INV); - } + cs = gen12_emit_aux_table_inv(engine, cs); *cs++ = preparser_disable(false); intel_ring_advance(rq, cs); @@ -269,21 +314,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) { - intel_engine_mask_t aux_inv = 0; - u32 cmd, *cs; + u32 cmd = 4; + u32 *cs; - cmd = 4; if (mode & EMIT_INVALIDATE) { cmd += 2; - if (!HAS_FLAT_CCS(rq->engine->i915) && - (rq->engine->class == VIDEO_DECODE_CLASS || - rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { - aux_inv = rq->engine->mask & - ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0); - if (aux_inv) - cmd += 4; - } + if (gen12_needs_ccs_aux_inv(rq->engine)) + cmd += 8; } cs = intel_ring_begin(rq, cmd); @@ -314,14 +352,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ - if (aux_inv) { /* hsdes: 1809175790 */ - if (rq->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VD0_AUX_INV); - else - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VE0_AUX_INV); - } + cs = gen12_emit_aux_table_inv(rq->engine, cs); if (mode & EMIT_INVALIDATE) *cs++ = preparser_disable(false); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index e4d24c811dd61..651eb786e930c 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -13,6 +13,7 @@ #include "intel_gt_regs.h" #include "intel_gpu_commands.h" +struct intel_engine_cs; struct intel_gt; struct i915_request; @@ -46,7 +47,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg); +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs); static inline u32 * __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index d4e9702d3c8e7..25ea5f8a464a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -120,6 +120,7 @@ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ #define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ +#define MI_SEMAPHORE_REGISTER_POLL (1 << 16) #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 137e41e37ea54..7eb01ff17d89b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1296,10 +1296,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_CCS_AUX_INV); + cs = gen12_emit_aux_table_inv(ce->engine, cs); /* Wa_16014892111 */ if (IS_DG2(ce->engine->i915)) @@ -1322,17 +1319,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - /* hsdes: 1809175790 */ - if (!HAS_FLAT_CCS(ce->engine->i915)) { - if (ce->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VD0_AUX_INV); - else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) - cs = gen12_emit_aux_table_inv(ce->engine->gt, - cs, GEN12_VE0_AUX_INV); - } - - return cs; + return gen12_emit_aux_table_inv(ce->engine, cs); } static void diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 35bc2a3fa811c..75a93951fe429 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -574,7 +574,6 @@ static int i915_pcode_init(struct drm_i915_private *i915) static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - struct pci_dev *root_pdev; int ret; if (i915_inject_probe_failure(dev_priv)) @@ -686,15 +685,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_bw_init_hw(dev_priv); - /* - * FIXME: Temporary hammer to avoid freezing the machine on our DGFX - * This should be totally removed when we handle the pci states properly - * on runtime PM and on s2idle cases. - */ - root_pdev = pcie_find_root_port(pdev); - if (root_pdev) - pci_d3cold_disable(root_pdev); - return 0; err_msi: @@ -718,16 +708,11 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - struct pci_dev *root_pdev; i915_perf_fini(dev_priv); if (pdev->msi_enabled) pci_disable_msi(pdev); - - root_pdev = pcie_find_root_port(pdev); - if (root_pdev) - pci_d3cold_enable(root_pdev); } /** @@ -1625,6 +1610,8 @@ static int intel_runtime_suspend(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; + struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *root_pdev; struct intel_gt *gt; int ret, i; @@ -1674,6 +1661,15 @@ static int intel_runtime_suspend(struct device *kdev) drm_err(&dev_priv->drm, "Unclaimed access detected prior to suspending\n"); + /* + * FIXME: Temporary hammer to avoid freezing the machine on our DGFX + * This should be totally removed when we handle the pci states properly + * on runtime PM. + */ + root_pdev = pcie_find_root_port(pdev); + if (root_pdev) + pci_d3cold_disable(root_pdev); + rpm->suspended = true; /* @@ -1712,6 +1708,8 @@ static int intel_runtime_resume(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; + struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *root_pdev; struct intel_gt *gt; int ret, i; @@ -1725,6 +1723,11 @@ static int intel_runtime_resume(struct device *kdev) intel_opregion_notify_adapter(dev_priv, PCI_D0); rpm->suspended = false; + + root_pdev = pcie_find_root_port(pdev); + if (root_pdev) + pci_d3cold_enable(root_pdev); + if (intel_uncore_unclaimed_mmio(&dev_priv->uncore)) drm_dbg(&dev_priv->drm, "Unclaimed access during suspend, bios?\n"); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index eea433ade79d0..119544d88b586 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -285,7 +285,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) * Remove early framebuffers (ie. simplefb). The framebuffer can be * located anywhere in RAM */ - ret = drm_aperture_remove_framebuffers(false, &meson_driver); + ret = drm_aperture_remove_framebuffers(&meson_driver); if (ret) goto free_drm; diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 46168eccfac4a..d4a9b501e1bcc 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -157,7 +157,7 @@ struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev) } /* the fw fb could be anywhere in memory */ - ret = drm_aperture_remove_framebuffers(false, dev->driver); + ret = drm_aperture_remove_framebuffers(dev->driver); if (ret) goto fini; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 813f9f8c86982..8e12053a220b0 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -140,7 +140,7 @@ static int rockchip_drm_bind(struct device *dev) int ret; /* Remove existing drivers that may own the framebuffer memory. */ - ret = drm_aperture_remove_framebuffers(false, &rockchip_drm_driver); + ret = drm_aperture_remove_framebuffers(&rockchip_drm_driver); if (ret) { DRM_DEV_ERROR(dev, "Failed to remove existing framebuffers - %d.\n", diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index d7914f5122dff..0a09a85ac9d69 100644 --- a/drivers/gpu/drm/stm/drv.c +++ b/drivers/gpu/drm/stm/drv.c @@ -185,7 +185,7 @@ static int stm_drm_platform_probe(struct platform_device *pdev) DRM_DEBUG("%s\n", __func__); - ret = drm_aperture_remove_framebuffers(false, &drv_driver); + ret = drm_aperture_remove_framebuffers(&drv_driver); if (ret) return ret; diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 7910c5853f0a8..5c483bbccbbbc 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -98,7 +98,7 @@ static int sun4i_drv_bind(struct device *dev) goto unbind_all; /* Remove early framebuffers (ie. simplefb) */ - ret = drm_aperture_remove_framebuffers(false, &sun4i_drv_driver); + ret = drm_aperture_remove_framebuffers(&sun4i_drv_driver); if (ret) goto unbind_all; diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index a1f909dac89a7..5fc55b9777cbf 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1252,7 +1252,7 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(false, &tegra_drm_driver); + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); if (err < 0) goto hub; diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 8c329c071c62d..b6384a5dfdbc1 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -351,7 +351,7 @@ static int vc4_drm_bind(struct device *dev) return -EPROBE_DEFER; } - ret = drm_aperture_remove_framebuffers(false, driver); + ret = drm_aperture_remove_framebuffers(driver); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 1ec9c53a7bf43..8459fab9d9797 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1683,4 +1683,16 @@ static inline bool vmw_has_fences(struct vmw_private *vmw) return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0; } +static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model, + u32 shader_type) +{ + SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX; + + if (shader_model >= VMW_SM_5) + max_allowed = SVGA3D_SHADERTYPE_MAX; + else if (shader_model >= VMW_SM_4) + max_allowed = SVGA3D_SHADERTYPE_DX10_MAX; + return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed; +} + #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 1c88b74d68cf0..58ca9adf09871 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1985,7 +1985,7 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, cmd = container_of(header, typeof(*cmd), header); - if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) { + if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) { VMW_DEBUG_USER("Illegal shader type %u.\n", (unsigned int) cmd->body.type); return -EINVAL; @@ -2108,8 +2108,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv, SVGA3dCmdHeader *header) { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer); - SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ? - SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10; struct vmw_resource *res = NULL; struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context); @@ -2126,6 +2124,14 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv, if (unlikely(ret != 0)) return ret; + if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) || + cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) { + VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n", + (unsigned int) cmd->body.type, + (unsigned int) cmd->body.slot); + return -EINVAL; + } + binding.bi.ctx = ctx_node->ctx; binding.bi.res = res; binding.bi.bt = vmw_ctx_binding_cb; @@ -2134,14 +2140,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv, binding.size = cmd->body.sizeInBytes; binding.slot = cmd->body.slot; - if (binding.shader_slot >= max_shader_num || - binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) { - VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n", - (unsigned int) cmd->body.type, - (unsigned int) binding.slot); - return -EINVAL; - } - vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot, binding.slot); @@ -2200,15 +2198,13 @@ static int vmw_cmd_dx_set_shader_res(struct vmw_private *dev_priv, { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) = container_of(header, typeof(*cmd), header); - SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ? - SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX; u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) / sizeof(SVGA3dShaderResourceViewId); if ((u64) cmd->body.startView + (u64) num_sr_view > (u64) SVGA3D_DX_MAX_SRVIEWS || - cmd->body.type >= max_allowed) { + !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) { VMW_DEBUG_USER("Invalid shader binding.\n"); return -EINVAL; } @@ -2232,8 +2228,6 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, SVGA3dCmdHeader *header) { VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader); - SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ? - SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX; struct vmw_resource *res = NULL; struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context); struct vmw_ctx_bindinfo_shader binding; @@ -2244,8 +2238,7 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, cmd = container_of(header, typeof(*cmd), header); - if (cmd->body.type >= max_allowed || - cmd->body.type < SVGA3D_SHADERTYPE_MIN) { + if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) { VMW_DEBUG_USER("Illegal shader type %u.\n", (unsigned int) cmd->body.type); return -EINVAL; diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c index c51a2678f0eb5..8c7796d3fdd2d 100644 --- a/drivers/hwmon/aquacomputer_d5next.c +++ b/drivers/hwmon/aquacomputer_d5next.c @@ -12,9 +12,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -49,6 +51,8 @@ static const char *const aqc_device_names[] = { #define CTRL_REPORT_ID 0x03 +#define CTRL_REPORT_DELAY 200 /* ms */ + /* The HID report that the official software always sends * after writing values, currently same for all devices */ @@ -269,6 +273,9 @@ struct aqc_data { enum kinds kind; const char *name; + ktime_t last_ctrl_report_op; + int ctrl_report_delay; /* Delay between two ctrl report operations, in ms */ + int buffer_size; u8 *buffer; int checksum_start; @@ -325,17 +332,35 @@ static int aqc_pwm_to_percent(long val) return DIV_ROUND_CLOSEST(val * 100 * 100, 255); } +static void aqc_delay_ctrl_report(struct aqc_data *priv) +{ + /* + * If previous read or write is too close to this one, delay the current operation + * to give the device enough time to process the previous one. + */ + if (priv->ctrl_report_delay) { + s64 delta = ktime_ms_delta(ktime_get(), priv->last_ctrl_report_op); + + if (delta < priv->ctrl_report_delay) + msleep(priv->ctrl_report_delay - delta); + } +} + /* Expects the mutex to be locked */ static int aqc_get_ctrl_data(struct aqc_data *priv) { int ret; + aqc_delay_ctrl_report(priv); + memset(priv->buffer, 0x00, priv->buffer_size); ret = hid_hw_raw_request(priv->hdev, CTRL_REPORT_ID, priv->buffer, priv->buffer_size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) ret = -ENODATA; + priv->last_ctrl_report_op = ktime_get(); + return ret; } @@ -345,6 +370,8 @@ static int aqc_send_ctrl_data(struct aqc_data *priv) int ret; u16 checksum; + aqc_delay_ctrl_report(priv); + /* Init and xorout value for CRC-16/USB is 0xffff */ checksum = crc16(0xffff, priv->buffer + priv->checksum_start, priv->checksum_length); checksum ^= 0xffff; @@ -356,12 +383,16 @@ static int aqc_send_ctrl_data(struct aqc_data *priv) ret = hid_hw_raw_request(priv->hdev, CTRL_REPORT_ID, priv->buffer, priv->buffer_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) - return ret; + goto record_access_and_ret; /* The official software sends this report after every change, so do it here as well */ ret = hid_hw_raw_request(priv->hdev, SECONDARY_CTRL_REPORT_ID, secondary_ctrl_report, SECONDARY_CTRL_REPORT_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + +record_access_and_ret: + priv->last_ctrl_report_op = ktime_get(); + return ret; } @@ -853,6 +884,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->virtual_temp_sensor_start_offset = D5NEXT_VIRTUAL_SENSORS_START; priv->power_cycle_count_offset = D5NEXT_POWER_CYCLES; priv->buffer_size = D5NEXT_CTRL_REPORT_SIZE; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->temp_label = label_d5next_temp; priv->virtual_temp_label = label_virtual_temp_sensors; @@ -893,6 +925,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->virtual_temp_sensor_start_offset = OCTO_VIRTUAL_SENSORS_START; priv->power_cycle_count_offset = OCTO_POWER_CYCLES; priv->buffer_size = OCTO_CTRL_REPORT_SIZE; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->temp_label = label_temp_sensors; priv->virtual_temp_label = label_virtual_temp_sensors; @@ -913,6 +946,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id) priv->virtual_temp_sensor_start_offset = QUADRO_VIRTUAL_SENSORS_START; priv->power_cycle_count_offset = QUADRO_POWER_CYCLES; priv->buffer_size = QUADRO_CTRL_REPORT_SIZE; + priv->ctrl_report_delay = CTRL_REPORT_DELAY; priv->flow_sensor_offset = QUADRO_FLOW_SENSOR_OFFSET; priv->temp_label = label_temp_sensors; diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c index d810a78dde51d..31e3c37662185 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c @@ -821,6 +821,8 @@ static int vb2ops_venc_queue_setup(struct vb2_queue *vq, return -EINVAL; if (*nplanes) { + if (*nplanes != q_data->fmt->num_planes) + return -EINVAL; for (i = 0; i < *nplanes; i++) if (sizes[i] < q_data->sizeimage[i]) return -EINVAL; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index b9dbad3a8af82..fc5da5d7744da 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -660,10 +660,10 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) return NULL; arp = (struct arp_pkt *)skb_network_header(skb); - /* Don't modify or load balance ARPs that do not originate locally - * (e.g.,arrive via a bridge). + /* Don't modify or load balance ARPs that do not originate + * from the bond itself or a VLAN directly above the bond. */ - if (!bond_slave_has_mac_rx(bond, arp->mac_src)) + if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) return NULL; dev = ip_dev_find(dev_net(bond->dev), arp->ip_src); diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 26a472d2ea583..6d549dbdb4674 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -192,12 +192,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, nla_peer = data[VXCAN_INFO_PEER]; ifmp = nla_data(nla_peer); - err = rtnl_nla_parse_ifla(peer_tb, - nla_data(nla_peer) + - sizeof(struct ifinfomsg), - nla_len(nla_peer) - - sizeof(struct ifinfomsg), - NULL); + err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); if (err < 0) return err; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 51d2ef0dc835c..b988c8a40d536 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1005,6 +1005,10 @@ mt753x_trap_frames(struct mt7530_priv *priv) mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, MT753X_BPDU_CPU_ONLY); + /* Trap 802.1X PAE frames to the CPU port(s) */ + mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK, + MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY)); + /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */ mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK, MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 9a45663d8b4ef..6202b0f8c3f34 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -64,6 +64,8 @@ enum mt753x_id { /* Registers for BPDU and PAE frame control*/ #define MT753X_BPC 0x24 #define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) +#define MT753X_PAE_PORT_FW(x) FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x) /* Register for :03 and :0E MAC DA frame control */ #define MT753X_RGAC2 0x2c diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 5f6af0870dfd6..0186482194d20 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1071,6 +1071,9 @@ static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns) if (gate_len_ns == U64_MAX) return U64_MAX; + if (gate_len_ns < VSC9959_TAS_MIN_GATE_LEN_NS) + return 0; + return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC; } diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 10c7c232cc4ec..52ee3751187a2 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac) int err; phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); - if (!phy_dev || IS_ERR(phy_dev)) { + if (IS_ERR(phy_dev)) { dev_err(bgmac->dev, "Failed to register fixed PHY device\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 1fe8038587ac8..1779ee524dac7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -608,7 +608,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) }; phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); - if (!phydev || IS_ERR(phydev)) { + if (IS_ERR(phydev)) { dev_err(kdev, "failed to register fixed PHY device\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index c2e7037c7ba1c..7750702900fa6 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt) tp->write_seq = snd_isn; tp->snd_nxt = snd_isn; tp->snd_una = snd_isn; - inet_sk(sk)->inet_id = get_random_u16(); + atomic_set(&inet_sk(sk)->inet_id, get_random_u16()); assign_rxopt(sk, opt); if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 5b96cd94dcd24..0b4ec6e41eb41 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -203,7 +203,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) unsigned long offset; for (offset = 0; offset < length; offset += SMP_CACHE_BYTES) - asm("dcbfl %0,%1" :: "b" (addr), "r" (offset)); + asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset)); } /* replenish the buffers for a pool. note that we don't need to diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0e01b1927c1c6..08ccf0024ce1a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2615,7 +2615,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) retval = i40e_correct_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters); - else + else if (pf->vf) retval = i40e_correct_vf_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters, pf->vf[vsi->vf_id].trusted); @@ -2788,7 +2788,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } /* if the VF is not trusted do not do promisc */ - if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { + if (vsi->type == I40E_VSI_SRIOV && pf->vf && + !pf->vf[vsi->vf_id].trusted) { clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out; } diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index e864634d66bc6..818eca6aa4a41 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -396,7 +396,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Receive Packet Data Buffer Size. * The Packet Data Buffer Size is defined in 128 byte units. */ - rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len, + BIT_ULL(ICE_RLAN_CTX_DBUF_S)); /* use 32 byte descriptors */ rlan_ctx.dsize = 1; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index b8c31bf721ad1..b719e9a771e36 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1240,7 +1240,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) if (!vf) return -EINVAL; - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1355,7 +1355,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto out_put_vf; } - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1409,7 +1409,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) return -EOPNOTSUPP; } - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1722,7 +1722,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, if (!vf) return -EINVAL; - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 71047fc341392..9dbe6e9bb1f79 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -185,25 +185,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf) return 0; } -/** - * ice_check_vf_ready_for_reset - check if VF is ready to be reset - * @vf: VF to check if it's ready to be reset - * - * The purpose of this function is to ensure that the VF is not in reset, - * disabled, and is both initialized and active, thus enabling us to safely - * initialize another reset. - */ -int ice_check_vf_ready_for_reset(struct ice_vf *vf) -{ - int ret; - - ret = ice_check_vf_ready_for_cfg(vf); - if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - ret = -EAGAIN; - - return ret; -} - /** * ice_trigger_vf_reset - Reset a VF on HW * @vf: pointer to the VF structure @@ -588,11 +569,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + if (flags & ICE_VF_RESET_LOCK) + mutex_lock(&vf->cfg_lock); + else + lockdep_assert_held(&vf->cfg_lock); + if (ice_is_vf_disabled(vf)) { vsi = ice_get_vf_vsi(vf); if (!vsi) { dev_dbg(dev, "VF is already removed\n"); - return -EINVAL; + err = -EINVAL; + goto out_unlock; } ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); @@ -601,14 +588,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", vf->vf_id); - return 0; + goto out_unlock; } - if (flags & ICE_VF_RESET_LOCK) - mutex_lock(&vf->cfg_lock); - else - lockdep_assert_held(&vf->cfg_lock); - /* Set VF disable bit state here, before triggering reset */ set_bit(ICE_VF_STATE_DIS, vf->vf_states); ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index e5bed85724622..9f7fcd8e5714b 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -214,7 +214,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); bool ice_is_vf_disabled(struct ice_vf *vf); int ice_check_vf_ready_for_cfg(struct ice_vf *vf); -int ice_check_vf_ready_for_reset(struct ice_vf *vf); void ice_set_vf_state_dis(struct ice_vf *vf); bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf); void diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index ef3c709d6a750..2b4c791b6cbad 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3722,7 +3722,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: - clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); ops->reset_vf(vf); break; case VIRTCHNL_OP_ADD_ETH_ADDR: diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 15e57460e19ea..07171e574e7d7 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1404,18 +1404,6 @@ void igb_ptp_init(struct igb_adapter *adapter) return; } - spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - - if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) - INIT_DELAYED_WORK(&adapter->ptp_overflow_work, - igb_ptp_overflow_check); - - adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; - adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; - - igb_ptp_reset(adapter); - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { @@ -1425,6 +1413,18 @@ void igb_ptp_init(struct igb_adapter *adapter) dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); + + adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + + igb_ptp_reset(adapter); } } diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index dbfa4b9dee066..90ca01889cd82 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -536,7 +536,7 @@ #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */ #define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */ #define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */ -#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x2f) << 2) +#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) #define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 705325431dec3..5541e284cd3f0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4005,9 +4005,10 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, if (link < 0) return NIX_AF_ERR_RX_LINK_INVALID; - nix_find_link_frs(rvu, req, pcifunc); linkcfg: + nix_find_link_frs(rvu, req, pcifunc); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link)); cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16); if (req->update_minlen) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index bd1a51a0a5408..f208a237d0b52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3), - MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index c968309657dd1..51eea1f0529c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb, enum mlxsw_pci_cqe_v cqe_v, char *cqe) { + u8 ts_type; + if (cqe_v != MLXSW_PCI_CQE_V2) return; - if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) != - MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC) + ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe); + + if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC && + ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC) return; mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 0777bed5bb1af..a34ff19c58bd2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); */ MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12); -/* reg_sspr_sub_port - * Virtual port within the physical port. - * Should be set to 0 when virtual ports are not enabled on the port. - * - * Access: RW - */ -MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8); - /* reg_sspr_system_port * Unique identifier within the stacking domain that represents all the ports * that are available in the system (external ports). @@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port) MLXSW_REG_ZERO(sspr, payload); mlxsw_reg_sspr_m_set(payload, 1); mlxsw_reg_sspr_local_port_set(payload, local_port); - mlxsw_reg_sspr_sub_port_set(payload, 0); mlxsw_reg_sspr_system_port_set(payload, local_port); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c index e4f4cded2b6f9..b1178b7a7f51a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c @@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule, key->vrid, GENMASK(7, 0)); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, - key->vrid >> 8, GENMASK(2, 0)); + key->vrid >> 8, GENMASK(3, 0)); switch (key->proto) { case MLXSW_SP_L3_PROTO_IPV4: return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 00c32320f8915..173808c096bab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -169,7 +169,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8), - MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3), + MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = { @@ -319,7 +319,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8), - MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true), + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 796a38f9d7b24..cd16bc8bf154c 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -748,7 +748,8 @@ static int ipvlan_device_event(struct notifier_block *unused, write_pnet(&port->pnet, newnet); - ipvlan_migrate_l3s_hook(oldnet, newnet); + if (port->mode == IPVLAN_MODE_L3S) + ipvlan_migrate_l3s_hook(oldnet, newnet); break; } case NETDEV_UNREGISTER: diff --git a/drivers/net/veth.c b/drivers/net/veth.c index a71786b3e7ba7..727b9278b9fe5 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1716,10 +1716,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, nla_peer = data[VETH_INFO_PEER]; ifmp = nla_data(nla_peer); - err = rtnl_nla_parse_ifla(peer_tb, - nla_data(nla_peer) + sizeof(struct ifinfomsg), - nla_len(nla_peer) - sizeof(struct ifinfomsg), - NULL); + err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); if (err < 0) return err; diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index cd3821a6444f0..4e436f2d13aeb 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -63,15 +63,14 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); -#ifdef DEBUG -const char *action_names[] = { +static const char *action_names[] = { + [0] = "INVALID", [OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE", [OF_RECONFIG_DETACH_NODE] = "DETACH_NODE", [OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY", [OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY", [OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY", }; -#endif int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) { @@ -594,21 +593,9 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce) } ret = __of_add_property(ce->np, ce->prop); - if (ret) { - pr_err("changeset: add_property failed @%pOF/%s\n", - ce->np, - ce->prop->name); - break; - } break; case OF_RECONFIG_REMOVE_PROPERTY: ret = __of_remove_property(ce->np, ce->prop); - if (ret) { - pr_err("changeset: remove_property failed @%pOF/%s\n", - ce->np, - ce->prop->name); - break; - } break; case OF_RECONFIG_UPDATE_PROPERTY: @@ -622,20 +609,17 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce) } ret = __of_update_property(ce->np, ce->prop, &old_prop); - if (ret) { - pr_err("changeset: update_property failed @%pOF/%s\n", - ce->np, - ce->prop->name); - break; - } break; default: ret = -EINVAL; } raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (ret) + if (ret) { + pr_err("changeset: apply failed: %-15s %pOF:%s\n", + action_names[ce->action], ce->np, ce->prop->name); return ret; + } switch (ce->action) { case OF_RECONFIG_ATTACH_NODE: @@ -921,6 +905,9 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action, if (!ce) return -ENOMEM; + if (WARN_ON(action >= ARRAY_SIZE(action_names))) + return -EINVAL; + /* get a reference to the node */ ce->action = action; ce->np = of_node_get(np); diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index f26d2ba8a3715..68278340cecfe 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -184,7 +184,8 @@ int __init ima_free_kexec_buffer(void) if (ret) return ret; - return memblock_phys_free(addr, size); + memblock_free_late(addr, size); + return 0; } #endif diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index b89ab5d9fea55..9be6ed47a1ce4 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -657,12 +657,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void) memset(&args, 0, sizeof(args)); EXPECT_BEGIN(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle"); + "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678"); rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-phandle", "phandle", 0, &args); EXPECT_END(KERN_INFO, - "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle"); + "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678"); unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6efa3d8db9a56..ea0195337bab9 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -504,12 +504,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) if (pass && dev->subordinate) { check_hotplug_bridge(slot, dev); pcibios_resource_survey_bus(dev->subordinate); - __pci_bus_size_bridges(dev->subordinate, - &add_list); + if (pci_is_root_bus(bus)) + __pci_bus_size_bridges(dev->subordinate, &add_list); } } } - __pci_bus_assign_resources(bus, &add_list, NULL); + if (pci_is_root_bus(bus)) + __pci_bus_assign_resources(bus, &add_list, NULL); + else + pci_assign_unassigned_bridge_resources(bus->self); } acpiphp_sanitize_bus(bus); diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index a8df77e80549c..be6838c252f09 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -862,6 +862,33 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +{ + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + unsigned long flags; + u32 pin_reg, mask; + int i; + + mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | + BIT(WAKE_CNTRL_OFF_S4); + + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); + + if (!pd) + continue; + + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + + pin_reg = readl(gpio_dev->base + pin * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + pin * 4); + + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + } +} + #ifdef CONFIG_PM_SLEEP static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) { @@ -1099,6 +1126,9 @@ static int amd_gpio_probe(struct platform_device *pdev) return PTR_ERR(gpio_dev->pctrl); } + /* Disable and mask interrupts */ + amd_gpio_irq_init(gpio_dev); + girq = &gpio_dev->gc.irq; gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip); /* This will let us handle the parent IRQ in the driver */ diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c index c0a04f1ee994e..12126e30dc20f 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza2.c +++ b/drivers/pinctrl/renesas/pinctrl-rza2.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,7 @@ struct rza2_pinctrl_priv { struct pinctrl_dev *pctl; struct pinctrl_gpio_range gpio_range; int npins; + struct mutex mutex; /* serialize adding groups and functions */ }; #define RZA2_PDR(port) (0x0000 + (port) * 2) /* Direction 16-bit */ @@ -358,10 +360,14 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev, psel_val[i] = MUX_FUNC(value); } + mutex_lock(&priv->mutex); + /* Register a single pin group listing all the pins we read from DT */ gsel = pinctrl_generic_add_group(pctldev, np->name, pins, npins, NULL); - if (gsel < 0) - return gsel; + if (gsel < 0) { + ret = gsel; + goto unlock; + } /* * Register a single group function where the 'data' is an array PSEL @@ -390,6 +396,8 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev, (*map)->data.mux.function = np->name; *num_maps = 1; + mutex_unlock(&priv->mutex); + return 0; remove_function: @@ -398,6 +406,9 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev, remove_group: pinctrl_generic_remove_group(pctldev, gsel); +unlock: + mutex_unlock(&priv->mutex); + dev_err(priv->dev, "Unable to parse DT node %s\n", np->name); return ret; @@ -473,6 +484,8 @@ static int rza2_pinctrl_probe(struct platform_device *pdev) if (IS_ERR(priv->base)) return PTR_ERR(priv->base); + mutex_init(&priv->mutex); + platform_set_drvdata(pdev, priv); priv->npins = (int)(uintptr_t)of_device_get_match_data(&pdev->dev) * diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index fd11d28e5a1e4..2a617832a7e60 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -146,10 +147,11 @@ struct rzg2l_pinctrl { struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; DECLARE_BITMAP(tint_slot, RZG2L_TINT_MAX_INTERRUPT); - spinlock_t bitmap_lock; + spinlock_t bitmap_lock; /* protect tint_slot bitmap */ unsigned int hwirq[RZG2L_TINT_MAX_INTERRUPT]; - spinlock_t lock; + spinlock_t lock; /* lock read/write registers */ + struct mutex mutex; /* serialize adding groups and functions */ }; static const unsigned int iolh_groupa_mA[] = { 2, 4, 8, 12 }; @@ -359,11 +361,13 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, name = np->name; } + mutex_lock(&pctrl->mutex); + /* Register a single pin group listing all the pins we read from DT */ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL); if (gsel < 0) { ret = gsel; - goto done; + goto unlock; } /* @@ -377,6 +381,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, goto remove_group; } + mutex_unlock(&pctrl->mutex); + maps[idx].type = PIN_MAP_TYPE_MUX_GROUP; maps[idx].data.mux.group = name; maps[idx].data.mux.function = name; @@ -388,6 +394,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev, remove_group: pinctrl_generic_remove_group(pctldev, gsel); +unlock: + mutex_unlock(&pctrl->mutex); done: *index = idx; kfree(configs); @@ -1501,6 +1509,7 @@ static int rzg2l_pinctrl_probe(struct platform_device *pdev) spin_lock_init(&pctrl->lock); spin_lock_init(&pctrl->bitmap_lock); + mutex_init(&pctrl->mutex); platform_set_drvdata(pdev, pctrl); diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c index 35f382b055e83..2858800288bb7 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c +++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -121,7 +122,8 @@ struct rzv2m_pinctrl { struct gpio_chip gpio_chip; struct pinctrl_gpio_range gpio_range; - spinlock_t lock; + spinlock_t lock; /* lock read/write registers */ + struct mutex mutex; /* serialize adding groups and functions */ }; static const unsigned int drv_1_8V_group2_uA[] = { 1800, 3800, 7800, 11000 }; @@ -320,11 +322,13 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, name = np->name; } + mutex_lock(&pctrl->mutex); + /* Register a single pin group listing all the pins we read from DT */ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL); if (gsel < 0) { ret = gsel; - goto done; + goto unlock; } /* @@ -338,6 +342,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, goto remove_group; } + mutex_unlock(&pctrl->mutex); + maps[idx].type = PIN_MAP_TYPE_MUX_GROUP; maps[idx].data.mux.group = name; maps[idx].data.mux.function = name; @@ -349,6 +355,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev, remove_group: pinctrl_generic_remove_group(pctldev, gsel); +unlock: + mutex_unlock(&pctrl->mutex); done: *index = idx; kfree(configs); @@ -1070,6 +1078,7 @@ static int rzv2m_pinctrl_probe(struct platform_device *pdev) } spin_lock_init(&pctrl->lock); + mutex_init(&pctrl->mutex); platform_set_drvdata(pdev, pctrl); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index bd38c7dcae347..de03b8889e9d3 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1176,6 +1176,11 @@ static const struct key_entry ideapad_keymap[] = { { KE_IGNORE, 0x03 | IDEAPAD_WMI_KEY }, /* Customizable Lenovo Hotkey ("star" with 'S' inside) */ { KE_KEY, 0x01 | IDEAPAD_WMI_KEY, { KEY_FAVORITES } }, + { KE_KEY, 0x04 | IDEAPAD_WMI_KEY, { KEY_SELECTIVE_SCREENSHOT } }, + /* Lenovo Support */ + { KE_KEY, 0x07 | IDEAPAD_WMI_KEY, { KEY_HELP } }, + { KE_KEY, 0x0e | IDEAPAD_WMI_KEY, { KEY_PICKUP_PHONE } }, + { KE_KEY, 0x0f | IDEAPAD_WMI_KEY, { KEY_HANGUP_PHONE } }, /* Dark mode toggle */ { KE_KEY, 0x13 | IDEAPAD_WMI_KEY, { KEY_PROG1 } }, /* Sound profile switch */ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index f99a9ef42116f..84e3ad290f6ba 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -926,8 +926,7 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq, .type = TYPE82_RSP_CODE, .reply_code = REP82_ERROR_MACHINE_FAILURE, }; - struct response_type *resp_type = - (struct response_type *)msg->private; + struct response_type *resp_type = msg->private; struct type86x_reply *t86r; int len; @@ -982,8 +981,7 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq, .type = TYPE82_RSP_CODE, .reply_code = REP82_ERROR_MACHINE_FAILURE, }; - struct response_type *resp_type = - (struct response_type *)msg->private; + struct response_type *resp_type = msg->private; struct type86_ep11_reply *t86r; int len; @@ -1156,23 +1154,36 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, struct ica_xcRB *xcrb, struct ap_message *ap_msg) { - int rc; - struct response_type *rtype = (struct response_type *)(ap_msg->private); + struct response_type *rtype = ap_msg->private; struct { struct type6_hdr hdr; struct CPRBX cprbx; /* ... more data blocks ... */ } __packed * msg = ap_msg->msg; - - /* - * Set the queue's reply buffer length minus 128 byte padding - * as reply limit for the card firmware. - */ - msg->hdr.fromcardlen1 = min_t(unsigned int, msg->hdr.fromcardlen1, - zq->reply.bufsize - 128); - if (msg->hdr.fromcardlen2) - msg->hdr.fromcardlen2 = - zq->reply.bufsize - msg->hdr.fromcardlen1 - 128; + unsigned int max_payload_size; + int rc, delta; + + /* calculate maximum payload for this card and msg type */ + max_payload_size = zq->reply.bufsize - sizeof(struct type86_fmt2_msg); + + /* limit each of the two from fields to the maximum payload size */ + msg->hdr.fromcardlen1 = min(msg->hdr.fromcardlen1, max_payload_size); + msg->hdr.fromcardlen2 = min(msg->hdr.fromcardlen2, max_payload_size); + + /* calculate delta if the sum of both exceeds max payload size */ + delta = msg->hdr.fromcardlen1 + msg->hdr.fromcardlen2 + - max_payload_size; + if (delta > 0) { + /* + * Sum exceeds maximum payload size, prune fromcardlen1 + * (always trust fromcardlen2) + */ + if (delta > msg->hdr.fromcardlen1) { + rc = -EINVAL; + goto out; + } + msg->hdr.fromcardlen1 -= delta; + } init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); @@ -1243,7 +1254,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * { int rc; unsigned int lfmt; - struct response_type *rtype = (struct response_type *)(ap_msg->private); + struct response_type *rtype = ap_msg->private; struct { struct type6_hdr hdr; struct ep11_cprb cprbx; @@ -1365,7 +1376,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, short int verb_length; short int key_length; } __packed * msg = ap_msg->msg; - struct response_type *rtype = (struct response_type *)(ap_msg->private); + struct response_type *rtype = ap_msg->private; int rc; msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid); diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c index 711252e52d8e1..95a86e0dfd77a 100644 --- a/drivers/scsi/raid_class.c +++ b/drivers/scsi/raid_class.c @@ -209,54 +209,6 @@ raid_attr_ro_state(level); raid_attr_ro_fn(resync); raid_attr_ro_state_fn(state); -static void raid_component_release(struct device *dev) -{ - struct raid_component *rc = - container_of(dev, struct raid_component, dev); - dev_printk(KERN_ERR, rc->dev.parent, "COMPONENT RELEASE\n"); - put_device(rc->dev.parent); - kfree(rc); -} - -int raid_component_add(struct raid_template *r,struct device *raid_dev, - struct device *component_dev) -{ - struct device *cdev = - attribute_container_find_class_device(&r->raid_attrs.ac, - raid_dev); - struct raid_component *rc; - struct raid_data *rd = dev_get_drvdata(cdev); - int err; - - rc = kzalloc(sizeof(*rc), GFP_KERNEL); - if (!rc) - return -ENOMEM; - - INIT_LIST_HEAD(&rc->node); - device_initialize(&rc->dev); - rc->dev.release = raid_component_release; - rc->dev.parent = get_device(component_dev); - rc->num = rd->component_count++; - - dev_set_name(&rc->dev, "component-%d", rc->num); - list_add_tail(&rc->node, &rd->component_list); - rc->dev.class = &raid_class.class; - err = device_add(&rc->dev); - if (err) - goto err_out; - - return 0; - -err_out: - put_device(&rc->dev); - list_del(&rc->node); - rd->component_count--; - put_device(component_dev); - kfree(rc); - return err; -} -EXPORT_SYMBOL(raid_component_add); - struct raid_template * raid_class_attach(struct raid_function_template *ft) { diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c index cd27562ec922e..6c529b37f3b46 100644 --- a/drivers/scsi/snic/snic_disc.c +++ b/drivers/scsi/snic/snic_disc.c @@ -303,12 +303,11 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid) "Snic Tgt: device_add, with err = %d\n", ret); - put_device(&tgt->dev); put_device(&snic->shost->shost_gendev); spin_lock_irqsave(snic->shost->host_lock, flags); list_del(&tgt->list); spin_unlock_irqrestore(snic->shost->host_lock, flags); - kfree(tgt); + put_device(&tgt->dev); tgt = NULL; return tgt; diff --git a/drivers/thunderbolt/tmu.c b/drivers/thunderbolt/tmu.c index 626aca3124b1c..d9544600b3867 100644 --- a/drivers/thunderbolt/tmu.c +++ b/drivers/thunderbolt/tmu.c @@ -415,7 +415,8 @@ int tb_switch_tmu_disable(struct tb_switch *sw) * uni-directional mode and we don't want to change it's TMU * mode. */ - tb_switch_tmu_rate_write(sw, TB_SWITCH_TMU_RATE_OFF); + ret = tb_switch_tmu_rate_write(sw, TB_SWITCH_TMU_RATE_OFF); + return ret; tb_port_tmu_time_sync_disable(up); ret = tb_port_tmu_time_sync_disable(down); diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c index 5c94abdb1ad6d..3e4a1f55f51b3 100644 --- a/drivers/video/aperture.c +++ b/drivers/video/aperture.c @@ -298,14 +298,6 @@ int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t si aperture_detach_devices(base, size); - /* - * If this is the primary adapter, there could be a VGA device - * that consumes the VGA framebuffer I/O range. Remove this device - * as well. - */ - if (primary) - aperture_detach_devices(VGA_FB_PHYS_BASE, VGA_FB_PHYS_SIZE); - return 0; } EXPORT_SYMBOL(aperture_remove_conflicting_devices); @@ -344,13 +336,22 @@ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *na aperture_detach_devices(base, size); } - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. - */ - ret = vga_remove_vgacon(pdev); - if (ret) - return ret; + if (primary) { + /* + * If this is the primary adapter, there could be a VGA device + * that consumes the VGA framebuffer I/O range. Remove this + * device as well. + */ + aperture_detach_devices(VGA_FB_PHYS_BASE, VGA_FB_PHYS_SIZE); + + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ + ret = vga_remove_vgacon(pdev); + if (ret) + return ret; + } return 0; diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 8b28c9bddd974..50c384ce28837 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -2238,14 +2238,6 @@ static const struct bin_attribute edid2_attr = { .read = radeon_show_edid2, }; -static int radeon_kick_out_firmware_fb(struct pci_dev *pdev) -{ - resource_size_t base = pci_resource_start(pdev, 0); - resource_size_t size = pci_resource_len(pdev, 0); - - return aperture_remove_conflicting_devices(base, size, false, KBUILD_MODNAME); -} - static int radeonfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2296,7 +2288,7 @@ static int radeonfb_pci_register(struct pci_dev *pdev, rinfo->fb_base_phys = pci_resource_start (pdev, 0); rinfo->mmio_base_phys = pci_resource_start (pdev, 2); - ret = radeon_kick_out_firmware_fb(pdev); + ret = aperture_remove_conflicting_pci_devices(pdev, KBUILD_MODNAME); if (ret) goto err_release_fb; diff --git a/fs/attr.c b/fs/attr.c index da2f90138c099..bc0896c7aa191 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -47,6 +47,7 @@ int setattr_should_drop_sgid(struct user_namespace *mnt_userns, return ATTR_KILL_SGID; return 0; } +EXPORT_SYMBOL(setattr_should_drop_sgid); /** * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to diff --git a/fs/internal.h b/fs/internal.h index 46caa33373a48..42df013f7fe76 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -242,5 +242,3 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po /* * fs/attr.c */ -int setattr_should_drop_sgid(struct user_namespace *mnt_userns, - const struct inode *inode); diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c4e0da6db7195..9ec91017a7f3c 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -27,7 +27,7 @@ * * Called with j_list_lock held. */ -static inline void __buffer_unlink_first(struct journal_head *jh) +static inline void __buffer_unlink(struct journal_head *jh) { transaction_t *transaction = jh->b_cp_transaction; @@ -40,23 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh) } } -/* - * Unlink a buffer from a transaction checkpoint(io) list. - * - * Called with j_list_lock held. - */ -static inline void __buffer_unlink(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - if (transaction->t_checkpoint_io_list == jh) { - transaction->t_checkpoint_io_list = jh->b_cpnext; - if (transaction->t_checkpoint_io_list == jh) - transaction->t_checkpoint_io_list = NULL; - } -} - /* * Check a checkpoint buffer could be release or not. * @@ -366,50 +349,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ -/* - * journal_clean_one_cp_list - * - * Find all the written-back checkpoint buffers in the given list and - * release them. If 'destroy' is set, clean all buffers unconditionally. - * - * Called with j_list_lock held. - * Returns 1 if we freed the transaction, 0 otherwise. - */ -static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) -{ - struct journal_head *last_jh; - struct journal_head *next_jh = jh; - - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - do { - jh = next_jh; - next_jh = jh->b_cpnext; - - if (!destroy && __cp_buffer_busy(jh)) - return 0; - - if (__jbd2_journal_remove_checkpoint(jh)) - return 1; - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - return 0; - } while (jh != last_jh); - - return 0; -} - /* * journal_shrink_one_cp_list * - * Find 'nr_to_scan' written-back checkpoint buffers in the given list + * Find all the written-back checkpoint buffers in the given list * and try to release them. If the whole transaction is released, set * the 'released' parameter. Return the number of released checkpointed * buffers. @@ -417,15 +360,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) * Called with j_list_lock held. */ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, - unsigned long *nr_to_scan, - bool *released) + bool destroy, bool *released) { struct journal_head *last_jh; struct journal_head *next_jh = jh; unsigned long nr_freed = 0; int ret; - if (!jh || *nr_to_scan == 0) + *released = false; + if (!jh) return 0; last_jh = jh->b_cpprev; @@ -433,12 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, jh = next_jh; next_jh = jh->b_cpnext; - (*nr_to_scan)--; - if (__cp_buffer_busy(jh)) - continue; + if (destroy) { + ret = __jbd2_journal_remove_checkpoint(jh); + } else { + ret = jbd2_journal_try_remove_checkpoint(jh); + if (ret < 0) + continue; + } nr_freed++; - ret = __jbd2_journal_remove_checkpoint(jh); if (ret) { *released = true; break; @@ -446,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, if (need_resched()) break; - } while (jh != last_jh && *nr_to_scan); + } while (jh != last_jh); return nr_freed; } @@ -464,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan) { transaction_t *transaction, *last_transaction, *next_transaction; - bool released; + bool __maybe_unused released; tid_t first_tid = 0, last_tid = 0, next_tid = 0; tid_t tid = 0; unsigned long nr_freed = 0; - unsigned long nr_scanned = *nr_to_scan; + unsigned long freed; again: spin_lock(&journal->j_list_lock); @@ -497,19 +443,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, transaction = next_transaction; next_transaction = transaction->t_cpnext; tid = transaction->t_tid; - released = false; - nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list, - nr_to_scan, &released); - if (*nr_to_scan == 0) - break; - if (need_resched() || spin_needbreak(&journal->j_list_lock)) - break; - if (released) - continue; - - nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list, - nr_to_scan, &released); + freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, + false, &released); + nr_freed += freed; + (*nr_to_scan) -= min(*nr_to_scan, freed); if (*nr_to_scan == 0) break; if (need_resched() || spin_needbreak(&journal->j_list_lock)) @@ -530,9 +468,8 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, if (*nr_to_scan && next_tid) goto again; out: - nr_scanned -= *nr_to_scan; trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid, - nr_freed, nr_scanned, next_tid); + nr_freed, next_tid); return nr_freed; } @@ -548,7 +485,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret; + bool released; transaction = journal->j_checkpoint_transactions; if (!transaction) @@ -559,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret = journal_clean_one_cp_list(transaction->t_checkpoint_list, - destroy); + journal_shrink_one_cp_list(transaction->t_checkpoint_list, + destroy, &released); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if @@ -568,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) */ if (need_resched()) return; - if (ret) - continue; - /* - * It is essential that we are as careful as in the case of - * t_checkpoint_list with removing the buffer from the list as - * we can possibly see not yet submitted buffers on io_list - */ - ret = journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, destroy); - if (need_resched()) - return; /* * Stop scanning if we couldn't free the transaction. This * avoids pointless scanning of transactions which still * weren't checkpointed. */ - if (!ret) + if (!released) return; } while (transaction != last_transaction); } @@ -663,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) jbd2_journal_put_journal_head(jh); /* Is this transaction empty? */ - if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list) + if (transaction->t_checkpoint_list) return 0; /* @@ -694,6 +620,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) return 1; } +/* + * Check the checkpoint buffer and try to remove it from the checkpoint + * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if + * it frees the transaction, 0 otherwise. + * + * This function is called with j_list_lock held. + */ +int jbd2_journal_try_remove_checkpoint(struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + + if (!trylock_buffer(bh)) + return -EBUSY; + if (buffer_dirty(bh)) { + unlock_buffer(bh); + return -EBUSY; + } + unlock_buffer(bh); + + /* + * Buffer is clean and the IO has finished (we held the buffer + * lock) so the checkpoint is done. We can safely remove the + * buffer from this transaction. + */ + JBUFFER_TRACE(jh, "remove from checkpoint list"); + return __jbd2_journal_remove_checkpoint(jh); +} + /* * journal_insert_checkpoint: put a committed buffer onto a checkpoint * list so that we know when it is safe to clean the transaction out of @@ -755,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_forget == NULL); J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); - J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(atomic_read(&transaction->t_updates) == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 885a7a6cc53e6..f1d9db6686e31 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1171,8 +1171,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; /* Check if the transaction can be dropped now that we are finished */ - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { + if (commit_transaction->t_checkpoint_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); jbd2_journal_free_transaction(commit_transaction); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 18611241f4513..6ef5022949c46 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) * Otherwise, if the buffer has been written to disk, * it is safe to remove the checkpoint and drop it. */ - if (!buffer_dirty(bh)) { - __jbd2_journal_remove_checkpoint(jh); + if (jbd2_journal_try_remove_checkpoint(jh) >= 0) { spin_unlock(&journal->j_list_lock); goto drop; } @@ -2112,20 +2111,14 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) jh = bh2jh(bh); - if (buffer_locked(bh) || buffer_dirty(bh)) - goto out; - if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) - goto out; + return; spin_lock(&journal->j_list_lock); - if (jh->b_cp_transaction != NULL) { - /* written-back checkpointed metadata buffer */ - JBUFFER_TRACE(jh, "remove from checkpoint list"); - __jbd2_journal_remove_checkpoint(jh); - } + /* Remove written-back checkpointed metadata buffer */ + if (jh->b_cp_transaction != NULL) + jbd2_journal_try_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); -out: return; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1707f46b1335c..cf34d0c309459 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -474,20 +474,26 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, return result; } -static void -nfs_direct_join_group(struct list_head *list, struct inode *inode) +static void nfs_direct_join_group(struct list_head *list, struct inode *inode) { - struct nfs_page *req, *next; + struct nfs_page *req, *subreq; list_for_each_entry(req, list, wb_list) { - if (req->wb_head != req || req->wb_this_page == req) + if (req->wb_head != req) continue; - for (next = req->wb_this_page; - next != req->wb_head; - next = next->wb_this_page) { - nfs_list_remove_request(next); - nfs_release_request(next); - } + subreq = req->wb_this_page; + if (subreq == req) + continue; + do { + /* + * Remove subrequests from this list before freeing + * them in the call to nfs_join_page_group(). + */ + if (!list_empty(&subreq->wb_list)) { + nfs_list_remove_request(subreq); + nfs_release_request(subreq); + } + } while ((subreq = subreq->wb_this_page) != req); nfs_join_page_group(req, inode); } } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6b2cfa59a1a2b..e0c1fb98f907a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -717,9 +717,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_KILL_SUID) != 0 && inode->i_mode & S_ISUID) inode->i_mode &= ~S_ISUID; - if ((attr->ia_valid & ATTR_KILL_SGID) != 0 && - (inode->i_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP)) + if (setattr_should_drop_sgid(&init_user_ns, inode)) inode->i_mode &= ~S_ISGID; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ecb428512fe1a..7c33bba179d2f 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1359,7 +1359,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, for (i = 0; i < np; i++) { pages[i] = alloc_page(GFP_KERNEL); if (!pages[i]) { - np = i + 1; err = -ENOMEM; goto out; } @@ -1383,8 +1382,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, } while (exception.retry); out: - while (--np >= 0) - __free_page(pages[np]); + while (--i >= 0) + __free_page(pages[i]); kfree(pages); return err; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 177cb7b089b9a..1044305e77996 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5995,9 +5995,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, out_ok: ret = res.acl_len; out_free: - for (i = 0; i < npages; i++) - if (pages[i]) - __free_page(pages[i]); + while (--i >= 0) + __free_page(pages[i]); if (res.acl_scratch) __free_page(res.acl_scratch); kfree(pages); @@ -7171,8 +7170,15 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) goto out_restart; break; - case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: + if (data->arg.new_lock_owner != 0 && + nfs4_refresh_open_old_stateid(&data->arg.open_stateid, + lsp->ls_state)) + goto out_restart; + if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp)) + goto out_restart; + fallthrough; + case -NFS4ERR_BAD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c5dc0cd6f7031..96714e105d7bf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1368,9 +1368,9 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); if (clp->cl_minorversion) { + spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; refcount_inc(&dp->dl_stid.sc_count); - spin_lock(&clp->cl_lock); list_add(&dp->dl_recall_lru, &clp->cl_revoked); spin_unlock(&clp->cl_lock); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5c69dce742ae9..ae888ce2b247e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -321,7 +321,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) iap->ia_mode &= ~S_ISGID; } else { /* set ATTR_KILL_* bits and let VFS handle it */ - iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); + iap->ia_valid |= ATTR_KILL_SUID; + iap->ia_valid |= + setattr_should_drop_sgid(&init_user_ns, inode); } } } diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 05f2cc03d03d9..b235d6833e27d 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -1525,7 +1525,7 @@ enum drm_dp_phy { #define DP_BRANCH_OUI_HEADER_SIZE 0xc #define DP_RECEIVER_CAP_SIZE 0xf -#define DP_DSC_RECEIVER_CAP_SIZE 0xf +#define DP_DSC_RECEIVER_CAP_SIZE 0x10 /* DSC Capabilities 0x60 through 0x6F */ #define EDP_PSR_RECEIVER_CAP_SIZE 2 #define EDP_DISPLAY_CTL_CAP_SIZE 3 #define DP_LTTPR_COMMON_CAP_SIZE 8 diff --git a/include/drm/drm_aperture.h b/include/drm/drm_aperture.h index 7096703c39493..cbe33b49fd5dc 100644 --- a/include/drm/drm_aperture.h +++ b/include/drm/drm_aperture.h @@ -13,14 +13,13 @@ int devm_aperture_acquire_from_firmware(struct drm_device *dev, resource_size_t resource_size_t size); int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size, - bool primary, const struct drm_driver *req_driver); + const struct drm_driver *req_driver); int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const struct drm_driver *req_driver); /** * drm_aperture_remove_framebuffers - remove all existing framebuffers - * @primary: also kick vga16fb if present * @req_driver: requesting DRM driver * * This function removes all graphics device drivers. Use this function on systems @@ -30,9 +29,9 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, * 0 on success, or a negative errno code otherwise */ static inline int -drm_aperture_remove_framebuffers(bool primary, const struct drm_driver *req_driver) +drm_aperture_remove_framebuffers(const struct drm_driver *req_driver) { - return drm_aperture_remove_conflicting_framebuffers(0, (resource_size_t)-1, primary, + return drm_aperture_remove_conflicting_framebuffers(0, (resource_size_t)-1, req_driver); } diff --git a/include/linux/clk.h b/include/linux/clk.h index 1ef0133242374..06f1b292f8a00 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale); */ bool clk_is_match(const struct clk *p, const struct clk *q); +/** + * clk_rate_exclusive_get - get exclusivity over the rate control of a + * producer + * @clk: clock source + * + * This function allows drivers to get exclusive control over the rate of a + * provider. It prevents any other consumer to execute, even indirectly, + * opereation which could alter the rate of the provider or cause glitches + * + * If exlusivity is claimed more than once on clock, even by the same driver, + * the rate effectively gets locked as exclusivity can't be preempted. + * + * Must not be called from within atomic context. + * + * Returns success (0) or negative errno. + */ +int clk_rate_exclusive_get(struct clk *clk); + +/** + * clk_rate_exclusive_put - release exclusivity over the rate control of a + * producer + * @clk: clock source + * + * This function allows drivers to release the exclusivity it previously got + * from clk_rate_exclusive_get() + * + * The caller must balance the number of clk_rate_exclusive_get() and + * clk_rate_exclusive_put() calls. + * + * Must not be called from within atomic context. + */ +void clk_rate_exclusive_put(struct clk *clk); + #else static inline int clk_notifier_register(struct clk *clk, @@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q) return p == q; } +static inline int clk_rate_exclusive_get(struct clk *clk) +{ + return 0; +} + +static inline void clk_rate_exclusive_put(struct clk *clk) {} + #endif #ifdef CONFIG_HAVE_CLK_PREPARE @@ -583,38 +623,6 @@ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); */ struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id); -/** - * clk_rate_exclusive_get - get exclusivity over the rate control of a - * producer - * @clk: clock source - * - * This function allows drivers to get exclusive control over the rate of a - * provider. It prevents any other consumer to execute, even indirectly, - * opereation which could alter the rate of the provider or cause glitches - * - * If exlusivity is claimed more than once on clock, even by the same driver, - * the rate effectively gets locked as exclusivity can't be preempted. - * - * Must not be called from within atomic context. - * - * Returns success (0) or negative errno. - */ -int clk_rate_exclusive_get(struct clk *clk); - -/** - * clk_rate_exclusive_put - release exclusivity over the rate control of a - * producer - * @clk: clock source - * - * This function allows drivers to release the exclusivity it previously got - * from clk_rate_exclusive_get() - * - * The caller must balance the number of clk_rate_exclusive_get() and - * clk_rate_exclusive_put() calls. - * - * Must not be called from within atomic context. - */ -void clk_rate_exclusive_put(struct clk *clk); /** * clk_enable - inform the system when the clock source should be running. @@ -974,14 +982,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {} static inline void devm_clk_put(struct device *dev, struct clk *clk) {} - -static inline int clk_rate_exclusive_get(struct clk *clk) -{ - return 0; -} - -static inline void clk_rate_exclusive_put(struct clk *clk) {} - static inline int clk_enable(struct clk *clk) { return 0; diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index d58e0476ee8e3..0348dba5680ef 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -71,8 +71,10 @@ extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); extern void cpuset_wait_for_hotplug(void); -extern void cpuset_read_lock(void); -extern void cpuset_read_unlock(void); +extern void inc_dl_tasks_cs(struct task_struct *task); +extern void dec_dl_tasks_cs(struct task_struct *task); +extern void cpuset_lock(void); +extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -196,8 +198,10 @@ static inline void cpuset_update_active_cpus(void) static inline void cpuset_wait_for_hotplug(void) { } -static inline void cpuset_read_lock(void) { } -static inline void cpuset_read_unlock(void) { } +static inline void inc_dl_tasks_cs(struct task_struct *task) { } +static inline void dec_dl_tasks_cs(struct task_struct *task) { } +static inline void cpuset_lock(void) { } +static inline void cpuset_unlock(void) { } static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) diff --git a/include/linux/fs.h b/include/linux/fs.h index 12e9a61238cfe..aa66fa13594d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3146,6 +3146,8 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *); extern int file_remove_privs(struct file *); +int setattr_should_drop_sgid(struct user_namespace *mnt_userns, + const struct inode *inode); /* * This must be used for allocating filesystems specific inodes to set diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0b7242370b567..ebb1608d9dcd2 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -622,12 +622,6 @@ struct transaction_s */ struct journal_head *t_checkpoint_list; - /* - * Doubly-linked circular list of all buffers submitted for IO while - * checkpointing. [j_list_lock] - */ - struct journal_head *t_checkpoint_io_list; - /* * Doubly-linked circular list of metadata buffers being * shadowed by log IO. The IO buffers on the iobuf list and @@ -1441,6 +1435,7 @@ extern void jbd2_journal_commit_transaction(journal_t *); void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan); int __jbd2_journal_remove_checkpoint(struct journal_head *); +int jbd2_journal_try_remove_checkpoint(struct journal_head *jh); void jbd2_journal_destroy_checkpoint(journal_t *journal); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); diff --git a/include/linux/mm.h b/include/linux/mm.h index b8ed44f401b58..104ec00823da8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1727,6 +1727,25 @@ static inline size_t folio_size(struct folio *folio) return PAGE_SIZE << folio_order(folio); } +/** + * folio_estimated_sharers - Estimate the number of sharers of a folio. + * @folio: The folio. + * + * folio_estimated_sharers() aims to serve as a function to efficiently + * estimate the number of processes sharing a folio. This is done by + * looking at the precise mapcount of the first subpage in the folio, and + * assuming the other subpages are the same. This may not be true for large + * folios. If you want exact mapcounts for exact calculations, look at + * page_mapcount() or folio_total_mapcount(). + * + * Return: The estimated number of processes sharing a folio. + */ +static inline int folio_estimated_sharers(struct folio *folio) +{ + return page_mapcount(folio_page(folio, 0)); +} + + #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE static inline int arch_make_page_accessible(struct page *page) { @@ -3091,6 +3110,16 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) smp_rmb(); + /* + * During GUP-fast we might not get called on the head page for a + * hugetlb page that is mapped using cont-PTE, because GUP-fast does + * not work with the abstracted hugetlb PTEs that always point at the + * head page. For hugetlb, PageAnonExclusive only applies on the head + * page (as it cannot be partially COW-shared), so lookup the head page. + */ + if (unlikely(!PageHead(page) && PageHuge(page))) + page = compound_head(page); + /* * Note that PageKsm() pages cannot be exclusive, and consequently, * cannot get pinned. diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h index 5cdfcb873a8f0..772d45b2a60a0 100644 --- a/include/linux/raid_class.h +++ b/include/linux/raid_class.h @@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state) struct raid_template *raid_class_attach(struct raid_function_template *); void raid_class_release(struct raid_template *); - -int __must_check raid_component_add(struct raid_template *, struct device *, - struct device *); - diff --git a/include/linux/sched.h b/include/linux/sched.h index ffb6eb55cd135..0cac69902ec58 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1846,7 +1846,9 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); +extern int task_can_attach(struct task_struct *p); +extern int dl_bw_alloc(int cpu, u64 dl_bw); +extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); diff --git a/include/net/bonding.h b/include/net/bonding.h index 17329a19f0c64..9a3ac960dfe15 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -727,23 +727,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, } /* Caller must hold rcu_read_lock() for read */ -static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) +static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac) { struct list_head *iter; struct slave *tmp; - struct netdev_hw_addr *ha; bond_for_each_slave_rcu(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return true; - - if (netdev_uc_empty(bond->dev)) - return false; - - netdev_for_each_uc_addr(ha, bond->dev) - if (ether_addr_equal_64bits(mac, ha->addr)) - return true; - return false; } diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c8ef3b881f03d..c2432c2addc82 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -222,8 +222,8 @@ struct inet_sock { __s16 uc_ttl; __u16 cmsg_flags; struct ip_options_rcu __rcu *inet_opt; + atomic_t inet_id; __be16 inet_sport; - __u16 inet_id; __u8 tos; __u8 min_ttl; diff --git a/include/net/ip.h b/include/net/ip.h index 530e7257e4389..1872f570abeda 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -532,8 +532,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, * generator as much as we can. */ if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; + int val; + + /* avoid atomic operations for TCP, + * as we hold socket lock at this point. + */ + if (sk_is_tcp(sk)) { + sock_owned_by_me(sk); + val = atomic_read(&inet_sk(sk)->inet_id); + atomic_set(&inet_sk(sk)->inet_id, val + segs); + } else { + val = atomic_add_return(segs, &inet_sk(sk)->inet_id); + } + iph->id = htons(val); return; } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 72b739dc6d530..8a338c33118f9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6444,6 +6444,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, * marks frames marked in the bitmap as having been filtered. Afterwards, it * checks if any frames in the window starting from @ssn can now be released * (in case they were only waiting for frames that were filtered.) + * (Only work correctly if @max_rx_aggregation_subframes <= 64 frames) */ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, u16 ssn, u64 filtered, diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bf8bb33578250..9f881b74f32ed 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -189,8 +189,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, - struct netlink_ext_ack *exterr); +int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, + struct netlink_ext_ack *exterr); struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/include/net/sock.h b/include/net/sock.h index 699408944952c..d1f936ed97556 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1320,6 +1320,7 @@ struct proto { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. + * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes. * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ @@ -1448,7 +1449,7 @@ static inline bool sk_has_memory_pressure(const struct sock *sk) static inline bool sk_under_global_memory_pressure(const struct sock *sk) { return sk->sk_prot->memory_pressure && - !!*sk->sk_prot->memory_pressure; + !!READ_ONCE(*sk->sk_prot->memory_pressure); } static inline bool sk_under_memory_pressure(const struct sock *sk) @@ -1460,7 +1461,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return !!*sk->sk_prot->memory_pressure; + return !!READ_ONCE(*sk->sk_prot->memory_pressure); } static inline long @@ -1537,7 +1538,7 @@ proto_memory_pressure(struct proto *prot) { if (!prot->memory_pressure) return false; - return !!*prot->memory_pressure; + return !!READ_ONCE(*prot->memory_pressure); } diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 8f5ee380d3093..5646ae15a957a 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -462,11 +462,9 @@ TRACE_EVENT(jbd2_shrink_scan_exit, TRACE_EVENT(jbd2_shrink_checkpoint_list, TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid, - unsigned long nr_freed, unsigned long nr_scanned, - tid_t next_tid), + unsigned long nr_freed, tid_t next_tid), - TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, - nr_scanned, next_tid), + TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, next_tid), TP_STRUCT__entry( __field(dev_t, dev) @@ -474,7 +472,6 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list, __field(tid_t, tid) __field(tid_t, last_tid) __field(unsigned long, nr_freed) - __field(unsigned long, nr_scanned) __field(tid_t, next_tid) ), @@ -484,15 +481,14 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list, __entry->tid = tid; __entry->last_tid = last_tid; __entry->nr_freed = nr_freed; - __entry->nr_scanned = nr_scanned; __entry->next_tid = next_tid; ), TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu " - "scanned %lu next transaction %u", + "next transaction %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->first_tid, __entry->tid, __entry->last_tid, - __entry->nr_freed, __entry->nr_scanned, __entry->next_tid) + __entry->nr_freed, __entry->next_tid) ); #endif /* _TRACE_JBD2_H */ diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 3526389ac2180..cd922d2bef5f5 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -15,6 +15,7 @@ struct io_msg { struct file *file; + struct file *src_file; u64 user_data; u32 len; u32 cmd; @@ -23,33 +24,12 @@ struct io_msg { u32 flags; }; -static int io_msg_ring_data(struct io_kiocb *req) +static void io_double_unlock_ctx(struct io_ring_ctx *octx) { - struct io_ring_ctx *target_ctx = req->file->private_data; - struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); - - if (msg->src_fd || msg->dst_fd || msg->flags) - return -EINVAL; - if (target_ctx->flags & IORING_SETUP_R_DISABLED) - return -EBADFD; - - if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true)) - return 0; - - return -EOVERFLOW; -} - -static void io_double_unlock_ctx(struct io_ring_ctx *ctx, - struct io_ring_ctx *octx, - unsigned int issue_flags) -{ - if (issue_flags & IO_URING_F_UNLOCKED) - mutex_unlock(&ctx->uring_lock); mutex_unlock(&octx->uring_lock); } -static int io_double_lock_ctx(struct io_ring_ctx *ctx, - struct io_ring_ctx *octx, +static int io_double_lock_ctx(struct io_ring_ctx *octx, unsigned int issue_flags) { /* @@ -62,60 +42,86 @@ static int io_double_lock_ctx(struct io_ring_ctx *ctx, return -EAGAIN; return 0; } + mutex_lock(&octx->uring_lock); + return 0; +} - /* Always grab smallest value ctx first. We know ctx != octx. */ - if (ctx < octx) { - mutex_lock(&ctx->uring_lock); - mutex_lock(&octx->uring_lock); - } else { - mutex_lock(&octx->uring_lock); - mutex_lock(&ctx->uring_lock); - } +void io_msg_ring_cleanup(struct io_kiocb *req) +{ + struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); - return 0; + if (WARN_ON_ONCE(!msg->src_file)) + return; + + fput(msg->src_file); + msg->src_file = NULL; } -static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) +static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *target_ctx = req->file->private_data; struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); - struct io_ring_ctx *ctx = req->ctx; - unsigned long file_ptr; - struct file *src_file; int ret; - if (msg->len) - return -EINVAL; - if (target_ctx == ctx) + if (msg->src_fd || msg->dst_fd || msg->flags) return -EINVAL; if (target_ctx->flags & IORING_SETUP_R_DISABLED) return -EBADFD; - ret = io_double_lock_ctx(ctx, target_ctx, issue_flags); - if (unlikely(ret)) - return ret; + ret = -EOVERFLOW; + if (target_ctx->flags & IORING_SETUP_IOPOLL) { + if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) + return -EAGAIN; + if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true)) + ret = 0; + io_double_unlock_ctx(target_ctx); + } else { + if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0, true)) + ret = 0; + } - ret = -EBADF; - if (unlikely(msg->src_fd >= ctx->nr_user_files)) - goto out_unlock; + return ret; +} - msg->src_fd = array_index_nospec(msg->src_fd, ctx->nr_user_files); - file_ptr = io_fixed_file_slot(&ctx->file_table, msg->src_fd)->file_ptr; - if (!file_ptr) - goto out_unlock; +static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); + struct io_ring_ctx *ctx = req->ctx; + struct file *file = NULL; + unsigned long file_ptr; + int idx = msg->src_fd; + + io_ring_submit_lock(ctx, issue_flags); + if (likely(idx < ctx->nr_user_files)) { + idx = array_index_nospec(idx, ctx->nr_user_files); + file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr; + file = (struct file *) (file_ptr & FFS_MASK); + if (file) + get_file(file); + } + io_ring_submit_unlock(ctx, issue_flags); + return file; +} - src_file = (struct file *) (file_ptr & FFS_MASK); - get_file(src_file); +static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *target_ctx = req->file->private_data; + struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); + struct file *src_file = msg->src_file; + int ret; + + if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) + return -EAGAIN; ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd); - if (ret < 0) { - fput(src_file); + if (ret < 0) goto out_unlock; - } + + msg->src_file = NULL; + req->flags &= ~REQ_F_NEED_CLEANUP; if (msg->flags & IORING_MSG_RING_CQE_SKIP) goto out_unlock; - /* * If this fails, the target still received the file descriptor but * wasn't notified of the fact. This means that if this request @@ -125,10 +131,29 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0, true)) ret = -EOVERFLOW; out_unlock: - io_double_unlock_ctx(ctx, target_ctx, issue_flags); + io_double_unlock_ctx(target_ctx); return ret; } +static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *target_ctx = req->file->private_data; + struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); + struct io_ring_ctx *ctx = req->ctx; + struct file *src_file = msg->src_file; + + if (target_ctx == ctx) + return -EINVAL; + if (!src_file) { + src_file = io_msg_grab_file(req, issue_flags); + if (!src_file) + return -EBADF; + msg->src_file = src_file; + req->flags |= REQ_F_NEED_CLEANUP; + } + return io_msg_install_complete(req, issue_flags); +} + int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); @@ -136,6 +161,7 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->buf_index || sqe->personality)) return -EINVAL; + msg->src_file = NULL; msg->user_data = READ_ONCE(sqe->off); msg->len = READ_ONCE(sqe->len); msg->cmd = READ_ONCE(sqe->addr); @@ -159,7 +185,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) switch (msg->cmd) { case IORING_MSG_DATA: - ret = io_msg_ring_data(req); + ret = io_msg_ring_data(req, issue_flags); break; case IORING_MSG_SEND_FD: ret = io_msg_send_fd(req, issue_flags); diff --git a/io_uring/msg_ring.h b/io_uring/msg_ring.h index fb9601f202d07..3987ee6c0e5f1 100644 --- a/io_uring/msg_ring.h +++ b/io_uring/msg_ring.h @@ -2,3 +2,4 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags); +void io_msg_ring_cleanup(struct io_kiocb *req); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 04dd2c983fce4..3aa0d65c50e34 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -445,6 +445,7 @@ const struct io_op_def io_op_defs[] = { .name = "MSG_RING", .prep = io_msg_ring_prep, .issue = io_msg_ring, + .cleanup = io_msg_ring_cleanup, }, [IORING_OP_FSETXATTR] = { .needs_file = 1, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 73f11e4db3a4d..97ecca43386d9 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -6681,6 +6682,9 @@ void cgroup_exit(struct task_struct *tsk) list_add_tail(&tsk->cg_list, &cset->dying_tasks); cset->nr_tasks--; + if (dl_task(tsk)) + dec_dl_tasks_cs(tsk); + WARN_ON_ONCE(cgroup_task_frozen(tsk)); if (unlikely(!(tsk->flags & PF_KTHREAD) && test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags))) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e276db7228451..db3e05b6b4dd2 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -193,6 +193,14 @@ struct cpuset { int use_parent_ecpus; int child_ecpus_count; + /* + * number of SCHED_DEADLINE tasks attached to this cpuset, so that we + * know when to rebuild associated root domain bandwidth information. + */ + int nr_deadline_tasks; + int nr_migrate_dl_tasks; + u64 sum_migrate_dl_bw; + /* Invalid partition error code, not lock protected */ enum prs_errcode prs_err; @@ -245,6 +253,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs) return css_cs(cs->css.parent); } +void inc_dl_tasks_cs(struct task_struct *p) +{ + struct cpuset *cs = task_cs(p); + + cs->nr_deadline_tasks++; +} + +void dec_dl_tasks_cs(struct task_struct *p) +{ + struct cpuset *cs = task_cs(p); + + cs->nr_deadline_tasks--; +} + /* bits in struct cpuset flags field */ typedef enum { CS_ONLINE, @@ -366,22 +388,23 @@ static struct cpuset top_cpuset = { if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* - * There are two global locks guarding cpuset structures - cpuset_rwsem and + * There are two global locks guarding cpuset structures - cpuset_mutex and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this - * comment. The cpuset code uses only cpuset_rwsem write lock. Other - * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to - * prevent change to cpuset structures. + * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems + * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset + * structures. Note that cpuset_mutex needs to be a mutex as it is used in + * paths that rely on priority inheritance (e.g. scheduler - on RT) for + * correctness. * * A task must hold both locks to modify cpusets. If a task holds - * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it - * is the only task able to also acquire callback_lock and be able to - * modify cpusets. It can perform various checks on the cpuset structure - * first, knowing nothing will change. It can also allocate memory while - * just holding cpuset_rwsem. While it is performing these checks, various - * callback routines can briefly acquire callback_lock to query cpusets. - * Once it is ready to make the changes, it takes callback_lock, blocking - * everyone else. + * cpuset_mutex, it blocks others, ensuring that it is the only task able to + * also acquire callback_lock and be able to modify cpusets. It can perform + * various checks on the cpuset structure first, knowing nothing will change. + * It can also allocate memory while just holding cpuset_mutex. While it is + * performing these checks, various callback routines can briefly acquire + * callback_lock to query cpusets. Once it is ready to make the changes, it + * takes callback_lock, blocking everyone else. * * Calls to the kernel memory allocator can not be made while holding * callback_lock, as that would risk double tripping on callback_lock @@ -403,16 +426,16 @@ static struct cpuset top_cpuset = { * guidelines for accessing subsystem state in kernel/cgroup.c */ -DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem); +static DEFINE_MUTEX(cpuset_mutex); -void cpuset_read_lock(void) +void cpuset_lock(void) { - percpu_down_read(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); } -void cpuset_read_unlock(void) +void cpuset_unlock(void) { - percpu_up_read(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } static DEFINE_SPINLOCK(callback_lock); @@ -496,7 +519,7 @@ static inline bool partition_is_populated(struct cpuset *cs, * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_cpus(struct task_struct *tsk, struct cpumask *pmask) @@ -538,7 +561,7 @@ static void guarantee_online_cpus(struct task_struct *tsk, * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { @@ -550,7 +573,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) /* * update task's spread flag if cpuset's page/slab spread flag is set * - * Call with callback_lock or cpuset_rwsem held. The check can be skipped + * Call with callback_lock or cpuset_mutex held. The check can be skipped * if on default hierarchy. */ static void cpuset_update_task_spread_flags(struct cpuset *cs, @@ -575,7 +598,7 @@ static void cpuset_update_task_spread_flags(struct cpuset *cs, * * One cpuset is a subset of another if all its allowed CPUs and * Memory Nodes are a subset of the other, and its exclusive flags - * are only set if the other's are set. Call holding cpuset_rwsem. + * are only set if the other's are set. Call holding cpuset_mutex. */ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) @@ -713,7 +736,7 @@ static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial) * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes - * cpuset_rwsem held. + * cpuset_mutex held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the @@ -829,7 +852,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_unlock(); } -/* Must be called with cpuset_rwsem held. */ +/* Must be called with cpuset_mutex held. */ static inline int nr_cpusets(void) { /* jump label reference count + the top-level cpuset */ @@ -855,7 +878,7 @@ static inline int nr_cpusets(void) * domains when operating in the severe memory shortage situations * that could cause allocation failures below. * - * Must be called with cpuset_rwsem held. + * Must be called with cpuset_mutex held. * * The three key local variables below are: * cp - cpuset pointer, used (together with pos_css) to perform a @@ -1066,11 +1089,14 @@ static int generate_sched_domains(cpumask_var_t **domains, return ndoms; } -static void update_tasks_root_domain(struct cpuset *cs) +static void dl_update_tasks_root_domain(struct cpuset *cs) { struct css_task_iter it; struct task_struct *task; + if (cs->nr_deadline_tasks == 0) + return; + css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) @@ -1079,12 +1105,12 @@ static void update_tasks_root_domain(struct cpuset *cs) css_task_iter_end(&it); } -static void rebuild_root_domains(void) +static void dl_rebuild_rd_accounting(void) { struct cpuset *cs = NULL; struct cgroup_subsys_state *pos_css; - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); lockdep_assert_cpus_held(); lockdep_assert_held(&sched_domains_mutex); @@ -1107,7 +1133,7 @@ static void rebuild_root_domains(void) rcu_read_unlock(); - update_tasks_root_domain(cs); + dl_update_tasks_root_domain(cs); rcu_read_lock(); css_put(&cs->css); @@ -1121,7 +1147,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { mutex_lock(&sched_domains_mutex); partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - rebuild_root_domains(); + dl_rebuild_rd_accounting(); mutex_unlock(&sched_domains_mutex); } @@ -1134,7 +1160,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_rwsem held. Takes cpus_read_lock(). + * Call with cpuset_mutex held. Takes cpus_read_lock(). */ static void rebuild_sched_domains_locked(void) { @@ -1145,7 +1171,7 @@ static void rebuild_sched_domains_locked(void) int ndoms; lockdep_assert_cpus_held(); - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * If we have raced with CPU hotplug, return early to avoid @@ -1196,9 +1222,9 @@ static void rebuild_sched_domains_locked(void) void rebuild_sched_domains(void) { cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); rebuild_sched_domains_locked(); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } @@ -1208,7 +1234,7 @@ void rebuild_sched_domains(void) * @new_cpus: the temp variable for the new effective_cpus mask * * Iterate through each task of @cs updating its cpus_allowed to the - * effective cpuset's. As this function is called with cpuset_rwsem held, + * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) @@ -1317,7 +1343,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, int old_prs, new_prs; int part_error = PERR_NONE; /* Partition error? */ - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * The parent must be a partition root. @@ -1540,7 +1566,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, * * On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * - * Called with cpuset_rwsem held + * Called with cpuset_mutex held */ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, bool force) @@ -1700,7 +1726,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * Check all its siblings and call update_cpumasks_hier() @@ -1950,12 +1976,12 @@ static void *cpuset_being_rebound; * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the - * effective cpuset's. As this function is called with cpuset_rwsem held, + * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */ static void update_tasks_nodemask(struct cpuset *cs) { - static nodemask_t newmems; /* protected by cpuset_rwsem */ + static nodemask_t newmems; /* protected by cpuset_mutex */ struct css_task_iter it; struct task_struct *task; @@ -1968,7 +1994,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * take while holding tasklist_lock. Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold - * the global cpuset_rwsem, we know that no other rebind effort + * the global cpuset_mutex, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. @@ -2014,7 +2040,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * * On legacy hierarchy, effective_mems will be the same with mems_allowed. * - * Called with cpuset_rwsem held + * Called with cpuset_mutex held */ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { @@ -2067,7 +2093,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * - * Call with cpuset_rwsem held. May take callback_lock during call. + * Call with cpuset_mutex held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_lock, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. @@ -2159,7 +2185,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this - * function is called with cpuset_rwsem held, cpuset membership stays + * function is called with cpuset_mutex held, cpuset membership stays * stable. */ static void update_tasks_flags(struct cpuset *cs) @@ -2179,7 +2205,7 @@ static void update_tasks_flags(struct cpuset *cs) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * - * Call with cpuset_rwsem held. + * Call with cpuset_mutex held. */ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, @@ -2229,7 +2255,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, * @new_prs: new partition root state * Return: 0 if successful, != 0 if error * - * Call with cpuset_rwsem held. + * Call with cpuset_mutex held. */ static int update_prstate(struct cpuset *cs, int new_prs) { @@ -2467,19 +2493,26 @@ static int cpuset_can_attach_check(struct cpuset *cs) return 0; } -/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ +static void reset_migrate_dl_data(struct cpuset *cs) +{ + cs->nr_migrate_dl_tasks = 0; + cs->sum_migrate_dl_bw = 0; +} + +/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; - struct cpuset *cs; + struct cpuset *cs, *oldcs; struct task_struct *task; int ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); + oldcs = cpuset_attach_old_cs; cs = css_cs(css); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); @@ -2487,21 +2520,46 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { - ret = task_can_attach(task, cs->effective_cpus); + ret = task_can_attach(task); if (ret) goto out_unlock; ret = security_task_setscheduler(task); if (ret) goto out_unlock; + + if (dl_task(task)) { + cs->nr_migrate_dl_tasks++; + cs->sum_migrate_dl_bw += task->dl.dl_bw; + } } + if (!cs->nr_migrate_dl_tasks) + goto out_success; + + if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { + int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); + + if (unlikely(cpu >= nr_cpu_ids)) { + reset_migrate_dl_data(cs); + ret = -EINVAL; + goto out_unlock; + } + + ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); + if (ret) { + reset_migrate_dl_data(cs); + goto out_unlock; + } + } + +out_success: /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); return ret; } @@ -2513,15 +2571,23 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + + if (cs->nr_migrate_dl_tasks) { + int cpu = cpumask_any(cs->effective_cpus); + + dl_bw_free(cpu, cs->sum_migrate_dl_bw); + reset_migrate_dl_data(cs); + } + + mutex_unlock(&cpuset_mutex); } /* - * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task() + * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach_task() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ @@ -2530,7 +2596,7 @@ static nodemask_t cpuset_attach_nodemask_to; static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) { - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); if (cs != &top_cpuset) guarantee_online_cpus(task, cpus_attach); @@ -2558,7 +2624,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cs = css_cs(css); lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); @@ -2594,11 +2660,17 @@ static void cpuset_attach(struct cgroup_taskset *tset) cs->old_mems_allowed = cpuset_attach_nodemask_to; + if (cs->nr_migrate_dl_tasks) { + cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; + oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; + reset_migrate_dl_data(cs); + } + cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* The various types of files and directories in a cpuset file system */ @@ -2630,7 +2702,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = 0; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -2666,7 +2738,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } @@ -2679,7 +2751,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = -ENODEV; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -2692,7 +2764,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } @@ -2725,7 +2797,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, * operation like this one can lead to a deadlock through kernfs * active_ref protection. Let's break the protection. Losing the * protection is okay as we check whether @cs is online after - * grabbing cpuset_rwsem anyway. This only happens on the legacy + * grabbing cpuset_mutex anyway. This only happens on the legacy * hierarchies. */ css_get(&cs->css); @@ -2733,7 +2805,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, flush_work(&cpuset_hotplug_work); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -2757,7 +2829,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_cpuset(trialcs); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); @@ -2905,13 +2977,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, css_get(&cs->css); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; retval = update_prstate(cs, val); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); css_put(&cs->css); return retval ?: nbytes; @@ -3124,7 +3196,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) return 0; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) @@ -3175,7 +3247,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return 0; } @@ -3196,7 +3268,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) struct cpuset *cs = css_cs(css); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (is_partition_valid(cs)) update_prstate(cs, 0); @@ -3215,7 +3287,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } @@ -3228,7 +3300,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { @@ -3241,7 +3313,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) } spin_unlock_irq(&callback_lock); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* @@ -3262,14 +3334,14 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) return 0; lockdep_assert_held(&cgroup_mutex); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); if (ret) goto out_unlock; - ret = task_can_attach(task, cs->effective_cpus); + ret = task_can_attach(task); if (ret) goto out_unlock; @@ -3283,7 +3355,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) */ cs->attach_in_progress++; out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); return ret; } @@ -3299,11 +3371,11 @@ static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) if (same_cs) return; - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* @@ -3331,7 +3403,7 @@ static void cpuset_fork(struct task_struct *task) } /* CLONE_INTO_CGROUP */ - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cpuset_attach_task(cs, task); @@ -3339,7 +3411,7 @@ static void cpuset_fork(struct task_struct *task) if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } struct cgroup_subsys cpuset_cgrp_subsys = { @@ -3369,8 +3441,6 @@ struct cgroup_subsys cpuset_cgrp_subsys = { int __init cpuset_init(void) { - BUG_ON(percpu_init_rwsem(&cpuset_rwsem)); - BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL)); @@ -3442,7 +3512,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); /* * Move tasks to the nearest ancestor with execution resources, @@ -3452,7 +3522,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, if (is_empty) remove_tasks_in_empty_cpuset(cs); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); } static void @@ -3503,14 +3573,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* * We have raced with task attaching. We wait until attaching * is finished, so we won't attach a task to an empty cpuset. */ if (cs->attach_in_progress) { - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); goto retry; } @@ -3604,7 +3674,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /** @@ -3634,7 +3704,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (on_dfl && !alloc_cpumasks(NULL, &tmp)) ptmp = &tmp; - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* fetch the available cpus/mems and find out which changed how */ cpumask_copy(&new_cpus, cpu_active_mask); @@ -3691,7 +3761,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) update_tasks_nodemask(&top_cpuset); } - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); /* if cpus or mems changed, we need to propagate to descendants */ if (cpus_updated || mems_updated) { @@ -4101,7 +4171,7 @@ void __cpuset_memory_pressure_bump(void) * - Used for /proc//cpuset. * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, - * and we take cpuset_rwsem, keeping cpuset_attach() from changing it + * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b23dcbeacdf33..0f6a92737c912 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7475,6 +7475,7 @@ static int __sched_setscheduler(struct task_struct *p, int reset_on_fork; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq *rq; + bool cpuset_locked = false; /* The pi code expects interrupts enabled */ BUG_ON(pi && in_interrupt()); @@ -7524,8 +7525,14 @@ static int __sched_setscheduler(struct task_struct *p, return retval; } - if (pi) - cpuset_read_lock(); + /* + * SCHED_DEADLINE bandwidth accounting relies on stable cpusets + * information. + */ + if (dl_policy(policy) || dl_policy(p->policy)) { + cpuset_locked = true; + cpuset_lock(); + } /* * Make sure no PI-waiters arrive (or leave) while we are @@ -7601,8 +7608,8 @@ static int __sched_setscheduler(struct task_struct *p, if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1; task_rq_unlock(rq, p, &rf); - if (pi) - cpuset_read_unlock(); + if (cpuset_locked) + cpuset_unlock(); goto recheck; } @@ -7669,7 +7676,8 @@ static int __sched_setscheduler(struct task_struct *p, task_rq_unlock(rq, p, &rf); if (pi) { - cpuset_read_unlock(); + if (cpuset_locked) + cpuset_unlock(); rt_mutex_adjust_pi(p); } @@ -7681,8 +7689,8 @@ static int __sched_setscheduler(struct task_struct *p, unlock: task_rq_unlock(rq, p, &rf); - if (pi) - cpuset_read_unlock(); + if (cpuset_locked) + cpuset_unlock(); return retval; } @@ -9075,8 +9083,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, return ret; } -int task_can_attach(struct task_struct *p, - const struct cpumask *cs_effective_cpus) +int task_can_attach(struct task_struct *p) { int ret = 0; @@ -9089,21 +9096,9 @@ int task_can_attach(struct task_struct *p, * success of set_cpus_allowed_ptr() on all attached tasks * before cpus_mask may be changed. */ - if (p->flags & PF_NO_SETAFFINITY) { + if (p->flags & PF_NO_SETAFFINITY) ret = -EINVAL; - goto out; - } - if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, - cs_effective_cpus)) { - int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); - - if (unlikely(cpu >= nr_cpu_ids)) - return -EINVAL; - ret = dl_cpu_busy(cpu, p); - } - -out: return ret; } @@ -9385,7 +9380,7 @@ static void cpuset_cpu_active(void) static int cpuset_cpu_inactive(unsigned int cpu) { if (!cpuhp_tasks_frozen) { - int ret = dl_cpu_busy(cpu, NULL); + int ret = dl_bw_check_overflow(cpu); if (ret) return ret; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f7d381b6c3133..9ce9810861ba5 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -16,6 +16,8 @@ * Fabio Checconi */ +#include + /* * Default limits for DL period; on the top end we guard against small util * tasks still getting ridiculously long effective runtimes, on the bottom end we @@ -2597,6 +2599,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) if (task_on_rq_queued(p) && p->dl.dl_runtime) task_non_contending(p); + /* + * In case a task is setscheduled out from SCHED_DEADLINE we need to + * keep track of that on its cpuset (for correct bandwidth tracking). + */ + dec_dl_tasks_cs(p); + if (!task_on_rq_queued(p)) { /* * Inactive timer is armed. However, p is leaving DEADLINE and @@ -2637,6 +2645,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) put_task_struct(p); + /* + * In case a task is setscheduled to SCHED_DEADLINE we need to keep + * track of that on its cpuset (for correct bandwidth tracking). + */ + inc_dl_tasks_cs(p); + /* If p is not queued we will update its parameters at next wakeup. */ if (!task_on_rq_queued(p)) { add_rq_bw(&p->dl, &rq->dl); @@ -3023,26 +3037,38 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, return ret; } -int dl_cpu_busy(int cpu, struct task_struct *p) +enum dl_bw_request { + dl_bw_req_check_overflow = 0, + dl_bw_req_alloc, + dl_bw_req_free +}; + +static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) { - unsigned long flags, cap; + unsigned long flags; struct dl_bw *dl_b; - bool overflow; + bool overflow = 0; rcu_read_lock_sched(); dl_b = dl_bw_of(cpu); raw_spin_lock_irqsave(&dl_b->lock, flags); - cap = dl_bw_capacity(cpu); - overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0); - if (!overflow && p) { - /* - * We reserve space for this task in the destination - * root_domain, as we can't fail after this point. - * We will free resources in the source root_domain - * later on (see set_cpus_allowed_dl()). - */ - __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu)); + if (req == dl_bw_req_free) { + __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu)); + } else { + unsigned long cap = dl_bw_capacity(cpu); + + overflow = __dl_overflow(dl_b, cap, 0, dl_bw); + + if (req == dl_bw_req_alloc && !overflow) { + /* + * We reserve space in the destination + * root_domain, as we can't fail after this point. + * We will free resources in the source root_domain + * later on (see set_cpus_allowed_dl()). + */ + __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu)); + } } raw_spin_unlock_irqrestore(&dl_b->lock, flags); @@ -3050,6 +3076,21 @@ int dl_cpu_busy(int cpu, struct task_struct *p) return overflow ? -EBUSY : 0; } + +int dl_bw_check_overflow(int cpu) +{ + return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0); +} + +int dl_bw_alloc(int cpu, u64 dl_bw) +{ + return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw); +} + +void dl_bw_free(int cpu, u64 dl_bw) +{ + dl_bw_manage(dl_bw_req_free, cpu, dl_bw); +} #endif #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d6d488e8eb554..b62d53d7c264f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -330,7 +330,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); extern bool __checkparam_dl(const struct sched_attr *attr); extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -extern int dl_cpu_busy(int cpu, struct task_struct *p); +extern int dl_bw_check_overflow(int cpu); #ifdef CONFIG_CGROUP_SCHED diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index af33c5a4166d4..1a87cb70f1eb5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4128,8 +4128,15 @@ static void *s_start(struct seq_file *m, loff_t *pos) * will point to the same string as current_trace->name. */ mutex_lock(&trace_types_lock); - if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) + if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) { + /* Close iter->trace before switching to the new current tracer */ + if (iter->trace->close) + iter->trace->close(iter); *iter->trace = *tr->current_trace; + /* Reopen the new current tracer */ + if (iter->trace->open) + iter->trace->open(iter); + } mutex_unlock(&trace_types_lock); #ifdef CONFIG_TRACER_MAX_TRACE @@ -5189,11 +5196,17 @@ int tracing_set_cpumask(struct trace_array *tr, !cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); +#ifdef CONFIG_TRACER_MAX_TRACE + ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); +#endif } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); +#ifdef CONFIG_TRACER_MAX_TRACE + ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); +#endif } } arch_spin_unlock(&tr->max_lock); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 590b3d51afae9..ba37f768e2f27 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter) { if (is_graph(iter->tr)) graph_trace_open(iter); - + else + iter->private = NULL; } static void irqsoff_trace_close(struct trace_iterator *iter) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 330aee1c1a49e..0469a04a355f2 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter) { if (is_graph(iter->tr)) graph_trace_open(iter); + else + iter->private = NULL; } static void wakeup_trace_close(struct trace_iterator *iter) diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c index 0d3a686b5ba29..fb8c0c5c2bd27 100644 --- a/lib/clz_ctz.c +++ b/lib/clz_ctz.c @@ -28,36 +28,16 @@ int __weak __clzsi2(int val) } EXPORT_SYMBOL(__clzsi2); -int __weak __clzdi2(long val); -int __weak __ctzdi2(long val); -#if BITS_PER_LONG == 32 - -int __weak __clzdi2(long val) +int __weak __clzdi2(u64 val); +int __weak __clzdi2(u64 val) { - return 32 - fls((int)val); + return 64 - fls64(val); } EXPORT_SYMBOL(__clzdi2); -int __weak __ctzdi2(long val) +int __weak __ctzdi2(u64 val); +int __weak __ctzdi2(u64 val) { - return __ffs((u32)val); + return __ffs64(val); } EXPORT_SYMBOL(__ctzdi2); - -#elif BITS_PER_LONG == 64 - -int __weak __clzdi2(long val) -{ - return 64 - fls64((u64)val); -} -EXPORT_SYMBOL(__clzdi2); - -int __weak __ctzdi2(long val) -{ - return __ffs64((u64)val); -} -EXPORT_SYMBOL(__ctzdi2); - -#else -#error BITS_PER_LONG not 32 or 64 -#endif diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 47d0c95b9a01e..250b4c67fac8f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4333,6 +4333,9 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas) struct ma_state *mas = wr_mas->mas; unsigned char node_pivots = mt_pivots[wr_mas->type]; + if (mt_in_rcu(mas->tree)) + return false; + if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) { if (new_end < node_pivots) wr_mas->pivots[new_end] = wr_mas->pivots[end]; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3c78e1e8b2ad6..2ec38f08e4f0e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1134,7 +1134,6 @@ static void set_iter_tags(struct radix_tree_iter *iter, void __rcu **radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter) { - slot++; iter->index = __radix_tree_iter_add(iter, 1); iter->next_index = iter->index; iter->tags = 0; diff --git a/mm/madvise.c b/mm/madvise.c index d03e149ffe6e8..5973399b2f9b7 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -654,8 +654,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, * deactivate all pages. */ if (folio_test_large(folio)) { - if (folio_mapcount(folio) != 1) - goto out; + if (folio_estimated_sharers(folio) != 1) + break; folio_get(folio); if (!folio_trylock(folio)) { folio_put(folio); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4457f9423e2c1..99de0328d1bed 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2591,10 +2591,13 @@ int soft_offline_page(unsigned long pfn, int flags) if (ret > 0) { ret = soft_offline_in_use_page(page); } else if (ret == 0) { - if (!page_handle_poison(page, true, false) && try_again) { - try_again = false; - flags &= ~MF_COUNT_INCREASED; - goto retry; + if (!page_handle_poison(page, true, false)) { + if (try_again) { + try_again = false; + flags &= ~MF_COUNT_INCREASED; + goto retry; + } + ret = -EBUSY; } } diff --git a/mm/shmem.c b/mm/shmem.c index aba041a3df739..10365ced5b1fc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -800,14 +800,16 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, start); struct page *page; unsigned long swapped = 0; + unsigned long max = end - 1; rcu_read_lock(); - xas_for_each(&xas, page, end - 1) { + xas_for_each(&xas, page, max) { if (xas_retry(&xas, page)) continue; if (xa_is_value(page)) swapped++; - + if (xas.xa_index == max) + break; if (need_resched()) { xas_pause(&xas); cond_resched_rcu(); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d5dc361dc104d..80bd104a4d42e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2909,6 +2909,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot) free_vm_area(area); return NULL; } + + flush_cache_vmap((unsigned long)area->addr, + (unsigned long)area->addr + count * PAGE_SIZE); + return area->addr; } EXPORT_SYMBOL_GPL(vmap_pfn); diff --git a/net/Makefile b/net/Makefile index 6a62e5b273781..0914bea9c335f 100644 --- a/net/Makefile +++ b/net/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_NET_DEVLINK) += devlink/ obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_X25) += x25/ diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index f1741fbfb6178..98a624f32b946 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -506,7 +506,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_elp_packet *elp_packet; struct batadv_hard_iface *primary_if; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr; bool res; int ret = NET_RX_DROP; @@ -514,6 +514,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, if (!res) goto free_skb; + ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto free_skb; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 033639df96d85..9f4815f4c8e89 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -124,8 +124,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - if (hard_iface->if_status != BATADV_IF_ACTIVE) + if (hard_iface->if_status != BATADV_IF_ACTIVE) { + kfree_skb(skb); return; + } batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, @@ -986,7 +988,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_ogm2_packet *ogm_packet; - struct ethhdr *ethhdr = eth_hdr(skb); + struct ethhdr *ethhdr; int ogm_offset; u8 *packet_pos; int ret = NET_RX_DROP; @@ -1000,6 +1002,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) goto free_skb; + ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto free_skb; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 41c1ad33d009f..24c9c0c3f3166 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -630,7 +630,19 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) */ void batadv_update_min_mtu(struct net_device *soft_iface) { - soft_iface->mtu = batadv_hardif_min_mtu(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + int limit_mtu; + int mtu; + + mtu = batadv_hardif_min_mtu(soft_iface); + + if (bat_priv->mtu_set_by_user) + limit_mtu = bat_priv->mtu_set_by_user; + else + limit_mtu = ETH_DATA_LEN; + + mtu = min(mtu, limit_mtu); + dev_set_mtu(soft_iface, mtu); /* Check if the local translate table should be cleaned up to match a * new (and smaller) MTU. diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index a5e4a4e976cf3..86e0664e0511b 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]; atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); + + rtnl_lock(); batadv_update_min_mtu(bat_priv->soft_iface); + rtnl_unlock(); } if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) { diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0f5c0679b55a2..38d411a52f331 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -154,11 +154,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { + struct batadv_priv *bat_priv = netdev_priv(dev); + /* check ranges */ if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; dev->mtu = new_mtu; + bat_priv->mtu_set_by_user = new_mtu; return 0; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 01d30c1e412c7..5d8cee74772fe 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -774,7 +774,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (roamed_back) { batadv_tt_global_free(bat_priv, tt_global, "Roaming canceled"); - tt_global = NULL; } else { /* The global entry has to be marked as ROAMING and * has to be kept for consistency purpose diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 758cd797a063b..76791815b26ba 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1546,6 +1546,12 @@ struct batadv_priv { /** @soft_iface: net device which holds this struct as private data */ struct net_device *soft_iface; + /** + * @mtu_set_by_user: MTU was set once by user + * protected by rtnl_lock + */ + int mtu_set_by_user; + /** * @bat_counters: mesh internal traffic statistic counters (see * batadv_counters) diff --git a/net/can/isotp.c b/net/can/isotp.c index b3c2a49b189cc..8c97f4061ffd7 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -175,12 +175,6 @@ static bool isotp_register_rxid(struct isotp_sock *so) return (isotp_bc_flags(so) == 0); } -static bool isotp_register_txecho(struct isotp_sock *so) -{ - /* all modes but SF_BROADCAST register for tx echo skbs */ - return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST); -} - static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer) { struct isotp_sock *so = container_of(hrtimer, struct isotp_sock, @@ -1176,7 +1170,7 @@ static int isotp_release(struct socket *sock) lock_sock(sk); /* remove current filters & unregister */ - if (so->bound && isotp_register_txecho(so)) { + if (so->bound) { if (so->ifindex) { struct net_device *dev; @@ -1293,14 +1287,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), isotp_rcv, sk, "isotp", sk); - if (isotp_register_txecho(so)) { - /* no consecutive frame echo skb in flight */ - so->cfecho = 0; + /* no consecutive frame echo skb in flight */ + so->cfecho = 0; - /* register for echo skb's */ - can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id), - isotp_rcv_echo, sk, "isotpe", sk); - } + /* register for echo skb's */ + can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id), + isotp_rcv_echo, sk, "isotpe", sk); dev_put(dev); @@ -1521,7 +1513,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg, case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ - if (so->bound && isotp_register_txecho(so)) { + if (so->bound) { if (isotp_register_rxid(so)) can_rx_unregister(dev_net(dev), dev, so->rxid, SINGLE_MASK(so->rxid), diff --git a/net/can/raw.c b/net/can/raw.c index 4abab2c3011a3..8c104339d538d 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -84,6 +84,8 @@ struct raw_sock { struct sock sk; int bound; int ifindex; + struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head notifier; int loopback; int recv_own_msgs; @@ -277,21 +279,24 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, if (!net_eq(dev_net(dev), sock_net(sk))) return; - if (ro->ifindex != dev->ifindex) + if (ro->dev != dev) return; switch (msg) { case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ - if (ro->bound) + if (ro->bound) { raw_disable_allfilters(dev_net(dev), dev, sk); + netdev_put(dev, &ro->dev_tracker); + } if (ro->count > 1) kfree(ro->filter); ro->ifindex = 0; ro->bound = 0; + ro->dev = NULL; ro->count = 0; release_sock(sk); @@ -337,6 +342,7 @@ static int raw_init(struct sock *sk) ro->bound = 0; ro->ifindex = 0; + ro->dev = NULL; /* set default filter to single entry dfilter */ ro->dfilter.can_id = 0; @@ -383,18 +389,14 @@ static int raw_release(struct socket *sock) list_del(&ro->notifier); spin_unlock(&raw_notifier_lock); + rtnl_lock(); lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) { - if (ro->ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(sock_net(sk), ro->ifindex); - if (dev) { - raw_disable_allfilters(dev_net(dev), dev, sk); - dev_put(dev); - } + if (ro->dev) { + raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); + netdev_put(ro->dev, &ro->dev_tracker); } else { raw_disable_allfilters(sock_net(sk), NULL, sk); } @@ -405,6 +407,7 @@ static int raw_release(struct socket *sock) ro->ifindex = 0; ro->bound = 0; + ro->dev = NULL; ro->count = 0; free_percpu(ro->uniq); @@ -412,6 +415,8 @@ static int raw_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + rtnl_unlock(); + sock_put(sk); return 0; @@ -422,6 +427,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); + struct net_device *dev = NULL; int ifindex; int err = 0; int notify_enetdown = 0; @@ -431,24 +437,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (addr->can_family != AF_CAN) return -EINVAL; + rtnl_lock(); lock_sock(sk); if (ro->bound && addr->can_ifindex == ro->ifindex) goto out; if (addr->can_ifindex) { - struct net_device *dev; - dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; goto out; } if (dev->type != ARPHRD_CAN) { - dev_put(dev); err = -ENODEV; - goto out; + goto out_put_dev; } + if (!(dev->flags & IFF_UP)) notify_enetdown = 1; @@ -456,7 +461,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) /* filters set by default/setsockopt */ err = raw_enable_allfilters(sock_net(sk), dev, sk); - dev_put(dev); + if (err) + goto out_put_dev; + } else { ifindex = 0; @@ -467,26 +474,30 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (!err) { if (ro->bound) { /* unregister old filters */ - if (ro->ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(sock_net(sk), - ro->ifindex); - if (dev) { - raw_disable_allfilters(dev_net(dev), - dev, sk); - dev_put(dev); - } + if (ro->dev) { + raw_disable_allfilters(dev_net(ro->dev), + ro->dev, sk); + /* drop reference to old ro->dev */ + netdev_put(ro->dev, &ro->dev_tracker); } else { raw_disable_allfilters(sock_net(sk), NULL, sk); } } ro->ifindex = ifindex; ro->bound = 1; + /* bind() ok -> hold a reference for new ro->dev */ + ro->dev = dev; + if (ro->dev) + netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL); } - out: +out_put_dev: + /* remove potential reference from dev_get_by_index() */ + if (dev) + dev_put(dev); +out: release_sock(sk); + rtnl_unlock(); if (notify_enetdown) { sk->sk_err = ENETDOWN; @@ -552,9 +563,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) { - dev = dev_get_by_index(sock_net(sk), ro->ifindex); - if (!dev) { + dev = ro->dev; + if (ro->bound && dev) { + if (dev->reg_state != NETREG_REGISTERED) { if (count > 1) kfree(filter); err = -ENODEV; @@ -595,7 +606,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->count = count; out_fil: - dev_put(dev); release_sock(sk); rtnl_unlock(); @@ -613,9 +623,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) { - dev = dev_get_by_index(sock_net(sk), ro->ifindex); - if (!dev) { + dev = ro->dev; + if (ro->bound && dev) { + if (dev->reg_state != NETREG_REGISTERED) { err = -ENODEV; goto out_err; } @@ -639,7 +649,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->err_mask = err_mask; out_err: - dev_put(dev); release_sock(sk); rtnl_unlock(); diff --git a/net/core/Makefile b/net/core/Makefile index 5857cec87b839..10edd66a8a372 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_LWTUNNEL) += lwtunnel.o obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o obj-$(CONFIG_DST_CACHE) += dst_cache.o obj-$(CONFIG_HWBM) += hwbm.o -obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2758b3f7c0214..9d4507aa736b7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2220,13 +2220,27 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) return err; } -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, - struct netlink_ext_ack *exterr) +int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, + struct netlink_ext_ack *exterr) { - return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy, + const struct ifinfomsg *ifmp; + const struct nlattr *attrs; + size_t len; + + ifmp = nla_data(nla_peer); + attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg); + len = nla_len(nla_peer) - sizeof(struct ifinfomsg); + + if (ifmp->ifi_index < 0) { + NL_SET_ERR_MSG_ATTR(exterr, nla_peer, + "ifindex can't be negative"); + return -EINVAL; + } + + return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy, exterr); } -EXPORT_SYMBOL(rtnl_nla_parse_ifla); +EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { @@ -3451,6 +3465,9 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) { link_specified = true; dev = __dev_get_by_index(net, ifm->ifi_index); + } else if (ifm->ifi_index < 0) { + NL_SET_ERR_MSG(extack, "ifindex can't be negative"); + return -EINVAL; } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { link_specified = true; dev = rtnl_dev_get(net, tb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b780827f5e0a5..bfececa9e244e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, inet->inet_dport); - inet->inet_id = get_random_u16(); + atomic_set(&inet->inet_id, get_random_u16()); err = dccp_connect(sk); rt = NULL; @@ -430,7 +430,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = get_random_u16(); + atomic_set(&newinet->inet_id, get_random_u16()); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index abc02d25edc14..c522c76a9f89f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,11 +312,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect); __poll_t dccp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask; struct sock *sk = sock->sk; + __poll_t mask; + u8 shutdown; + int state; sock_poll_wait(file, sock, wait); - if (sk->sk_state == DCCP_LISTEN) + + state = inet_sk_state_load(sk); + if (state == DCCP_LISTEN) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events @@ -325,20 +329,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, */ mask = 0; - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask = EPOLLERR; + shutdown = READ_ONCE(sk->sk_shutdown); - if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) + if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED) mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected? */ - if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { + if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { if (atomic_read(&sk->sk_rmem_alloc) > 0) mask |= EPOLLIN | EPOLLRDNORM; - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + if (!(shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ @@ -356,7 +361,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, } return mask; } - EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) diff --git a/net/devlink/Makefile b/net/devlink/Makefile new file mode 100644 index 0000000000000..3a60959f71eea --- /dev/null +++ b/net/devlink/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := leftover.o diff --git a/net/core/devlink.c b/net/devlink/leftover.c similarity index 99% rename from net/core/devlink.c rename to net/devlink/leftover.c index 5a4a4b34ac15c..63188d6a50fe9 100644 --- a/net/core/devlink.c +++ b/net/devlink/leftover.c @@ -9727,6 +9727,7 @@ static void devlink_notify_unregister(struct devlink *devlink) struct devlink_param_item *param_item; struct devlink_trap_item *trap_item; struct devlink_port *devlink_port; + struct devlink_linecard *linecard; struct devlink_rate *rate_node; struct devlink_region *region; @@ -9753,6 +9754,8 @@ static void devlink_notify_unregister(struct devlink *devlink) list_for_each_entry_reverse(devlink_port, &devlink->port_list, list) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); + list_for_each_entry_reverse(linecard, &devlink->linecard_list, list) + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); devlink_notify(devlink, DEVLINK_CMD_DEL); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ebb737ac9e894..04853c83c85c4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -340,7 +340,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, else inet->pmtudisc = IP_PMTUDISC_WANT; - inet->inet_id = 0; + atomic_set(&inet->inet_id, 0); sock_init_data(sock, sk); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 4d1af0cd7d99e..cb5dbee9e018f 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len reuseport_has_conns_set(sk); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); - inet->inet_id = get_random_u16(); + atomic_set(&inet->inet_id, get_random_u16()); sk_dst_set(sk, &rt->dst); err = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 08921b96f9728..f9b8a4a1d2edc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -312,7 +312,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr)); } - inet->inet_id = get_random_u16(); + atomic_set(&inet->inet_id, get_random_u16()); if (tcp_fastopen_defer_connect(sk, &err)) return err; @@ -1539,7 +1539,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - newinet->inet_id = get_random_u16(); + atomic_set(&newinet->inet_id, get_random_u16()); /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0f81492da0b46..55dc0610e8633 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1102,7 +1102,8 @@ static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *tail = skb_peek_tail(frames); struct ieee80211_rx_status *status; - if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index)) + if (tid_agg_rx->reorder_buf_filtered && + tid_agg_rx->reorder_buf_filtered & BIT_ULL(index)) return true; if (!tail) @@ -1143,7 +1144,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, } no_frame: - tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index); + if (tid_agg_rx->reorder_buf_filtered) + tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index); tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); } @@ -4162,6 +4164,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, u16 ssn, u64 filtered, u16 received_mpdus) { + struct ieee80211_local *local; struct sta_info *sta; struct tid_ampdu_rx *tid_agg_rx; struct sk_buff_head frames; @@ -4179,6 +4182,11 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, sta = container_of(pubsta, struct sta_info, sta); + local = sta->sdata->local; + WARN_ONCE(local->hw.max_rx_aggregation_subframes > 64, + "RX BA marker can't support max_rx_aggregation_subframes %u > 64\n", + local->hw.max_rx_aggregation_subframes); + if (!ieee80211_rx_data_set_sta(&rx, sta, -1)) return; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4c2df7af73f76..3c5cac9bd9b70 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10509,7 +10509,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, deleted = 0; mutex_lock(&nft_net->commit_mutex); if (!list_empty(&nf_tables_destroy_list)) - rcu_barrier(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 32cfd0a84b0e2..8c16681884b7e 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -901,12 +901,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules++, group, ret, bit_offset = 0; + int rule = f->rules, group, ret, bit_offset = 0; - ret = pipapo_resize(f, f->rules - 1, f->rules); + ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) return ret; + f->rules++; + for (group = 0; group < f->groups; group++) { int i, v; u8 mask; @@ -1051,7 +1053,9 @@ static int pipapo_expand(struct nft_pipapo_field *f, step++; if (step >= len) { if (!masks) { - pipapo_insert(f, base, 0); + err = pipapo_insert(f, base, 0); + if (err < 0) + return err; masks = 1; } goto out; @@ -1234,6 +1238,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else ret = pipapo_expand(f, start, end, f->groups * f->bb); + if (ret < 0) + return ret; + if (f->bsize > bsize_max) bsize_max = f->bsize; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 01d07e6a68119..e8f988e1c7e64 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1550,10 +1550,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static bool req_create_or_replace(struct nlmsghdr *n) +{ + return (n->nlmsg_flags & NLM_F_CREATE && + n->nlmsg_flags & NLM_F_REPLACE); +} + +static bool req_create_exclusive(struct nlmsghdr *n) +{ + return (n->nlmsg_flags & NLM_F_CREATE && + n->nlmsg_flags & NLM_F_EXCL); +} + +static bool req_change(struct nlmsghdr *n) +{ + return (!(n->nlmsg_flags & NLM_F_CREATE) && + !(n->nlmsg_flags & NLM_F_REPLACE) && + !(n->nlmsg_flags & NLM_F_EXCL)); +} + /* * Create/change qdisc. */ - static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1647,27 +1665,35 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, * * We know, that some child q is already * attached to this parent and have choice: - * either to change it or to create/graft new one. + * 1) change it or 2) create/graft new one. + * If the requested qdisc kind is different + * than the existing one, then we choose graft. + * If they are the same then this is "change" + * operation - just let it fallthrough.. * * 1. We are allowed to create/graft only - * if CREATE and REPLACE flags are set. + * if the request is explicitly stating + * "please create if it doesn't exist". * - * 2. If EXCL is set, requestor wanted to say, - * that qdisc tcm_handle is not expected + * 2. If the request is to exclusive create + * then the qdisc tcm_handle is not expected * to exist, so that we choose create/graft too. * * 3. The last case is when no flags are set. + * This will happen when for example tc + * utility issues a "change" command. * Alas, it is sort of hole in API, we * cannot decide what to do unambiguously. - * For now we select create/graft, if - * user gave KIND, which does not match existing. + * For now we select create/graft. */ - if ((n->nlmsg_flags & NLM_F_CREATE) && - (n->nlmsg_flags & NLM_F_REPLACE) && - ((n->nlmsg_flags & NLM_F_EXCL) || - (tca[TCA_KIND] && - nla_strcmp(tca[TCA_KIND], q->ops->id)))) - goto create_n_graft; + if (tca[TCA_KIND] && + nla_strcmp(tca[TCA_KIND], q->ops->id)) { + if (req_create_or_replace(n) || + req_create_exclusive(n)) + goto create_n_graft; + else if (req_change(n)) + goto create_n_graft2; + } } } } else { @@ -1701,6 +1727,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); return -ENOENT; } +create_n_graft2: if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) { q = qdisc_create(dev, dev_ingress_queue(dev), diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c806d272107ac..a11b0d903514c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -98,7 +98,7 @@ struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { - sctp_memory_pressure = 1; + WRITE_ONCE(sctp_memory_pressure, 1); } @@ -9472,7 +9472,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = get_random_u16(); + atomic_set(&newinet->inet_id, get_random_u16()); newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b098fde373abf..28c0771c4e8c3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, if (!rep->rr_rdmabuf) goto out_free; - if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) - goto out_free_regbuf; - rep->rr_cid.ci_completion_id = atomic_inc_return(&r_xprt->rx_ep->re_completion_ids); @@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, spin_unlock(&buf->rb_lock); return rep; -out_free_regbuf: - rpcrdma_regbuf_free(rep->rr_rdmabuf); out_free: kfree(rep); out: @@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) rep = rpcrdma_rep_create(r_xprt, temp); if (!rep) break; + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) { + rpcrdma_rep_put(buf, rep); + break; + } rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; trace_xprtrdma_post_recv(rep); diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index adcfb63b3550d..6f9ff4643dcbc 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2005,6 +2005,7 @@ static int filename_trans_read_helper(struct policydb *p, void *fp) if (!datum) goto out; + datum->next = NULL; *dst = datum; /* ebitmap_read() will at least init the bitmap */ @@ -2017,7 +2018,6 @@ static int filename_trans_read_helper(struct policydb *p, void *fp) goto out; datum->otype = le32_to_cpu(buf[0]); - datum->next = NULL; dst = &datum->next; } diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c index 82d4e0fda91be..d62a0e2ddf609 100644 --- a/sound/pci/ymfpci/ymfpci.c +++ b/sound/pci/ymfpci/ymfpci.c @@ -150,8 +150,8 @@ static inline int snd_ymfpci_create_gameport(struct snd_ymfpci *chip, int dev, i void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { } #endif /* SUPPORT_JOYSTICK */ -static int snd_card_ymfpci_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +static int __snd_card_ymfpci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) { static int dev; struct snd_card *card; @@ -333,6 +333,12 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci, return 0; } +static int snd_card_ymfpci_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id)); +} + static struct pci_driver ymfpci_driver = { .name = KBUILD_MODNAME, .id_table = snd_ymfpci_ids, diff --git a/sound/soc/amd/Kconfig b/sound/soc/amd/Kconfig index 3968c478c9381..44d4e6e51a358 100644 --- a/sound/soc/amd/Kconfig +++ b/sound/soc/amd/Kconfig @@ -71,6 +71,7 @@ config SND_SOC_AMD_RENOIR_MACH config SND_SOC_AMD_ACP5x tristate "AMD Audio Coprocessor-v5.x I2S support" depends on X86 && PCI + select SND_AMD_ACP_CONFIG help This option enables ACP v5.x support on AMD platform diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index c1ca3ceac5f2f..9a9571c3f08c0 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -217,7 +217,7 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82"), + DMI_MATCH(DMI_PRODUCT_NAME, "82V2"), } }, { @@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 14 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"), + } + }, { .driver_data = &acp6x_card, .matches = { diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index f2b5032daa6ae..2f4b0ee93aced 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -167,7 +167,7 @@ static int cs35l41_get_fs_mon_config_index(int freq) static const DECLARE_TLV_DB_RANGE(dig_vol_tlv, 0, 0, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, 0, 1), 1, 913, TLV_DB_MINMAX_ITEM(-10200, 1200)); -static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1); +static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 50, 100, 0); static const struct snd_kcontrol_new dre_ctrl = SOC_DAPM_SINGLE("Switch", CS35L41_PWR_CTRL3, 20, 1, 0); diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh index 47ab90596acb2..6358df5752f90 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh @@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end # add ports ip link set fbond master fab-br0 -ip link set veth1-bond down master fbond -ip link set veth2-bond down master fbond +ip link set veth1-bond master fbond +ip link set veth2-bond master fbond # bring up ip link set veth1-end up diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh index 7d9e73a43a49b..0c47faff9274b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -98,12 +98,12 @@ sb_occ_etc_check() port_pool_test() { - local exp_max_occ=288 + local exp_max_occ=$(devlink_cell_size_get) local max_occ devlink sb occupancy clearmax $DEVLINK_DEV - $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ -t ip -q devlink sb occupancy snapshot $DEVLINK_DEV @@ -126,12 +126,12 @@ port_pool_test() port_tc_ip_test() { - local exp_max_occ=288 + local exp_max_occ=$(devlink_cell_size_get) local max_occ devlink sb occupancy clearmax $DEVLINK_DEV - $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ -t ip -q devlink sb occupancy snapshot $DEVLINK_DEV @@ -154,16 +154,12 @@ port_tc_ip_test() port_tc_arp_test() { - local exp_max_occ=96 + local exp_max_occ=$(devlink_cell_size_get) local max_occ - if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then - exp_max_occ=144 - fi - devlink sb occupancy clearmax $DEVLINK_DEV - $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q + $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q devlink sb occupancy snapshot $DEVLINK_DEV diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 69c58362c0edf..48d1a68be1d52 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -71,14 +71,60 @@ TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_PROGS += test_ingress_egress_chaining.sh +TEST_GEN_FILES += nat6to4.o TEST_FILES := settings include ../lib.mk -include bpf/Makefile - $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread + +# Rules to generate bpf obj nat6to4.o +CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + +CCINCLUDE += -I../bpf +CCINCLUDE += -I../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + mkdir -p $@ + +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - &1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E -