From 939e303f1d0b2ed49484fcf4e505a913e333b9c2 Mon Sep 17 00:00:00 2001 From: Hang Zhang Date: Fri, 24 Apr 2020 19:02:29 -0700 Subject: [PATCH] [WIP] ImageNet training with mxnet gluon (#33) --- README.md | 22 ++++++++++++++++-- ablation.md | 4 ++-- resnest/gluon/model_store.py | 4 ++-- resnest/torch/ablation.py | 4 ++-- resnest/transforms.py | 2 +- scripts/gluon/README.md | 44 ++++++++++++++++++++++++++++++++++++ scripts/gluon/verify.py | 2 +- 7 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 scripts/gluon/README.md diff --git a/README.md b/README.md index 76dfff5..eecab83 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Unit Test](https://github.com/zhanghang1989/ResNeSt/workflows/Unit%20Test/badge.svg)](https://github.com/zhanghang1989/ResNeSt/actions) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/resnest-split-attention-networks/semantic-segmentation-on-ade20k)](https://paperswithcode.com/sota/semantic-segmentation-on-ade20k?p=resnest-split-attention-networks) + # ResNeSt Split-Attention Network, A New ResNet Variant. It significantly boosts the performance of downstream models such as Mask R-CNN, Cascade R-CNN and DeepLabV3. @@ -198,7 +200,7 @@ Training code and pretrained models are released at our [Detectron2 Fork](https: mIoU% - Deeplab-V3
+ Deeplab-V3
ResNet-50 80.39 42.1 @@ -218,6 +220,11 @@ Training code and pretrained models are released at our [Detectron2 Fork](https: 82.07 46.91 + + ResNeSt-269 (ours) + 82.62 + 47.60 + @@ -253,9 +260,20 @@ python verify.py --model resnest50 --crop-size 224 ## How to Train -- Training with Gluon: Please visit [GluonCV Toolkit](https://gluon-cv.mxnet.io/model_zoo/classification.html#resnest). +### ImageNet Models + +- Training with MXNet Gluon: Please visit [Gluon folder](./scripts/gluon/). - Training with PyTorch: Please visit [PyTorch Encoding Toolkit](https://hangzhang.org/PyTorch-Encoding/model_zoo/imagenet.html) (slightly worse than Gluon implementation). +### Detectron Models + +For object detection and instance segmentation models, please visit our [detectron2-ResNeSt fork](https://github.com/zhanghang1989/detectron2-ResNeSt). + +### Semantic Segmentation + +- Training with PyTorch: [Encoding Toolkit](https://hangzhang.org/PyTorch-Encoding/model_zoo/segmentation.html). +- Training with MXNet: [GluonCV Toolkit](https://gluon-cv.mxnet.io/model_zoo/segmentation.html#ade20k-dataset). 
+ ## Reference **ResNeSt: Split-Attention Networks** [[arXiv](https://arxiv.org/pdf/2004.08955.pdf)] diff --git a/ablation.md b/ablation.md index b0db836..125db4f 100644 --- a/ablation.md +++ b/ablation.md @@ -6,8 +6,8 @@ | ResNeSt-50-fast | 2s1x64d | 27.5M | 4.34 | 80.53 | 80.65 | | ResNeSt-50-fast | 4s1x64d | 31.9M | 4.35 | 80.76 | 80.90 | | ResNeSt-50-fast | 1s2x40d | 25.9M | 4.38 | 80.59 | 80.72 | -| ResNeSt-50-fast | 2s2x40d | 26.9M | 4.38 | xx.xx | 80.85 | -| ResNeSt-50-fast | 4s2x40d | 30.4M | 4.41 | 80.86 | 80.92 | +| ResNeSt-50-fast | 2s2x40d | 26.9M | 4.38 | 80.61 | 80.84 | +| ResNeSt-50-fast | 4s2x40d | 30.4M | 4.41 | 81.14 | 81.17 | | ResNeSt-50-fast | 1s4x24d | 25.7M | 4.42 | 80.99 | 80.97 | ### PyTorch Models diff --git a/resnest/gluon/model_store.py b/resnest/gluon/model_store.py index 0623735..53a9529 100644 --- a/resnest/gluon/model_store.py +++ b/resnest/gluon/model_store.py @@ -24,8 +24,8 @@ ('85eb779a5e313d74b5e5390dae02aa8082a0f469', 'resnest50_fast_2s1x64d'), ('3f215532c6d8e07a10df116309993d4479fc3e4b', 'resnest50_fast_4s1x64d'), ('af3514c2ec757a3a9666a75b82f142ed47d55bee', 'resnest50_fast_1s2x40d'), - ('d4a7f303531a333d8ad5cf6f73cab84d0a2dd752', 'resnest50_fast_2s2x40d'), - ('1a9f15bcd4ffddd793acdea05de01f73b096e614', 'resnest50_fast_4s2x40d'), + ('2db13245aa4967cf5e8617cb4911880dd41628a4', 'resnest50_fast_2s2x40d'), + ('b24d515797832e02da4da9c8a15effd5e44cfb56', 'resnest50_fast_4s2x40d'), ('7318153ddb5e542a20cc6c58192f3c6209cff9ed', 'resnest50_fast_1s4x24d'), ]} diff --git a/resnest/torch/ablation.py b/resnest/torch/ablation.py index d3756a7..2b89e7e 100644 --- a/resnest/torch/ablation.py +++ b/resnest/torch/ablation.py @@ -21,8 +21,8 @@ ('44938639', 'resnest50_fast_2s1x64d'), ('f74f3fc3', 'resnest50_fast_4s1x64d'), ('32830b84', 'resnest50_fast_1s2x40d'), - ('0e48a197', 'resnest50_fast_2s2x40d'), - ('59057aca', 'resnest50_fast_4s2x40d'), + ('9d126481', 'resnest50_fast_2s2x40d'), + ('41d14ed0', 'resnest50_fast_4s2x40d'), ('d4a4f76f', 
'resnest50_fast_1s4x24d'), ]} diff --git a/resnest/transforms.py b/resnest/transforms.py index 579f5ce..c1c7125 100644 --- a/resnest/transforms.py +++ b/resnest/transforms.py @@ -374,7 +374,7 @@ def __call__(self, img): if area < self.min_covered * (original_width * original_height): continue if width == original_width and height == original_height: - return self._fallback(img) # https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/preprocessing.py#L102 + return self._fallback(img) x = random.randint(0, original_width - width) y = random.randint(0, original_height - height) diff --git a/scripts/gluon/README.md b/scripts/gluon/README.md new file mode 100644 index 0000000..3362011 --- /dev/null +++ b/scripts/gluon/README.md @@ -0,0 +1,44 @@ +## Train ResNeSt with MXNet Gluon + +For training with PyTorch, please visit [PyTorch Encoding Toolkit](https://hangzhang.org/PyTorch-Encoding/model_zoo/imagenet.html) + +### Install MXNet with Horovod + +```bash +# assuming you have CUDA 10.0 on your machine +pip install mxnet-cu100 +HOROVOD_GPU_ALLREDUCE=NCCL pip install -v --no-cache-dir horovod +pip install --no-cache mpi4py +``` + +### Prepare ImageNet recordio data format + +- Unfortunately, this is required for training using MXNet Gluon. Please follow the [GluonCV tutorial](https://gluon-cv.mxnet.io/build/examples_datasets/recordio.html) to prepare the data. 
+- Copy the data into ramdisk (optional): + + ``` + cd ~/ + sudo mkdir -p /media/ramdisk + sudo mount -t tmpfs -o size=200G tmpfs /media/ramdisk + cp -r /home/ubuntu/data/ILSVRC2012/ /media/ramdisk + ``` + +### Training command + +Using ResNeSt-50 as the target model: + +```bash +horovodrun -np 64 --hostfile hosts python train.py \ +--rec-train /media/ramdisk/ILSVRC2012/train.rec \ +--rec-val /media/ramdisk/ILSVRC2012/val.rec \ +--model resnest50 --lr 0.05 --num-epochs 270 --batch-size 128 \ +--use-rec --dtype float32 --warmup-epochs 5 --last-gamma --no-wd \ +--label-smoothing --mixup --save-dir params_resnest50 \ +--log-interval 50 --eval-frequency 5 --auto_aug --input-size 224 +``` + +### Verify pretrained model + +```bash +python verify.py --model resnest50 --crop-size 224 --resume params_resnest50/imagenet-resnest50-269.params +``` \ No newline at end of file diff --git a/scripts/gluon/verify.py b/scripts/gluon/verify.py index ab2714c..5e7f7e2 100644 --- a/scripts/gluon/verify.py +++ b/scripts/gluon/verify.py @@ -8,7 +8,6 @@ from mxnet.gluon.data.vision import transforms from mxnet.contrib.quantization import * -from gluoncv.data import imagenet from resnest.gluon import get_model from PIL import Image @@ -136,6 +135,7 @@ def test(network, ctx, val_data, batch_fn): ]) if not opt.rec_dir: + from gluoncv.data import imagenet val_data = gluon.data.DataLoader( imagenet.classification.ImageNet(opt.data_dir, train=False).transform_first(transform_test), batch_size=batch_size, shuffle=False, num_workers=num_workers)