#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) 2020 Ross Wightman
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
"""EfficientNet Series
EfficientNet: `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks"
<https://arxiv.org/abs/1905.11946>`_
References:
https://github.com/facebookresearch/pycls/blob/main/pycls/models/effnet.py
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mobilenetv3.py
"""
import copy
import math
from numbers import Real
from typing import Any, Callable, Mapping, Sequence, Union

import megengine.hub as hub
import megengine.module as M
from basecls.layers import (
    SE,
    DropPath,
    activation,
    build_head,
    conv2d,
    init_weights,
    make_divisible,
    norm2d,
)
from basecls.utils import recursive_update, registers

from .mbnet import MBConv
from .resnet import AnyStage, SimpleStem
__all__ = ["FuseMBConv", "EffNet"]
class FuseMBConv(M.Module):
    """Fused MBConv block built from regular (non-depthwise) convolutions.

    When ``exp_r != 1.0`` the block is a ``kernel`` x ``kernel`` expansion conv (carrying the
    stride) with norm and activation, followed by a 1x1 projection conv with norm only.
    When ``exp_r == 1.0`` the expansion is omitted and the projection itself becomes the
    ``kernel`` x ``kernel`` strided conv, followed by norm and an activation.

    Args:
        w_in: input width.
        w_out: output width.
        stride: stride of conv.
        kernel: kernel of conv.
        exp_r: expansion ratio.
        se_r: SE ratio, relative to ``w_in``; SE is only created when it is positive.
        has_skip: whether apply skip connection (additionally requires ``w_in == w_out``
            and ``stride == 1``).
        drop_path_prob: drop path probability.
        norm_name: normalization function.
        act_name: activation function.
        **kwargs: extra keyword arguments are accepted and ignored.
    """

    def __init__(
        self,
        w_in: int,
        w_out: int,
        stride: int,
        kernel: int,
        exp_r: float,
        se_r: float,
        has_skip: bool,
        drop_path_prob: float,
        norm_name: str,
        act_name: str,
        **kwargs,
    ):
        super().__init__()
        expands = exp_r != 1.0
        w_exp = int(w_in * exp_r)

        # Expansion: only present when the ratio actually widens the features.
        if expands:
            self.exp = conv2d(w_in, w_exp, kernel, stride=stride)
            self.exp_bn = norm2d(norm_name, w_exp)
            self.exp_act = activation(act_name)
        w_mid = w_exp if expands else w_in

        # SE: the squeeze width is derived from the block input width.
        if se_r > 0.0:
            self.se = SE(w_mid, int(w_in * se_r), act_name)

        # Projection: 1x1 after an expansion, otherwise it carries the kernel and stride.
        if expands:
            proj_kernel, proj_stride = 1, 1
        else:
            proj_kernel, proj_stride = kernel, stride
        self.proj = conv2d(w_mid, w_out, proj_kernel, stride=proj_stride)
        self.proj_bn = norm2d(norm_name, w_out)
        self.has_proj_act = not expands
        if self.has_proj_act:
            self.proj_act = activation(act_name)

        # Skip: the residual is only possible when input and output shapes agree.
        self.has_skip = has_skip and w_in == w_out and stride == 1
        if self.has_skip:
            self.drop_path = DropPath(drop_path_prob)

    def forward(self, x):
        """Apply expansion (if any), SE (if any), projection and the optional residual."""
        shortcut = x

        expand = getattr(self, "exp", None)
        if expand is not None:
            x = self.exp_act(self.exp_bn(expand(x)))

        squeeze_excite = getattr(self, "se", None)
        if squeeze_excite is not None:
            x = squeeze_excite(x)

        x = self.proj_bn(self.proj(x))
        if self.has_proj_act:
            x = self.proj_act(x)

        if self.has_skip:
            x = self.drop_path(x) + shortcut
        return x
@registers.models.register()
class EffNet(M.Module):
    """EfficientNet model.

    Args:
        stem_w: stem width.
        block_name: block name or block callable, either a single one shared by all stages
            or one per stage.
        depths: depth for each stage (number of blocks in the stage).
        widths: width for each stage (width of each block in the stage).
        strides: strides for each stage (applies to the first block of each stage).
        kernels: kernel sizes for each stage.
        exp_rs: expansion ratios for MBConv blocks in each stage; a single number is
            repeated for every stage. Default: ``1.0``
        se_rs: Squeeze-and-Excitation (SE) ratios for each stage; a single number is
            repeated for every stage. Default: ``0.0``
        drop_path_prob: drop path probability; block ``i`` out of ``N`` blocks in total
            receives ``i / N * drop_path_prob``. Default: ``0.0``
        depth_mult: depth multiplier. Default: ``1.0``
        width_mult: width multiplier. Default: ``1.0``
        omit_mult: omit multiplier for stem width, head width, the first stage depth and
            the last stage depth, enabled in EfficientNet-Lite. Default: ``False``
        norm_name: normalization function. Default: ``"BN"``
        act_name: activation function. Default: ``"silu"``
        head: head args. The mapping passed in is not modified. Default: ``None``
    """

    def __init__(
        self,
        stem_w: int,
        block_name: Union[Union[str, Callable], Sequence[Union[str, Callable]]],
        depths: Sequence[int],
        widths: Sequence[int],
        strides: Sequence[int],
        kernels: Sequence[int],
        exp_rs: Union[float, Sequence[Union[float, Sequence[float]]]] = 1.0,
        se_rs: Union[float, Sequence[Union[float, Sequence[float]]]] = 0.0,
        drop_path_prob: float = 0.0,
        depth_mult: float = 1.0,
        width_mult: float = 1.0,
        omit_mult: bool = False,
        norm_name: str = "BN",
        act_name: str = "silu",
        head: Mapping[str, Any] = None,
    ):
        super().__init__()
        # With omit_mult the first and last stage keep their unscaled depth.
        depths = [
            d if omit_mult and i in (0, len(depths) - 1) else math.ceil(d * depth_mult)
            for i, d in enumerate(depths)
        ]
        self.depths = depths

        stem_w = stem_w if omit_mult else make_divisible(stem_w * width_mult, round_limit=0.9)
        self.stem = SimpleStem(3, stem_w, norm_name, act_name)

        # A single block spec (name or callable) is shared by every stage.
        if isinstance(block_name, str) or callable(block_name):
            block_name = [block_name] * len(depths)
        block_func = [self.get_block_func(bn) for bn in block_name]

        widths = [make_divisible(w * width_mult, round_limit=0.9) for w in widths]

        # Scalar ratios are repeated for every stage.
        if isinstance(exp_rs, Real):
            exp_rs = [exp_rs] * len(depths)
        if isinstance(se_rs, Real):
            se_rs = [se_rs] * len(depths)

        # Drop path probability grows linearly with the global block index; the flat
        # schedule is then cut into one list per stage.
        total_blocks = sum(depths)
        drop_path_prob_iter = (i / total_blocks * drop_path_prob for i in range(total_blocks))
        drop_path_probs = [[next(drop_path_prob_iter) for _ in range(d)] for d in depths]

        model_args = [depths, widths, strides, block_func, kernels, exp_rs, se_rs, drop_path_probs]
        prev_w = stem_w
        for i, (d, w, s, bf, k, exp_r, se_r, dp_p) in enumerate(zip(*model_args)):
            stage = AnyStage(
                prev_w,
                w,
                s,
                d,
                bf,
                kernel=k,
                exp_r=exp_r,
                se_r=se_r,
                se_from_exp=False,
                se_act_name=act_name,
                se_approx=False,
                se_rd_fn=int,
                has_proj_act=False,
                has_skip=True,
                drop_path_prob=dp_p,
                norm_name=norm_name,
                act_name=act_name,
            )
            # Stages are stored as s1, s2, ... and looked up by name in forward().
            setattr(self, f"s{i + 1}", stage)
            prev_w = w

        if head:
            if head.get("width", 0) > 0 and not omit_mult:
                # Scale a shallow copy: writing into the caller's mapping would compound
                # width_mult when the same head config is reused for another model.
                head = copy.copy(head)
                head["width"] = make_divisible(head["width"] * width_mult, round_limit=0.9)
            self.head = build_head(prev_w, head, norm_name, act_name)

        self.apply(init_weights)

    def forward(self, x):
        """Run the stem, every stage in order and, when present, the head."""
        x = self.stem(x)
        for stage_idx in range(1, len(self.depths) + 1):
            x = getattr(self, f"s{stage_idx}")(x)
        if getattr(self, "head", None) is not None:
            x = self.head(x)
        return x

    @staticmethod
    def get_block_func(name: Union[str, Callable]):
        """Retrieves the block function by name.

        Args:
            name: a callable, returned unchanged, or one of ``"FuseMBConv"`` / ``"MBConv"``.

        Returns:
            The block callable.

        Raises:
            ValueError: if ``name`` is neither callable nor a supported block name.
        """
        if callable(name):
            return name
        if isinstance(name, str):
            block_funcs = {
                "FuseMBConv": FuseMBConv,
                "MBConv": MBConv,
            }
            if name in block_funcs:
                return block_funcs[name]
        raise ValueError(f"Block '{name}' not supported")
def _build_effnet(**kwargs):
    """Construct an ``EffNet`` from the baseline configuration defined here.

    The baseline is updated with ``kwargs`` through ``recursive_update`` and the result is
    passed to ``EffNet`` as keyword arguments.
    """
    config = {
        "stem_w": 32,
        "block_name": MBConv,
        "depths": [1, 2, 2, 3, 3, 4, 1],
        "widths": [16, 24, 40, 80, 112, 192, 320],
        "strides": [1, 2, 2, 2, 1, 2, 1],
        "kernels": [3, 3, 5, 3, 5, 5, 3],
        "exp_rs": [1, 6, 6, 6, 6, 6, 6],
        "se_rs": 0.25,
        "drop_path_prob": 0.2,
        "head": {"name": "ClsHead", "width": 1280, "dropout_prob": 0.2},
    }
    recursive_update(config, kwargs)
    return EffNet(**config)
def _build_effnet_lite(**kwargs):
    """Construct the Lite flavour: no SE, multipliers omitted where supported, ReLU6.

    These defaults are updated with ``kwargs`` through ``recursive_update`` and handed to
    ``_build_effnet``.
    """
    config = {"se_rs": 0.0, "omit_mult": True, "act_name": "relu6"}
    recursive_update(config, kwargs)
    return _build_effnet(**config)
def _build_effnetv2(**kwargs):
    """Construct the six-stage V2 baseline: three ``FuseMBConv`` then three ``MBConv`` stages.

    These defaults are updated with ``kwargs`` through ``recursive_update`` and handed to
    ``_build_effnet``.
    """
    config = {
        "stem_w": 32,
        "block_name": [FuseMBConv, FuseMBConv, FuseMBConv, MBConv, MBConv, MBConv],
        "depths": [1, 2, 2, 3, 5, 8],
        "widths": [16, 32, 48, 96, 112, 192],
        "strides": [1, 2, 2, 2, 1, 2],
        "kernels": [3, 3, 3, 3, 3, 3],
        "exp_rs": [1, 4, 4, 4, 6, 6],
        "se_rs": [0, 0, 0, 0.25, 0.25, 0.25],
    }
    recursive_update(config, kwargs)
    return _build_effnet(**config)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b0/effnet_b0.pkl"
)
def effnet_b0(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=1.0`` and ``width_mult=1.0`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.0, "width_mult": 1.0}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b1/effnet_b1.pkl"
)
def effnet_b1(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=1.1`` and ``width_mult=1.0`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.1, "width_mult": 1.0}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b2/effnet_b2.pkl"
)
def effnet_b2(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=1.2``, ``width_mult=1.1`` and a head
    ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.2, "width_mult": 1.1, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b3/effnet_b3.pkl"
)
def effnet_b3(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=1.4``, ``width_mult=1.2`` and a head
    ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.4, "width_mult": 1.2, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b4/effnet_b4.pkl"
)
def effnet_b4(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=1.8``, ``width_mult=1.4`` and a head
    ``dropout_prob`` of ``0.4`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.8, "width_mult": 1.4, "head": {"dropout_prob": 0.4}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b5/effnet_b5.pkl"
)
def effnet_b5(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=2.2``, ``width_mult=1.6`` and a head
    ``dropout_prob`` of ``0.4`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 2.2, "width_mult": 1.6, "head": {"dropout_prob": 0.4}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b6/effnet_b6.pkl"
)
def effnet_b6(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=2.6``, ``width_mult=1.8`` and a head
    ``dropout_prob`` of ``0.5`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 2.6, "width_mult": 1.8, "head": {"dropout_prob": 0.5}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b7/effnet_b7.pkl"
)
def effnet_b7(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=3.1``, ``width_mult=2.0`` and a head
    ``dropout_prob`` of ``0.5`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 3.1, "width_mult": 2.0, "head": {"dropout_prob": 0.5}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b8/effnet_b8.pkl"
)
def effnet_b8(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=3.6``, ``width_mult=2.2`` and a head
    ``dropout_prob`` of ``0.5`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 3.6, "width_mult": 2.2, "head": {"dropout_prob": 0.5}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_l2/effnet_l2.pkl"
)
def effnet_l2(**kwargs):
    """Build via ``_build_effnet`` with ``depth_mult=5.3``, ``width_mult=4.3`` and a head
    ``dropout_prob`` of ``0.5`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 5.3, "width_mult": 4.3, "head": {"dropout_prob": 0.5}}
    recursive_update(defaults, kwargs)
    return _build_effnet(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b0_lite/effnet_b0_lite.pkl"
)
def effnet_b0_lite(**kwargs):
    """Build via ``_build_effnet_lite`` with ``depth_mult=1.0`` and ``width_mult=1.0`` as
    defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.0, "width_mult": 1.0}
    recursive_update(defaults, kwargs)
    return _build_effnet_lite(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b1_lite/effnet_b1_lite.pkl"
)
def effnet_b1_lite(**kwargs):
    """Build via ``_build_effnet_lite`` with ``depth_mult=1.1`` and ``width_mult=1.0`` as
    defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.1, "width_mult": 1.0}
    recursive_update(defaults, kwargs)
    return _build_effnet_lite(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b2_lite/effnet_b2_lite.pkl"
)
def effnet_b2_lite(**kwargs):
    """Build via ``_build_effnet_lite`` with ``depth_mult=1.2``, ``width_mult=1.1`` and a
    head ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.2, "width_mult": 1.1, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnet_lite(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b3_lite/effnet_b3_lite.pkl"
)
def effnet_b3_lite(**kwargs):
    """Build via ``_build_effnet_lite`` with ``depth_mult=1.4``, ``width_mult=1.2`` and a
    head ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.4, "width_mult": 1.2, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnet_lite(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnet_b4_lite/effnet_b4_lite.pkl"
)
def effnet_b4_lite(**kwargs):
    """Build via ``_build_effnet_lite`` with ``depth_mult=1.8``, ``width_mult=1.4`` and a
    head ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.8, "width_mult": 1.4, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnet_lite(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_b0/effnetv2_b0.pkl"
)
def effnetv2_b0(**kwargs):
    """Build via ``_build_effnetv2`` with ``depth_mult=1.0`` and ``width_mult=1.0`` as
    defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.0, "width_mult": 1.0}
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_b1/effnetv2_b1.pkl"
)
def effnetv2_b1(**kwargs):
    """Build via ``_build_effnetv2`` with ``depth_mult=1.1`` and ``width_mult=1.0`` as
    defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.1, "width_mult": 1.0}
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_b2/effnetv2_b2.pkl"
)
def effnetv2_b2(**kwargs):
    """Build via ``_build_effnetv2`` with ``depth_mult=1.2``, ``width_mult=1.1`` and a head
    ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.2, "width_mult": 1.1, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_b3/effnetv2_b3.pkl"
)
def effnetv2_b3(**kwargs):
    """Build via ``_build_effnetv2`` with ``depth_mult=1.4``, ``width_mult=1.2`` and a head
    ``dropout_prob`` of ``0.3`` as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {"depth_mult": 1.4, "width_mult": 1.2, "head": {"dropout_prob": 0.3}}
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_s/effnetv2_s.pkl"
)
def effnetv2_s(**kwargs):
    """Build via ``_build_effnetv2`` with a 24-wide stem and the stage depths and widths
    listed below as defaults.

    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {
        "stem_w": 24,
        "depths": [2, 4, 4, 6, 9, 15],
        "widths": [24, 48, 64, 128, 160, 256],
    }
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_m/effnetv2_m.pkl"
)
def effnetv2_m(**kwargs):
    """Build via ``_build_effnetv2`` with the seven-stage configuration below as defaults.

    The first three stages use ``FuseMBConv`` and the remaining four use ``MBConv``.
    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {
        "stem_w": 24,
        "block_name": [FuseMBConv, FuseMBConv, FuseMBConv, MBConv, MBConv, MBConv, MBConv],
        "depths": [3, 5, 5, 7, 14, 18, 5],
        "widths": [24, 48, 80, 160, 176, 304, 512],
        "strides": [1, 2, 2, 2, 1, 2, 1],
        "kernels": [3, 3, 3, 3, 3, 3, 3],
        "exp_rs": [1, 4, 4, 4, 6, 6, 6],
        "se_rs": [0, 0, 0, 0.25, 0.25, 0.25, 0.25],
        "head": {"dropout_prob": 0.3},
    }
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)
@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/effnet/effnetv2_l/effnetv2_l.pkl"
)
def effnetv2_l(**kwargs):
    """Build via ``_build_effnetv2`` with the seven-stage configuration below as defaults.

    The first three stages use ``FuseMBConv`` and the remaining four use ``MBConv``.
    ``kwargs`` are applied on top of these defaults with ``recursive_update``.
    """
    defaults = {
        "stem_w": 32,
        "block_name": [FuseMBConv, FuseMBConv, FuseMBConv, MBConv, MBConv, MBConv, MBConv],
        "depths": [4, 7, 7, 10, 19, 25, 7],
        "widths": [32, 64, 96, 192, 224, 384, 640],
        "strides": [1, 2, 2, 2, 1, 2, 1],
        "kernels": [3, 3, 3, 3, 3, 3, 3],
        "exp_rs": [1, 4, 4, 4, 6, 6, 6],
        "se_rs": [0, 0, 0, 0.25, 0.25, 0.25, 0.25],
        "head": {"dropout_prob": 0.4},
    }
    recursive_update(defaults, kwargs)
    return _build_effnetv2(**defaults)