Source code for basecls.models.resmlp

#!/usr/bin/env python3
# Copyright (c) 2015-present, Facebook, Inc.
# This file has been modified by Megvii ("Megvii Modifications").
# All Megvii Modifications are Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
"""ResMLP Series

ResMLP: `"ResMLP: Feedforward networks for image classification with data-efficient training"
<https://arxiv.org/abs/2105.03404>`_

References:
    https://github.com/facebookresearch/deit/blob/main/resmlp_models.py
"""
import megengine as mge
import megengine.functional as F
import megengine.hub as hub
import megengine.module as M

from basecls.layers import DropPath, init_vit_weights
from basecls.utils import recursive_update, registers

from .vit import FFN, PatchEmbed

__all__ = ["Affine", "ResMLPBlock", "ResMLP"]


class Affine(M.Module):
    """ResMLP Affine Layer."""

    def __init__(self, dim: int):
        super().__init__()
        self.alpha = mge.Parameter(F.ones(dim))
        self.beta = mge.Parameter(F.zeros(dim))

    def forward(self, x):
        return self.alpha * x + self.beta
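
# Illustrative note (not part of the original module): Affine replaces LayerNorm in
# ResMLP with a learnable per-channel scale and shift, broadcast over the last axis.
# A minimal sketch, assuming a (batch, patches, channels) input:
#
#   aff = Affine(dim=4)
#   x = F.ones((2, 3, 4))
#   y = aff(x)            # y == aff.alpha * x + aff.beta, shape (2, 3, 4)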


class ResMLPBlock(M.Module):
    """ResMLP block.

    Args:
        dim: Number of input channels.
        drop: Dropout ratio.
        drop_path: Stochastic depth rate.
        num_patches: Number of patches.
        init_scale: Initial value for LayerScale.
        ffn_ratio: Ratio of ffn hidden dim to embedding dim.
        act_name: Activation function.
    """

    def __init__(
        self,
        dim: int,
        drop: float,
        drop_path: float,
        num_patches: int,
        init_scale: float,
        ffn_ratio: float,
        act_name: str,
    ):
        super().__init__()
        self.norm1 = Affine(dim)
        self.attn = M.Linear(num_patches, num_patches)
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else None
        self.norm2 = Affine(dim)
        self.ffn = FFN(
            in_features=dim, hidden_features=int(ffn_ratio * dim), act_name=act_name, drop=drop
        )
        self.gamma1 = mge.Parameter(init_scale * F.ones(dim))
        self.gamma2 = mge.Parameter(init_scale * F.ones(dim))

    def forward(self, x):
        if self.drop_path:
            x = x + self.drop_path(
                self.gamma1 * self.attn(self.norm1(x).transpose(0, 2, 1)).transpose(0, 2, 1)
            )
            x = x + self.drop_path(self.gamma2 * self.ffn(self.norm2(x)))
        else:
            x = x + self.gamma1 * self.attn(self.norm1(x).transpose(0, 2, 1)).transpose(0, 2, 1)
            x = x + self.gamma2 * self.ffn(self.norm2(x))
        return x
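
# Shape-flow note (illustrative, not part of the original module): ``attn`` here is a
# plain Linear over the patch axis, so the (B, N, C) input is transposed to (B, C, N)
# before it and back afterwards; the FFN then mixes channels per patch. A minimal
# sketch, assuming a 196-patch, 384-channel configuration:
#
#   blk = ResMLPBlock(dim=384, drop=0.0, drop_path=0.0, num_patches=196,
#                     init_scale=0.1, ffn_ratio=4.0, act_name="gelu")
#   x = F.zeros((2, 196, 384))
#   y = blk(x)            # same shape as x: (2, 196, 384)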


@registers.models.register()
class ResMLP(M.Module):
    """ResMLP model.

    Args:
        img_size: Input image size. Default: ``224``
        patch_size: Patch token size. Default: ``16``
        in_chans: Number of input image channels. Default: ``3``
        embed_dim: Number of linear projection output channels. Default: ``768``
        depth: Number of ResMLP blocks (network depth). Default: ``12``
        drop_rate: Dropout rate. Default: ``0.0``
        drop_path_rate: Stochastic depth rate. Default: ``0.0``
        embed_layer: Patch embedding layer. Default: :py:class:`PatchEmbed`
        init_scale: Initial value for LayerScale. Default: ``1e-4``
        ffn_ratio: Ratio of ffn hidden dim to embedding dim. Default: ``4.0``
        act_name: Activation function. Default: ``"gelu"``
        num_classes: Number of classes. Default: ``1000``
    """

    def __init__(
        self,
        img_size: int = 224,
        patch_size: int = 16,
        in_chans: int = 3,
        embed_dim: int = 768,
        depth: int = 12,
        drop_rate: float = 0.0,
        drop_path_rate: float = 0.0,
        embed_layer: M.Module = PatchEmbed,
        init_scale: float = 1e-4,
        ffn_ratio: float = 4.0,
        act_name: str = "gelu",
        num_classes: int = 1000,
        **kwargs,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = embed_layer(img_size, patch_size, in_chans, embed_dim)
        num_patches = self.patch_embed.num_patches

        dpr = [drop_path_rate for _ in range(depth)]
        self.blocks = [
            ResMLPBlock(
                dim=embed_dim,
                drop=drop_rate,
                drop_path=dpr[i],
                num_patches=num_patches,
                init_scale=init_scale,
                ffn_ratio=ffn_ratio,
                act_name=act_name,
            )
            for i in range(depth)
        ]

        self.norm = Affine(embed_dim)
        self.head = M.Linear(embed_dim, num_classes) if num_classes > 0 else None

        self.apply(init_vit_weights)

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)
        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)
        # global average pooling over the patch tokens
        x = x.mean(axis=1).reshape(B, 1, -1)
        x = x[:, 0]
        if self.head:
            x = self.head(x)
        return x
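
# Construction sketch (illustrative, not part of the original module): the forward pass
# embeds the image into patches, applies the ResMLP blocks, average-pools over patches
# and classifies. Assuming the defaults above with a 384-dim embedding:
#
#   net = ResMLP(img_size=224, patch_size=16, embed_dim=384, depth=12, num_classes=1000)
#   imgs = F.zeros((2, 3, 224, 224))
#   out = net(imgs)       # shape (2, 1000)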


def _build_resmlp(**kwargs):
    model_args = dict(embed_dim=384, drop_path_rate=0.05)
    recursive_update(model_args, kwargs)
    return ResMLP(**model_args)


@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/resmlp/resmlp_s12/resmlp_s12.pkl"
)
def resmlp_s12(**kwargs):
    model_args = dict(depth=12, init_scale=0.1)
    recursive_update(model_args, kwargs)
    return _build_resmlp(**model_args)


@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/resmlp/resmlp_s24/resmlp_s24.pkl"
)
def resmlp_s24(**kwargs):
    model_args = dict(depth=24, init_scale=1e-5)
    recursive_update(model_args, kwargs)
    return _build_resmlp(**model_args)


@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/resmlp/resmlp_s36/resmlp_s36.pkl"
)
def resmlp_s36(**kwargs):
    model_args = dict(depth=36, init_scale=1e-6)
    recursive_update(model_args, kwargs)
    return _build_resmlp(**model_args)


@registers.models.register()
@hub.pretrained(
    "https://data.megengine.org.cn/research/basecls/models/resmlp/resmlp_b24/resmlp_b24.pkl"
)
def resmlp_b24(**kwargs):
    model_args = dict(patch_size=8, embed_dim=768, depth=24, init_scale=1e-6)
    recursive_update(model_args, kwargs)
    return _build_resmlp(**model_args)
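

# Usage sketch (illustrative addition, not part of the original module): build the
# smallest registered variant and run a forward pass on a dummy batch. The pretrained
# weight URLs are attached by the ``hub.pretrained`` decorators above; only plain
# construction is shown here.
if __name__ == "__main__":
    model = resmlp_s12()
    model.eval()
    images = F.zeros((2, 3, 224, 224))  # (batch, channels, height, width)
    logits = model(images)
    print(logits.shape)  # expected: (2, 1000)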