basecls.models
- basecls.models.build_model(cfg)[source]
  The factory function to build a model.
  Note: if cfg.model does not have the attr head, this function will build the model with the default head. Otherwise, if cfg.model.head is None, this function will build the model without any head.
  Note: if cfg.model.head does not have the attr w_out and cfg.num_classes exists, w_out will be overridden by cfg.num_classes.
  - Parameters
    cfg (ConfigDict) – config for building the model.
  - Return type
    Module
  - Returns
    A model.
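  A minimal usage sketch. ConfigDict is the parameter type named above, but its import path, exact schema, and the registered model names are assumptions about the basecls config system, not facts from this page:

      from basecore.config import ConfigDict  # assumed import path
      from basecls.models import build_model

      cfg = ConfigDict(
          model=dict(name="resnet50"),  # assumed field; no `head` attr -> default head
          num_classes=10,               # per the note above, overrides head w_out
      )
      model = build_model(cfg)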
- basecls.models.sync_model(model)[source]
  Sync parameters and buffers.
  - Parameters
    model (Module) – model for syncing.
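  For example, in the usual data-parallel setup (each worker builds its own replica, then synchronizes before training; `cfg` is the illustrative config from the sketch above):

      from basecls.models import build_model, sync_model

      model = build_model(cfg)  # each worker builds its own replica
      sync_model(model)         # align parameters and buffers across workers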
- class basecls.models.EffNet(stem_w, block_name, depths, widths, strides, kernels, exp_rs=1.0, se_rs=0.0, drop_path_prob=0.0, depth_mult=1.0, width_mult=1.0, omit_mult=False, norm_name='BN', act_name='silu', head=None)[source]
  Bases: Module
  EfficientNet model.
  - Parameters
    stem_w (int) – stem width.
    block_name (Union[str, Callable, Sequence[Union[str, Callable]]]) – block name.
    depths (Sequence[int]) – depth for each stage (number of blocks in the stage).
    widths (Sequence[int]) – width for each stage (width of each block in the stage).
    strides (Sequence[int]) – strides for each stage (applies to the first block of each stage).
    kernels (Sequence[int]) – kernel sizes for each stage.
    exp_rs (Union[float, Sequence[Union[float, Sequence[float]]]]) – expansion ratios for MBConv blocks in each stage. Default: 1.0
    se_rs (Union[float, Sequence[Union[float, Sequence[float]]]]) – Squeeze-and-Excitation (SE) ratios. Default: 0.0
    drop_path_prob (float) – drop path probability. Default: 0.0
    depth_mult (float) – depth multiplier. Default: 1.0
    width_mult (float) – width multiplier. Default: 1.0
    omit_mult (bool) – omit the multipliers for stem width, head width, the first stage depth and the last stage depth; enabled in EfficientNet-Lite. Default: False
    norm_name (str) – normalization function. Default: "BN"
    act_name (str) – activation function. Default: "silu"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
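  A minimal instantiation sketch. The stage layout is made up for illustration (not a named EfficientNet variant), and "mbconv" as a block name is an assumption about the block registry:

      from basecls.models import EffNet

      model = EffNet(
          stem_w=32,
          block_name="mbconv",    # assumed registered block name
          depths=(1, 2, 2),
          widths=(16, 24, 40),
          strides=(1, 2, 2),
          kernels=(3, 3, 5),
          se_rs=0.25,
          head=dict(w_out=1000),  # head args; w_out per the build_model note
      )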
- class basecls.models.HRNet(stage_modules, stage_blocks, stage_block_names, stage_channels, w_stem=64, multi_scale_output=True, merge_block_name='bottleneck', merge_channels=[32, 64, 128, 256], norm_name='BN', act_name='relu', head=None, **kwargs)[source]
  Bases: Module
  HRNet model.
  - Parameters
    stage_modules (List[int]) – Number of modules for each stage.
    stage_blocks (List[List[int]]) – Number of blocks for each module in stages.
    stage_block_names (List[str]) – Branch block types for each stage.
    stage_channels (List[List[int]]) – Number of channels for each stage.
    w_stem (int) – Stem width. Default: 64
    multi_scale_output (bool) – Whether to output multi-scale features. Default: True
    merge_block_name (str) – Merge block type. Default: "bottleneck"
    merge_channels (List[int]) – Channels of each scale in the merge block. Default: [32, 64, 128, 256]
    norm_name (str) – Normalization layer. Default: "BN"
    act_name (str) – Activation function. Default: "relu"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
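  An instantiation sketch with values loosely following the HRNet-W18 layout; treat the exact numbers and the "basic"/"bottleneck" block names as illustrative assumptions:

      from basecls.models import HRNet

      model = HRNet(
          stage_modules=[1, 1, 4, 3],
          stage_blocks=[[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
          stage_block_names=["bottleneck", "basic", "basic", "basic"],
          stage_channels=[[64], [18, 36], [18, 36, 72], [18, 36, 72, 144]],
          head=dict(w_out=1000),  # illustrative head args
      )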
- class basecls.models.MBNet(stem_w, depths, widths, strides, kernels, exp_rs=1.0, se_rs=0.0, stage_act_names=None, has_proj_act=False, has_skip=True, drop_path_prob=0.0, width_mult=1.0, norm_name='BN', act_name='relu6', head=None)[source]
  Bases: Module
  MobileNet model.
  - Parameters
    stem_w (int) – stem width.
    depths (Sequence[int]) – depth for each stage (number of blocks in the stage).
    widths (Sequence[int]) – width for each stage (width of each block in the stage).
    strides (Sequence[int]) – strides for each stage (applies to the first block of each stage).
    kernels (Sequence[int]) – kernel sizes for each stage.
    exp_rs (Union[float, Sequence[Union[float, Sequence[float]]]]) – expansion ratios for MobileNet basic blocks in each stage. Default: 1.0
    se_rs (Union[float, Sequence[Union[float, Sequence[float]]]]) – Squeeze-and-Excitation (SE) ratios. Default: 0.0
    stage_act_names (Optional[Sequence[str]]) – activation functions for stages. Default: None
    has_proj_act (bool) – whether to apply activation to the projection output. Default: False
    has_skip (bool) – whether to apply skip connections. Default: True
    drop_path_prob (float) – drop path probability. Default: 0.0
    width_mult (float) – width multiplier. Default: 1.0
    norm_name (str) – normalization function. Default: "BN"
    act_name (str) – activation function. Default: "relu6"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
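  An instantiation sketch; the 3-stage layout below is illustrative, not a named MobileNet variant:

      from basecls.models import MBNet

      model = MBNet(
          stem_w=32,
          depths=(1, 2, 3),
          widths=(16, 24, 32),
          strides=(1, 2, 2),
          kernels=(3, 3, 3),
          exp_rs=(1, 6, 6),       # per-stage expansion ratios
          head=dict(w_out=1000),  # illustrative head args
      )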
- class basecls.models.RegNet(stem_name, stem_w, block_name, depth, w0, wa, wm, group_w, stride=2, bot_mul=1.0, se_r=0.0, drop_path_prob=0.0, zero_init_final_gamma=False, norm_name='BN', act_name='relu', head=None)[source]
  Bases: ResNet
  RegNet model.
  - Parameters
    stem_w (int) – stem width.
    depth (int) – depth.
    w0 (int) – initial width.
    wa (float) – slope.
    wm (float) – quantization.
    group_w (int) – group width for each stage (applies to bottleneck block).
    stride (int) – stride for each stage (applies to the first block of each stage). Default: 2
    bot_mul (float) – bottleneck multiplier for each stage (applies to bottleneck block). Default: 1.0
    se_r (float) – Squeeze-and-Excitation (SE) ratio. Default: 0.0
    drop_path_prob (float) – drop path probability. Default: 0.0
    zero_init_final_gamma (bool) – whether to zero-initialize the final gamma. Default: False
    norm_name (str) – normalization function. Default: "BN"
    act_name (str) – activation function. Default: "relu"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
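  An instantiation sketch with values loosely following RegNetX-200MF; the stem and block names are assumed registry keys, not confirmed by this page:

      from basecls.models import RegNet

      model = RegNet(
          stem_name="simple_stem_in",         # assumed stem name
          stem_w=32,
          block_name="res_bottleneck_block",  # assumed block name
          depth=13,
          w0=24,
          wa=36.44,
          wm=2.49,
          group_w=8,
          head=dict(w_out=1000),
      )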
- class basecls.models.RepVGG(num_blocks, width_multiplier, head=None, groups=1, se_r=0.0, act_name='relu', deploy=False)[source]
  Bases: Module
  RepVGG model.
  Use RepVGG.convert_to_deploy() to convert a training RepVGG to deploy:

      model = RepVGG(..., deploy=False)
      model.load_state_dict(...)
      _ = RepVGG.convert_to_deploy(model)

  - Parameters
    num_blocks (Sequence[int]) – number of blocks in each stage.
    width_multiplier (Sequence[int]) – RepVGG widths, base_width is [64, 128, 256, 512].
    head (Optional[Mapping[str, Any]]) – head args. Default: None
    groups (Union[int, List[Union[int, List[int]]]]) – number of groups for blocks. Default: 1
    se_r (float) – Squeeze-and-Excitation (SE) ratio. Default: 0.0
    act_name (str) – activation function. Default: "relu"
    deploy (bool) – whether to build the RepVGG in deploy (reparameterized) mode. Default: False
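  An end-to-end sketch; the stage layout is illustrative, not a named RepVGG variant:

      from basecls.models import RepVGG

      model = RepVGG(
          num_blocks=[2, 4, 14, 1],
          width_multiplier=[1, 1, 1, 2],  # scales base_width [64, 128, 256, 512]
          head=dict(w_out=1000),
          deploy=False,                   # train-time multi-branch structure
      )
      # ... train or load weights ...
      _ = RepVGG.convert_to_deploy(model)  # fuse branches for inference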
- class basecls.models.ResMLP(img_size=224, patch_size=16, in_chans=3, embed_dim=768, depth=12, drop_rate=0.0, drop_path_rate=0.0, embed_layer=PatchEmbed, init_scale=1e-4, ffn_ratio=4.0, act_name='gelu', num_classes=1000, **kwargs)[source]
  Bases: Module
  ResMLP model.
  - Parameters
    img_size (int) – Input image size. Default: 224
    patch_size (int) – Patch token size. Default: 16
    in_chans (int) – Number of input image channels. Default: 3
    embed_dim (int) – Number of linear projection output channels. Default: 768
    depth (int) – Depth of the encoder (number of layers). Default: 12
    drop_rate (float) – Dropout rate. Default: 0.0
    drop_path_rate (float) – Stochastic depth rate. Default: 0.0
    embed_layer (Module) – Patch embedding layer. Default: PatchEmbed
    init_scale (float) – Initial value for LayerScale. Default: 1e-4
    ffn_ratio (float) – Ratio of FFN hidden dim to embedding dim. Default: 4.0
    act_name (str) – Activation function. Default: "gelu"
    num_classes (int) – Number of classes. Default: 1000
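  An instantiation sketch roughly matching a ResMLP-S12-sized configuration (illustrative; the remaining arguments keep their defaults):

      from basecls.models import ResMLP

      model = ResMLP(embed_dim=384, depth=12, patch_size=16)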
- class basecls.models.ResNet(stem_name, stem_w, block_name, depths, widths, strides, bot_muls=1.0, group_ws=None, se_r=0.0, avg_down=False, drop_path_prob=0.0, zero_init_final_gamma=False, norm_name='BN', act_name='relu', head=None)[source]
  Bases: Module
  ResNet model.
  - Parameters
    stem_w (int) – stem width.
    depths (Sequence[int]) – depth for each stage (number of blocks in the stage).
    widths (Sequence[int]) – width for each stage (width of each block in the stage).
    strides (Sequence[int]) – strides for each stage (applies to the first block of each stage).
    bot_muls (Union[float, Sequence[float]]) – bottleneck multipliers for each stage (applies to bottleneck block). Default: 1.0
    group_ws (Optional[Sequence[int]]) – group widths for each stage (applies to bottleneck block). Default: None
    se_r (float) – Squeeze-and-Excitation (SE) ratio. Default: 0.0
    drop_path_prob (float) – drop path probability. Default: 0.0
    zero_init_final_gamma (bool) – whether to zero-initialize the final gamma. Default: False
    norm_name (str) – normalization function. Default: "BN"
    act_name (str) – activation function. Default: "relu"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
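  An instantiation sketch with a ResNet-18-like layout; "res_stem" and "res_basic_block" are assumed registry names, not confirmed by this page:

      from basecls.models import ResNet

      model = ResNet(
          stem_name="res_stem",          # assumed stem name
          stem_w=64,
          block_name="res_basic_block",  # assumed block name
          depths=(2, 2, 2, 2),
          widths=(64, 128, 256, 512),
          strides=(1, 2, 2, 2),
          head=dict(w_out=1000),
      )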
- class basecls.models.SNetV2(block, stem_w, depths, widths, strides, kernels, use_maxpool=True, se_r=0.0, drop_path_prob=0.0, norm_name='BN', act_name='relu', head=None)[source]
  Bases: Module
  ShuffleNetV2 model.
  - Parameters
    block (Callable) – building block to use, SNV2XceptionBlock for v2+.
    stem_w (int) – width for the stem layer.
    depths (Sequence[int]) – depth for each stage (number of blocks in the stage).
    widths (Sequence[int]) – width for each stage (width of each block in the stage).
    strides (Sequence[int]) – strides for each stage (applies to the first block of each stage).
    kernels (Sequence[int]) – kernel sizes for each stage.
    use_maxpool (bool) – whether to use a stride-2 max-pooling layer after the stem. Default: True
    se_r (float) – Squeeze-and-Excitation (SE) ratio. Default: 0.0
    drop_path_prob (float) – drop path probability. Default: 0.0
    norm_name (str) – normalization function. Default: "BN"
    act_name (str) – activation function. Default: "relu"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
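  An instantiation sketch; the widths and depths are illustrative, and the import path for SNV2XceptionBlock is an assumption:

      from basecls.models import SNetV2
      from basecls.models.snet import SNV2XceptionBlock  # assumed import path

      model = SNetV2(
          block=SNV2XceptionBlock,  # v2+ style block, per the note above
          stem_w=24,
          depths=(4, 8, 4),
          widths=(48, 96, 192),
          strides=(2, 2, 2),
          kernels=(3, 3, 3),
          head=dict(w_out=1000),
      )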
- class basecls.models.SwinTransformer(img_size=224, patch_size=4, in_chans=3, embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7, ffn_ratio=4.0, qkv_bias=True, qk_scale=None, ape=False, patch_norm=True, drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.1, embed_layer=PatchEmbed, norm_name='LN', act_name='gelu', num_classes=1000, **kwargs)[source]
  Bases: Module
  Swin Transformer model.
  An implementation of: Swin Transformer: Hierarchical Vision Transformer using Shifted Windows - https://arxiv.org/pdf/2103.14030
  - Parameters
    img_size (int) – Input image size. Default: 224
    patch_size (int) – Patch size. Default: 4
    in_chans (int) – Number of input image channels. Default: 3
    embed_dim (int) – Patch embedding dimension. Default: 96
    depths (Sequence[int]) – Depth of each Swin Transformer layer.
    num_heads (Sequence[int]) – Number of attention heads in different layers.
    window_size (int) – Window size. Default: 7
    ffn_ratio (float) – Ratio of FFN hidden dim to embedding dim. Default: 4.0
    qkv_bias (bool) – If True, add a learnable bias to query, key, value. Default: True
    qk_scale (Optional[float]) – Override the default qk scale of head_dim ** -0.5 if set. Default: None
    ape (bool) – If True, add absolute position embedding to the patch embedding. Default: False
    patch_norm (bool) – If True, add normalization after patch embedding. Default: True
    drop_rate (float) – Dropout rate. Default: 0.0
    attn_drop_rate (float) – Attention dropout rate. Default: 0.0
    drop_path_rate (float) – Stochastic depth rate. Default: 0.1
    embed_layer (Module) – Patch embedding layer. Default: PatchEmbed
    norm_name (str) – Normalization layer. Default: "LN"
    act_name (str) – Activation layer. Default: "gelu"
    num_classes (int) – Number of classes for classification head. Default: 1000
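  The defaults above already describe a Swin-T-sized model, so instantiation can be as simple as the sketch below (the Swin-S-like override values are illustrative):

      from basecls.models import SwinTransformer

      model = SwinTransformer()  # Swin-T-like: embed_dim=96, depths=[2, 2, 6, 2]
      # A Swin-S-like variant deepens the third stage:
      model_s = SwinTransformer(depths=[2, 2, 18, 2], drop_path_rate=0.3)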
- class basecls.models.VGG(depths, widths, norm_name=None, act_name='relu', head=None)[source]
  Bases: Module
  VGG model.
  - Parameters
    depths (Sequence[int]) – depth for each stage (number of blocks in the stage).
    widths (Sequence[int]) – width for each stage (width of each block in the stage).
    norm_name (Optional[str]) – normalization function. Default: None
    act_name (str) – activation function. Default: "relu"
    head (Optional[Mapping[str, Any]]) – head args. Default: None
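  An instantiation sketch with a VGG-16-like stage layout (13 conv layers in 5 stages; values illustrative):

      from basecls.models import VGG

      model = VGG(
          depths=(2, 2, 3, 3, 3),
          widths=(64, 128, 256, 512, 512),
          head=dict(w_out=1000),
      )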
- class basecls.models.ViT(img_size=224, patch_size=16, in_chans=3, embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, qk_scale=None, representation_size=None, distilled=False, drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.0, embed_layer=PatchEmbed, norm_name='LN', act_name='gelu', num_classes=1000, **kwargs)[source]
  Bases: Module
  ViT model.
  - Parameters
    img_size (int) – Input image size. Default: 224
    patch_size (int) – Patch token size. Default: 16
    in_chans (int) – Number of input image channels. Default: 3
    embed_dim (int) – Number of linear projection output channels. Default: 768
    depth (int) – Depth of the Transformer encoder. Default: 12
    num_heads (int) – Number of attention heads. Default: 12
    ffn_ratio (float) – Ratio of FFN hidden dim to embedding dim. Default: 4.0
    qkv_bias (bool) – If True, add a learnable bias to query, key, value. Default: True
    qk_scale (Optional[float]) – Override the default qk scale of head_dim ** -0.5 if set. Default: None
    representation_size (Optional[int]) – Size of the representation layer (pre-logits). Default: None
    distilled (bool) – Whether to include a distillation token and head. Default: False
    drop_rate (float) – Dropout rate. Default: 0.0
    attn_drop_rate (float) – Attention dropout rate. Default: 0.0
    drop_path_rate (float) – Stochastic depth rate. Default: 0.0
    embed_layer (Module) – Patch embedding layer. Default: PatchEmbed
    norm_name (str) – Normalization layer. Default: "LN"
    act_name (str) – Activation function. Default: "gelu"
    num_classes (int) – Number of classes. Default: 1000
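  The defaults describe a ViT-Base/16-sized model; smaller variants shrink the embedding and head count (override values below are illustrative):

      from basecls.models import ViT

      model = ViT()  # ViT-Base/16-like
      vit_small = ViT(embed_dim=384, depth=12, num_heads=6)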
- load_state_dict(state_dict, strict=True)[source]
  Loads a given dictionary created by state_dict() into this module. If strict is True, the keys of the given state_dict must exactly match the keys returned by this module's state_dict().
  Users can also pass a closure Function[key: str, var: Tensor] -> Optional[np.ndarray] as the state_dict, in order to handle complex situations. For example, load everything except for the final linear classifier:

      state_dict = {...}  # Dict[str, np.ndarray]
      model.load_state_dict({
          k: None if k.startswith('fc') else v
          for k, v in state_dict.items()
      }, strict=False)

  Here returning None means skipping parameter k.
  To prevent shape mismatch (e.g. when loading PyTorch weights), we can reshape before loading:

      state_dict = {...}

      def reshape_accordingly(k, v):
          return state_dict[k].reshape(v.shape)

      model.load_state_dict(reshape_accordingly)

  We can also perform in-place re-initialization or pruning:

      def reinit_and_pruning(k, v):
          if 'bias' in k:
              M.init.zero_(v)  # in-place re-initialization
          if 'conv' in k:
              # The original example is truncated here; a plausible
              # completion (assumption) prunes small conv weights:
              return v.numpy() * (np.abs(v.numpy()) > 1e-3)

      model.load_state_dict(reinit_and_pruning, strict=False)