U <ºc¨ã@s6ddlZddlmZddlmZGdd„dejƒZdS)éN)Ú ShardedTensorcsLeZdZdejf‡fdd„ Zdd„Zdd„Zdd „Zd d„Z dd „Z ‡ZS)ÚSimpleMegatronLMNcsdtƒ ¡tj|dd|iŽ|_t ¡|_tj|dd|iŽ|_|dk r`|j |¡|j |¡dS)NrÚdtypeé) ÚsuperÚ__init__ÚnnZLinearÚfc1ZGELUÚgeluÚfc2Zcuda)ÚselfZlinear_sizeZrankr©Ú __class__©úZ/tmp/pip-unpacked-wheel-gikjz4vx/torch/testing/_internal/distributed/_shard/test_common.pyrs zSimpleMegatronLM.__init__cCs| | | |¡¡¡S©N)rr r )rÚinprrrÚforwardszSimpleMegatronLM.forwardcCsPt|jjtƒr|jj ¡}n|jj}t|jjtƒr@|jj ¡}n|jj}||fSr)Ú isinstancer ÚweightrZlocal_tensorr)rZweight1Zweight2rrrÚget_weightsszSimpleMegatronLM.get_weightscCs|jj|jjfSr)r Úbiasr©rrrrÚ get_biases!szSimpleMegatronLM.get_biasescCs|jjj|jjjfSr)r rÚgradrrrrrÚget_weight_grads$sz!SimpleMegatronLM.get_weight_gradscCs|jjj|jjjfSr)r rrrrrrrÚget_bias_grads'szSimpleMegatronLM.get_bias_grads)Ú__name__Ú __module__Ú__qualname__ÚtorchZfloat32rrrrrrÚ __classcell__rrr rrs r)r Ztorch.nnrZ'torch.distributed._shard.sharded_tensorrÚModulerrrrrÚs