mirror of
https://github.com/Vision-CAIR/MiniGPT-4.git
synced 2025-04-05 18:40:46 +00:00
5571 lines
217 KiB
Plaintext
5571 lines
217 KiB
Plaintext
|
digraph {
|
||
|
graph [size="778.8,778.8"]
|
||
|
node [align=left fontname=monospace fontsize=10 height=0.2 ranksep=0.1 shape=box style=filled]
|
||
|
140509988778688 [label="
|
||
|
(1, 49, 768)" fillcolor=darkolivegreen1]
|
||
|
140509588281712 [label=CatBackward0]
|
||
|
140509588282912 -> 140509588281712
|
||
|
140509588282912 [label=IndexBackward0]
|
||
|
140509588281808 -> 140509588282912
|
||
|
140509588281808 [label=SumBackward1]
|
||
|
140509588283152 -> 140509588281808
|
||
|
140509588283152 [label=MulBackward0]
|
||
|
140509588282864 -> 140509588283152
|
||
|
140509588282864 [label=CatBackward0]
|
||
|
140509591316848 -> 140509588282864
|
||
|
140509591316848 [label=UnsqueezeBackward0]
|
||
|
140509591314640 -> 140509591316848
|
||
|
140509591314640 [label=NativeLayerNormBackward0]
|
||
|
140509591317376 -> 140509591314640
|
||
|
140509591317376 [label=AddBackward0]
|
||
|
140509588312944 -> 140509591317376
|
||
|
140509588312944 [label=NativeDropoutBackward0]
|
||
|
140509588313424 -> 140509588312944
|
||
|
140509588313424 [label=ViewBackward0]
|
||
|
140509588313232 -> 140509588313424
|
||
|
140509588313232 [label=AddmmBackward0]
|
||
|
140509588312560 -> 140509588313232
|
||
|
140509588312560 [label=ToCopyBackward0]
|
||
|
140509591318384 -> 140509588312560
|
||
|
140509591260672 [label="encoder.layer.11.experts.experts.0.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591260672 -> 140509591318384
|
||
|
140509591318384 [label=AccumulateGrad]
|
||
|
140509588313040 -> 140509588313232
|
||
|
140509588313040 [label=ViewBackward0]
|
||
|
140509588312368 -> 140509588313040
|
||
|
140509588312368 [label=GeluBackward0]
|
||
|
140509588312176 -> 140509588312368
|
||
|
140509588312176 [label=ViewBackward0]
|
||
|
140509588313328 -> 140509588312176
|
||
|
140509588313328 [label=AddmmBackward0]
|
||
|
140509588313520 -> 140509588313328
|
||
|
140509588313520 [label=ToCopyBackward0]
|
||
|
140509588313808 -> 140509588313520
|
||
|
140509591261072 [label="encoder.layer.11.experts.experts.0.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591261072 -> 140509588313808
|
||
|
140509588313808 [label=AccumulateGrad]
|
||
|
140509588313616 -> 140509588313328
|
||
|
140509588313616 [label=ViewBackward0]
|
||
|
140509588314096 -> 140509588313616
|
||
|
140509588314096 [label=ToCopyBackward0]
|
||
|
140509588312608 -> 140509588314096
|
||
|
140509588312608 [label=SliceBackward0]
|
||
|
140509588314048 -> 140509588312608
|
||
|
140509588314048 [label=SliceBackward0]
|
||
|
140509588314288 -> 140509588314048
|
||
|
140509588314288 [label=SliceBackward0]
|
||
|
140509588314480 -> 140509588314288
|
||
|
140509588314480 [label=SliceBackward0]
|
||
|
140509588314528 -> 140509588314480
|
||
|
140509588314528 [label=SliceBackward0]
|
||
|
140509588314768 -> 140509588314528
|
||
|
140509588314768 [label=NativeLayerNormBackward0]
|
||
|
140509588314960 -> 140509588314768
|
||
|
140509588314960 [label=AddBackward0]
|
||
|
140509588315248 -> 140509588314960
|
||
|
140509588315248 [label=NativeDropoutBackward0]
|
||
|
140509588315632 -> 140509588315248
|
||
|
140509588315632 [label=ViewBackward0]
|
||
|
140509588315824 -> 140509588315632
|
||
|
140509588315824 [label=AddmmBackward0]
|
||
|
140509588316016 -> 140509588315824
|
||
|
140509588316016 [label=ToCopyBackward0]
|
||
|
140509588315968 -> 140509588316016
|
||
|
140509591290880 [label="encoder.layer.11.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591290880 -> 140509588315968
|
||
|
140509588315968 [label=AccumulateGrad]
|
||
|
140509588315728 -> 140509588315824
|
||
|
140509588315728 [label=ViewBackward0]
|
||
|
140509588316112 -> 140509588315728
|
||
|
140509588316112 [label=ViewBackward0]
|
||
|
140509588345136 -> 140509588316112
|
||
|
140509588345136 [label=CloneBackward0]
|
||
|
140509588345184 -> 140509588345136
|
||
|
140509588345184 [label=PermuteBackward0]
|
||
|
140509588345424 -> 140509588345184
|
||
|
140509588345424 [label=UnsafeViewBackward0]
|
||
|
140509588345616 -> 140509588345424
|
||
|
140509588345616 [label=BmmBackward0]
|
||
|
140509588345664 -> 140509588345616
|
||
|
140509588345664 [label=ReshapeAliasBackward0]
|
||
|
140509588346192 -> 140509588345664
|
||
|
140509588346192 [label=ExpandBackward0]
|
||
|
140509588346288 -> 140509588346192
|
||
|
140509588346288 [label=ToCopyBackward0]
|
||
|
140509588346480 -> 140509588346288
|
||
|
140509588346480 [label=NativeDropoutBackward0]
|
||
|
140509588346672 -> 140509588346480
|
||
|
140509588346672 [label=SoftmaxBackward0]
|
||
|
140509588346768 -> 140509588346672
|
||
|
140509588346768 [label=AddBackward0]
|
||
|
140509588346960 -> 140509588346768
|
||
|
140509588346960 [label=DivBackward0]
|
||
|
140509588347152 -> 140509588346960
|
||
|
140509588347152 [label=UnsafeViewBackward0]
|
||
|
140509588347248 -> 140509588347152
|
||
|
140509588347248 [label=BmmBackward0]
|
||
|
140509588347440 -> 140509588347248
|
||
|
140509588347440 [label=UnsafeViewBackward0]
|
||
|
140509588347536 -> 140509588347440
|
||
|
140509588347536 [label=CloneBackward0]
|
||
|
140509588347584 -> 140509588347536
|
||
|
140509588347584 [label=ExpandBackward0]
|
||
|
140509588347824 -> 140509588347584
|
||
|
140509588347824 [label=PermuteBackward0]
|
||
|
140509588348016 -> 140509588347824
|
||
|
140509588348016 [label=ViewBackward0]
|
||
|
140509588348064 -> 140509588348016
|
||
|
140509588348064 [label=ViewBackward0]
|
||
|
140509588348304 -> 140509588348064
|
||
|
140509588348304 [label=AddmmBackward0]
|
||
|
140509588348496 -> 140509588348304
|
||
|
140509588348496 [label=ToCopyBackward0]
|
||
|
140509588348784 -> 140509588348496
|
||
|
140509591291680 [label="encoder.layer.11.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591291680 -> 140509588348784
|
||
|
140509588348784 [label=AccumulateGrad]
|
||
|
140509588348592 -> 140509588348304
|
||
|
140509588348592 [label=ViewBackward0]
|
||
|
140509588348544 -> 140509588348592
|
||
|
140509588348544 [label=ToCopyBackward0]
|
||
|
140509588315344 -> 140509588348544
|
||
|
140509588315344 [label=CatBackward0]
|
||
|
140509588369568 -> 140509588315344
|
||
|
140509588369568 [label=SumBackward1]
|
||
|
140509588370096 -> 140509588369568
|
||
|
140509588370096 [label=MulBackward0]
|
||
|
140509588370192 -> 140509588370096
|
||
|
140509588370192 [label=CatBackward0]
|
||
|
140509588370288 -> 140509588370192
|
||
|
140509588370288 [label=UnsqueezeBackward0]
|
||
|
140509588370672 -> 140509588370288
|
||
|
140509588370672 [label=NativeLayerNormBackward0]
|
||
|
140509588370864 -> 140509588370672
|
||
|
140509588370864 [label=AddBackward0]
|
||
|
140509588371152 -> 140509588370864
|
||
|
140509588371152 [label=NativeDropoutBackward0]
|
||
|
140509588371248 -> 140509588371152
|
||
|
140509588371248 [label=ViewBackward0]
|
||
|
140509588371440 -> 140509588371248
|
||
|
140509588371440 [label=AddmmBackward0]
|
||
|
140509588371488 -> 140509588371440
|
||
|
140509588371488 [label=ToCopyBackward0]
|
||
|
140509588371920 -> 140509588371488
|
||
|
140509591285568 [label="encoder.layer.10.experts.experts.0.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591285568 -> 140509588371920
|
||
|
140509588371920 [label=AccumulateGrad]
|
||
|
140509588371632 -> 140509588371440
|
||
|
140509588371632 [label=ViewBackward0]
|
||
|
140509588372112 -> 140509588371632
|
||
|
140509588372112 [label=GeluBackward0]
|
||
|
140509588372304 -> 140509588372112
|
||
|
140509588372304 [label=ViewBackward0]
|
||
|
140509588372496 -> 140509588372304
|
||
|
140509588372496 [label=AddmmBackward0]
|
||
|
140509588372592 -> 140509588372496
|
||
|
140509588372592 [label=ToCopyBackward0]
|
||
|
140509588372976 -> 140509588372592
|
||
|
140509591285488 [label="encoder.layer.10.experts.experts.0.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591285488 -> 140509588372976
|
||
|
140509588372976 [label=AccumulateGrad]
|
||
|
140509588372400 -> 140509588372496
|
||
|
140509588372400 [label=ViewBackward0]
|
||
|
140509588372880 -> 140509588372400
|
||
|
140509588372880 [label=ToCopyBackward0]
|
||
|
140509588370960 -> 140509588372880
|
||
|
140509588370960 [label=SliceBackward0]
|
||
|
140509588373264 -> 140509588370960
|
||
|
140509588373264 [label=SliceBackward0]
|
||
|
140509588373456 -> 140509588373264
|
||
|
140509588373456 [label=NativeLayerNormBackward0]
|
||
|
140509588373360 -> 140509588373456
|
||
|
140509588373360 [label=AddBackward0]
|
||
|
140509588402672 -> 140509588373360
|
||
|
140509588402672 [label=NativeDropoutBackward0]
|
||
|
140509588402624 -> 140509588402672
|
||
|
140509588402624 [label=ViewBackward0]
|
||
|
140509588402864 -> 140509588402624
|
||
|
140509588402864 [label=AddmmBackward0]
|
||
|
140509588403056 -> 140509588402864
|
||
|
140509588403056 [label=ToCopyBackward0]
|
||
|
140509588403344 -> 140509588403056
|
||
|
140509591293840 [label="encoder.layer.10.crossattention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591293840 -> 140509588403344
|
||
|
140509588403344 [label=AccumulateGrad]
|
||
|
140509588403152 -> 140509588402864
|
||
|
140509588403152 [label=ViewBackward0]
|
||
|
140509588403632 -> 140509588403152
|
||
|
140509588403632 [label=ViewBackward0]
|
||
|
140509588403728 -> 140509588403632
|
||
|
140509588403728 [label=CloneBackward0]
|
||
|
140509588403920 -> 140509588403728
|
||
|
140509588403920 [label=PermuteBackward0]
|
||
|
140509588404112 -> 140509588403920
|
||
|
140509588404112 [label=UnsafeViewBackward0]
|
||
|
140509588404208 -> 140509588404112
|
||
|
140509588404208 [label=BmmBackward0]
|
||
|
140509588404400 -> 140509588404208
|
||
|
140509588404400 [label=ReshapeAliasBackward0]
|
||
|
140509588404496 -> 140509588404400
|
||
|
140509588404496 [label=ExpandBackward0]
|
||
|
140509588404544 -> 140509588404496
|
||
|
140509588404544 [label=ToCopyBackward0]
|
||
|
140509588404784 -> 140509588404544
|
||
|
140509588404784 [label=NativeDropoutBackward0]
|
||
|
140509588404976 -> 140509588404784
|
||
|
140509588404976 [label=SoftmaxBackward0]
|
||
|
140509588405024 -> 140509588404976
|
||
|
140509588405024 [label=AddBackward0]
|
||
|
140509588405264 -> 140509588405024
|
||
|
140509588405264 [label=DivBackward0]
|
||
|
140509588405456 -> 140509588405264
|
||
|
140509588405456 [label=UnsafeViewBackward0]
|
||
|
140509588405504 -> 140509588405456
|
||
|
140509588405504 [label=BmmBackward0]
|
||
|
140509588405744 -> 140509588405504
|
||
|
140509588405744 [label=UnsafeViewBackward0]
|
||
|
140509588406128 -> 140509588405744
|
||
|
140509588406128 [label=CloneBackward0]
|
||
|
140509588405984 -> 140509588406128
|
||
|
140509588405984 [label=ExpandBackward0]
|
||
|
140509588427056 -> 140509588405984
|
||
|
140509588427056 [label=PermuteBackward0]
|
||
|
140509588427152 -> 140509588427056
|
||
|
140509588427152 [label=ViewBackward0]
|
||
|
140509588427344 -> 140509588427152
|
||
|
140509588427344 [label=ViewBackward0]
|
||
|
140509588427536 -> 140509588427344
|
||
|
140509588427536 [label=AddmmBackward0]
|
||
|
140509588427632 -> 140509588427536
|
||
|
140509588427632 [label=ToCopyBackward0]
|
||
|
140509588428016 -> 140509588427632
|
||
|
140509591312160 [label="encoder.layer.10.crossattention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591312160 -> 140509588428016
|
||
|
140509588428016 [label=AccumulateGrad]
|
||
|
140509588427440 -> 140509588427536
|
||
|
140509588427440 [label=ViewBackward0]
|
||
|
140509588427920 -> 140509588427440
|
||
|
140509588427920 [label=ToCopyBackward0]
|
||
|
140509588402384 -> 140509588427920
|
||
|
140509588402384 [label=SliceBackward0]
|
||
|
140509588428304 -> 140509588402384
|
||
|
140509588428304 [label=SliceBackward0]
|
||
|
140509588428496 -> 140509588428304
|
||
|
140509588428496 [label=SliceBackward0]
|
||
|
140509588428592 -> 140509588428496
|
||
|
140509588428592 [label=NativeLayerNormBackward0]
|
||
|
140509588428784 -> 140509588428592
|
||
|
140509588428784 [label=AddBackward0]
|
||
|
140509588429072 -> 140509588428784
|
||
|
140509588429072 [label=NativeDropoutBackward0]
|
||
|
140509588429168 -> 140509588429072
|
||
|
140509588429168 [label=ViewBackward0]
|
||
|
140509588429360 -> 140509588429168
|
||
|
140509588429360 [label=AddmmBackward0]
|
||
|
140509588429408 -> 140509588429360
|
||
|
140509588429408 [label=ToCopyBackward0]
|
||
|
140509588429840 -> 140509588429408
|
||
|
140509591312960 [label="encoder.layer.10.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591312960 -> 140509588429840
|
||
|
140509588429840 [label=AccumulateGrad]
|
||
|
140509588429552 -> 140509588429360
|
||
|
140509588429552 [label=ViewBackward0]
|
||
|
140509588430032 -> 140509588429552
|
||
|
140509588430032 [label=ViewBackward0]
|
||
|
140509588430224 -> 140509588430032
|
||
|
140509588430224 [label=CloneBackward0]
|
||
|
140509588430416 -> 140509588430224
|
||
|
140509588430416 [label=PermuteBackward0]
|
||
|
140509588430512 -> 140509588430416
|
||
|
140509588430512 [label=UnsafeViewBackward0]
|
||
|
140509588430704 -> 140509588430512
|
||
|
140509588430704 [label=BmmBackward0]
|
||
|
140509588430608 -> 140509588430704
|
||
|
140509588430608 [label=ReshapeAliasBackward0]
|
||
|
140509588459728 -> 140509588430608
|
||
|
140509588459728 [label=ExpandBackward0]
|
||
|
140509588459824 -> 140509588459728
|
||
|
140509588459824 [label=ToCopyBackward0]
|
||
|
140509588460016 -> 140509588459824
|
||
|
140509588460016 [label=NativeDropoutBackward0]
|
||
|
140509588460064 -> 140509588460016
|
||
|
140509588460064 [label=SoftmaxBackward0]
|
||
|
140509588460304 -> 140509588460064
|
||
|
140509588460304 [label=AddBackward0]
|
||
|
140509588460496 -> 140509588460304
|
||
|
140509588460496 [label=DivBackward0]
|
||
|
140509588460544 -> 140509588460496
|
||
|
140509588460544 [label=UnsafeViewBackward0]
|
||
|
140509588460784 -> 140509588460544
|
||
|
140509588460784 [label=BmmBackward0]
|
||
|
140509588460976 -> 140509588460784
|
||
|
140509588460976 [label=UnsafeViewBackward0]
|
||
|
140509588461360 -> 140509588460976
|
||
|
140509588461360 [label=CloneBackward0]
|
||
|
140509588461552 -> 140509588461360
|
||
|
140509588461552 [label=ExpandBackward0]
|
||
|
140509588461648 -> 140509588461552
|
||
|
140509588461648 [label=PermuteBackward0]
|
||
|
140509588461840 -> 140509588461648
|
||
|
140509588461840 [label=ViewBackward0]
|
||
|
140509588462032 -> 140509588461840
|
||
|
140509588462032 [label=ViewBackward0]
|
||
|
140509588462128 -> 140509588462032
|
||
|
140509588462128 [label=AddmmBackward0]
|
||
|
140509588462320 -> 140509588462128
|
||
|
140509588462320 [label=ToCopyBackward0]
|
||
|
140509588462608 -> 140509588462320
|
||
|
140509591313360 [label="encoder.layer.10.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591313360 -> 140509588462608
|
||
|
140509588462608 [label=AccumulateGrad]
|
||
|
140509588461984 -> 140509588462128
|
||
|
140509588461984 [label=ViewBackward0]
|
||
|
140509588462464 -> 140509588461984
|
||
|
140509588462464 [label=ToCopyBackward0]
|
||
|
140509588428880 -> 140509588462464
|
||
|
140509588428880 [label=CatBackward0]
|
||
|
140509588462992 -> 140509588428880
|
||
|
140509588462992 [label=SumBackward1]
|
||
|
140509588462944 -> 140509588462992
|
||
|
140509588462944 [label=MulBackward0]
|
||
|
140509588463184 -> 140509588462944
|
||
|
140509588463184 [label=CatBackward0]
|
||
|
140509588463568 -> 140509588463184
|
||
|
140509588463568 [label=UnsqueezeBackward0]
|
||
|
140509588463424 -> 140509588463568
|
||
|
140509588463424 [label=NativeLayerNormBackward0]
|
||
|
140509587960112 -> 140509588463424
|
||
|
140509587960112 [label=AddBackward0]
|
||
|
140509587960400 -> 140509587960112
|
||
|
140509587960400 [label=NativeDropoutBackward0]
|
||
|
140509587960784 -> 140509587960400
|
||
|
140509587960784 [label=ViewBackward0]
|
||
|
140509587960976 -> 140509587960784
|
||
|
140509587960976 [label=AddmmBackward0]
|
||
|
140509587961168 -> 140509587960976
|
||
|
140509587961168 [label=ToCopyBackward0]
|
||
|
140509587961456 -> 140509587961168
|
||
|
140509591311680 [label="encoder.layer.9.experts.experts.0.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591311680 -> 140509587961456
|
||
|
140509587961456 [label=AccumulateGrad]
|
||
|
140509587960880 -> 140509587960976
|
||
|
140509587960880 [label=ViewBackward0]
|
||
|
140509587961360 -> 140509587960880
|
||
|
140509587961360 [label=GeluBackward0]
|
||
|
140509587961552 -> 140509587961360
|
||
|
140509587961552 [label=ViewBackward0]
|
||
|
140509587961600 -> 140509587961552
|
||
|
140509587961600 [label=AddmmBackward0]
|
||
|
140509587961840 -> 140509587961600
|
||
|
140509587961840 [label=ToCopyBackward0]
|
||
|
140509587962080 -> 140509587961840
|
||
|
140509591312000 [label="encoder.layer.9.experts.experts.0.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591312000 -> 140509587962080
|
||
|
140509587962080 [label=AccumulateGrad]
|
||
|
140509587961936 -> 140509587961600
|
||
|
140509587961936 [label=ViewBackward0]
|
||
|
140509587962416 -> 140509587961936
|
||
|
140509587962416 [label=ToCopyBackward0]
|
||
|
140509587960496 -> 140509587962416
|
||
|
140509587960496 [label=SliceBackward0]
|
||
|
140509587962512 -> 140509587960496
|
||
|
140509587962512 [label=SliceBackward0]
|
||
|
140509587962560 -> 140509587962512
|
||
|
140509587962560 [label=SliceBackward0]
|
||
|
140509587962800 -> 140509587962560
|
||
|
140509587962800 [label=SliceBackward0]
|
||
|
140509587962992 -> 140509587962800
|
||
|
140509587962992 [label=SliceBackward0]
|
||
|
140509587963040 -> 140509587962992
|
||
|
140509587963040 [label=NativeLayerNormBackward0]
|
||
|
140509587963280 -> 140509587963040
|
||
|
140509587963280 [label=AddBackward0]
|
||
|
140509587963520 -> 140509587963280
|
||
|
140509587963520 [label=NativeDropoutBackward0]
|
||
|
140509587963760 -> 140509587963520
|
||
|
140509587963760 [label=ViewBackward0]
|
||
|
140509587988784 -> 140509587963760
|
||
|
140509587988784 [label=AddmmBackward0]
|
||
|
140509587988976 -> 140509587988784
|
||
|
140509587988976 [label=ToCopyBackward0]
|
||
|
140509587989264 -> 140509587988976
|
||
|
140509591321152 [label="encoder.layer.9.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591321152 -> 140509587989264
|
||
|
140509587989264 [label=AccumulateGrad]
|
||
|
140509587988640 -> 140509587988784
|
||
|
140509587988640 [label=ViewBackward0]
|
||
|
140509587989120 -> 140509587988640
|
||
|
140509587989120 [label=ViewBackward0]
|
||
|
140509587989360 -> 140509587989120
|
||
|
140509587989360 [label=CloneBackward0]
|
||
|
140509587989552 -> 140509587989360
|
||
|
140509587989552 [label=PermuteBackward0]
|
||
|
140509587989600 -> 140509587989552
|
||
|
140509587989600 [label=UnsafeViewBackward0]
|
||
|
140509587989840 -> 140509587989600
|
||
|
140509587989840 [label=BmmBackward0]
|
||
|
140509587990032 -> 140509587989840
|
||
|
140509587990032 [label=ReshapeAliasBackward0]
|
||
|
140509587990416 -> 140509587990032
|
||
|
140509587990416 [label=ExpandBackward0]
|
||
|
140509587990608 -> 140509587990416
|
||
|
140509587990608 [label=ToCopyBackward0]
|
||
|
140509587990704 -> 140509587990608
|
||
|
140509587990704 [label=NativeDropoutBackward0]
|
||
|
140509587990896 -> 140509587990704
|
||
|
140509587990896 [label=SoftmaxBackward0]
|
||
|
140509587991088 -> 140509587990896
|
||
|
140509587991088 [label=AddBackward0]
|
||
|
140509587991184 -> 140509587991088
|
||
|
140509587991184 [label=DivBackward0]
|
||
|
140509587991376 -> 140509587991184
|
||
|
140509587991376 [label=UnsafeViewBackward0]
|
||
|
140509587991568 -> 140509587991376
|
||
|
140509587991568 [label=BmmBackward0]
|
||
|
140509587991664 -> 140509587991568
|
||
|
140509587991664 [label=UnsafeViewBackward0]
|
||
|
140509587991760 -> 140509587991664
|
||
|
140509587991760 [label=CloneBackward0]
|
||
|
140509587991952 -> 140509587991760
|
||
|
140509587991952 [label=ExpandBackward0]
|
||
|
140509587992000 -> 140509587991952
|
||
|
140509587992000 [label=PermuteBackward0]
|
||
|
140509587992240 -> 140509587992000
|
||
|
140509587992240 [label=ViewBackward0]
|
||
|
140509587992432 -> 140509587992240
|
||
|
140509587992432 [label=ViewBackward0]
|
||
|
140509587991520 -> 140509587992432
|
||
|
140509587991520 [label=AddmmBackward0]
|
||
|
140509588021456 -> 140509587991520
|
||
|
140509588021456 [label=ToCopyBackward0]
|
||
|
140509588021696 -> 140509588021456
|
||
|
140509591321952 [label="encoder.layer.9.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591321952 -> 140509588021696
|
||
|
140509588021696 [label=AccumulateGrad]
|
||
|
140509588021552 -> 140509587991520
|
||
|
140509588021552 [label=ViewBackward0]
|
||
|
140509588022032 -> 140509588021552
|
||
|
140509588022032 [label=ToCopyBackward0]
|
||
|
140509587963664 -> 140509588022032
|
||
|
140509587963664 [label=CatBackward0]
|
||
|
140509588022128 -> 140509587963664
|
||
|
140509588022128 [label=SumBackward1]
|
||
|
140509588022512 -> 140509588022128
|
||
|
140509588022512 [label=MulBackward0]
|
||
|
140509588022704 -> 140509588022512
|
||
|
140509588022704 [label=CatBackward0]
|
||
|
140509588022656 -> 140509588022704
|
||
|
140509588022656 [label=UnsqueezeBackward0]
|
||
|
140509588023184 -> 140509588022656
|
||
|
140509588023184 [label=NativeLayerNormBackward0]
|
||
|
140509588023280 -> 140509588023184
|
||
|
140509588023280 [label=AddBackward0]
|
||
|
140509588023664 -> 140509588023280
|
||
|
140509588023664 [label=NativeDropoutBackward0]
|
||
|
140509588023616 -> 140509588023664
|
||
|
140509588023616 [label=ViewBackward0]
|
||
|
140509588023856 -> 140509588023616
|
||
|
140509588023856 [label=AddmmBackward0]
|
||
|
140509588024048 -> 140509588023856
|
||
|
140509588024048 [label=ToCopyBackward0]
|
||
|
140509588024336 -> 140509588024048
|
||
|
140509591320272 [label="encoder.layer.8.experts.experts.0.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591320272 -> 140509588024336
|
||
|
140509588024336 [label=AccumulateGrad]
|
||
|
140509588024144 -> 140509588023856
|
||
|
140509588024144 [label=ViewBackward0]
|
||
|
140509588024624 -> 140509588024144
|
||
|
140509588024624 [label=GeluBackward0]
|
||
|
140509588024720 -> 140509588024624
|
||
|
140509588024720 [label=ViewBackward0]
|
||
|
140509588024912 -> 140509588024720
|
||
|
140509588024912 [label=AddmmBackward0]
|
||
|
140509588025104 -> 140509588024912
|
||
|
140509588025104 [label=ToCopyBackward0]
|
||
|
140509588025056 -> 140509588025104
|
||
|
140509591320192 [label="encoder.layer.8.experts.experts.0.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591320192 -> 140509588025056
|
||
|
140509588025056 [label=AccumulateGrad]
|
||
|
140509588024816 -> 140509588024912
|
||
|
140509588024816 [label=ViewBackward0]
|
||
|
140509588025200 -> 140509588024816
|
||
|
140509588025200 [label=ToCopyBackward0]
|
||
|
140509588023376 -> 140509588025200
|
||
|
140509588023376 [label=SliceBackward0]
|
||
|
140509588046224 -> 140509588023376
|
||
|
140509588046224 [label=SliceBackward0]
|
||
|
140509588046416 -> 140509588046224
|
||
|
140509588046416 [label=NativeLayerNormBackward0]
|
||
|
140509588046608 -> 140509588046416
|
||
|
140509588046608 [label=AddBackward0]
|
||
|
140509588046800 -> 140509588046608
|
||
|
140509588046800 [label=NativeDropoutBackward0]
|
||
|
140509588047184 -> 140509588046800
|
||
|
140509588047184 [label=ViewBackward0]
|
||
|
140509588047376 -> 140509588047184
|
||
|
140509588047376 [label=AddmmBackward0]
|
||
|
140509588047568 -> 140509588047376
|
||
|
140509588047568 [label=ToCopyBackward0]
|
||
|
140509588047856 -> 140509588047568
|
||
|
140509591341312 [label="encoder.layer.8.crossattention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591341312 -> 140509588047856
|
||
|
140509588047856 [label=AccumulateGrad]
|
||
|
140509588047280 -> 140509588047376
|
||
|
140509588047280 [label=ViewBackward0]
|
||
|
140509588047760 -> 140509588047280
|
||
|
140509588047760 [label=ViewBackward0]
|
||
|
140509588047952 -> 140509588047760
|
||
|
140509588047952 [label=CloneBackward0]
|
||
|
140509588048000 -> 140509588047952
|
||
|
140509588048000 [label=PermuteBackward0]
|
||
|
140509588048240 -> 140509588048000
|
||
|
140509588048240 [label=UnsafeViewBackward0]
|
||
|
140509588048432 -> 140509588048240
|
||
|
140509588048432 [label=BmmBackward0]
|
||
|
140509588048480 -> 140509588048432
|
||
|
140509588048480 [label=ReshapeAliasBackward0]
|
||
|
140509588049008 -> 140509588048480
|
||
|
140509588049008 [label=ExpandBackward0]
|
||
|
140509588049104 -> 140509588049008
|
||
|
140509588049104 [label=ToCopyBackward0]
|
||
|
140509588049296 -> 140509588049104
|
||
|
140509588049296 [label=NativeDropoutBackward0]
|
||
|
140509588049488 -> 140509588049296
|
||
|
140509588049488 [label=SoftmaxBackward0]
|
||
|
140509588049584 -> 140509588049488
|
||
|
140509588049584 [label=AddBackward0]
|
||
|
140509588049776 -> 140509588049584
|
||
|
140509588049776 [label=DivBackward0]
|
||
|
140509588049680 -> 140509588049776
|
||
|
140509588049680 [label=UnsafeViewBackward0]
|
||
|
140509588074656 -> 140509588049680
|
||
|
140509588074656 [label=BmmBackward0]
|
||
|
140509588074896 -> 140509588074656
|
||
|
140509588074896 [label=UnsafeViewBackward0]
|
||
|
140509588074992 -> 140509588074896
|
||
|
140509588074992 [label=CloneBackward0]
|
||
|
140509588075040 -> 140509588074992
|
||
|
140509588075040 [label=ExpandBackward0]
|
||
|
140509588075280 -> 140509588075040
|
||
|
140509588075280 [label=PermuteBackward0]
|
||
|
140509588075472 -> 140509588075280
|
||
|
140509588075472 [label=ViewBackward0]
|
||
|
140509588075520 -> 140509588075472
|
||
|
140509588075520 [label=ViewBackward0]
|
||
|
140509588075760 -> 140509588075520
|
||
|
140509588075760 [label=AddmmBackward0]
|
||
|
140509588075952 -> 140509588075760
|
||
|
140509588075952 [label=ToCopyBackward0]
|
||
|
140509588076240 -> 140509588075952
|
||
|
140509591342432 [label="encoder.layer.8.crossattention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591342432 -> 140509588076240
|
||
|
140509588076240 [label=AccumulateGrad]
|
||
|
140509588076048 -> 140509588075760
|
||
|
140509588076048 [label=ViewBackward0]
|
||
|
140509588076528 -> 140509588076048
|
||
|
140509588076528 [label=ToCopyBackward0]
|
||
|
140509588046896 -> 140509588076528
|
||
|
140509588046896 [label=SliceBackward0]
|
||
|
140509588076480 -> 140509588046896
|
||
|
140509588076480 [label=SliceBackward0]
|
||
|
140509588076720 -> 140509588076480
|
||
|
140509588076720 [label=SliceBackward0]
|
||
|
140509588076912 -> 140509588076720
|
||
|
140509588076912 [label=NativeLayerNormBackward0]
|
||
|
140509588076960 -> 140509588076912
|
||
|
140509588076960 [label=AddBackward0]
|
||
|
140509588077392 -> 140509588076960
|
||
|
140509588077392 [label=NativeDropoutBackward0]
|
||
|
140509588077776 -> 140509588077392
|
||
|
140509588077776 [label=ViewBackward0]
|
||
|
140509588077968 -> 140509588077776
|
||
|
140509588077968 [label=AddmmBackward0]
|
||
|
140509588078064 -> 140509588077968
|
||
|
140509588078064 [label=ToCopyBackward0]
|
||
|
140509588078448 -> 140509588078064
|
||
|
140509590823056 [label="encoder.layer.8.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590823056 -> 140509588078448
|
||
|
140509588078448 [label=AccumulateGrad]
|
||
|
140509588077872 -> 140509588077968
|
||
|
140509588077872 [label=ViewBackward0]
|
||
|
140509588078352 -> 140509588077872
|
||
|
140509588078352 [label=ViewBackward0]
|
||
|
140509588078400 -> 140509588078352
|
||
|
140509588078400 [label=CloneBackward0]
|
||
|
140509588078160 -> 140509588078400
|
||
|
140509588078160 [label=PermuteBackward0]
|
||
|
140509588103472 -> 140509588078160
|
||
|
140509588103472 [label=UnsafeViewBackward0]
|
||
|
140509588103520 -> 140509588103472
|
||
|
140509588103520 [label=BmmBackward0]
|
||
|
140509588103760 -> 140509588103520
|
||
|
140509588103760 [label=ReshapeAliasBackward0]
|
||
|
140509588104144 -> 140509588103760
|
||
|
140509588104144 [label=ExpandBackward0]
|
||
|
140509588104336 -> 140509588104144
|
||
|
140509588104336 [label=ToCopyBackward0]
|
||
|
140509588104528 -> 140509588104336
|
||
|
140509588104528 [label=NativeDropoutBackward0]
|
||
|
140509588104624 -> 140509588104528
|
||
|
140509588104624 [label=SoftmaxBackward0]
|
||
|
140509588104816 -> 140509588104624
|
||
|
140509588104816 [label=AddBackward0]
|
||
|
140509588105008 -> 140509588104816
|
||
|
140509588105008 [label=DivBackward0]
|
||
|
140509588105104 -> 140509588105008
|
||
|
140509588105104 [label=UnsafeViewBackward0]
|
||
|
140509588105296 -> 140509588105104
|
||
|
140509588105296 [label=BmmBackward0]
|
||
|
140509588105488 -> 140509588105296
|
||
|
140509588105488 [label=UnsafeViewBackward0]
|
||
|
140509588105440 -> 140509588105488
|
||
|
140509588105440 [label=CloneBackward0]
|
||
|
140509588105680 -> 140509588105440
|
||
|
140509588105680 [label=ExpandBackward0]
|
||
|
140509588105872 -> 140509588105680
|
||
|
140509588105872 [label=PermuteBackward0]
|
||
|
140509588105920 -> 140509588105872
|
||
|
140509588105920 [label=ViewBackward0]
|
||
|
140509588106160 -> 140509588105920
|
||
|
140509588106160 [label=ViewBackward0]
|
||
|
140509588106352 -> 140509588106160
|
||
|
140509588106352 [label=AddmmBackward0]
|
||
|
140509588106400 -> 140509588106352
|
||
|
140509588106400 [label=ToCopyBackward0]
|
||
|
140509588106832 -> 140509588106400
|
||
|
140509590823536 [label="encoder.layer.8.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590823536 -> 140509588106832
|
||
|
140509588106832 [label=AccumulateGrad]
|
||
|
140509588106544 -> 140509588106352
|
||
|
140509588106544 [label=ViewBackward0]
|
||
|
140509588107024 -> 140509588106544
|
||
|
140509588107024 [label=ToCopyBackward0]
|
||
|
140509588077488 -> 140509588107024
|
||
|
140509588077488 [label=CatBackward0]
|
||
|
140509588107120 -> 140509588077488
|
||
|
140509588107120 [label=SumBackward1]
|
||
|
140509588136240 -> 140509588107120
|
||
|
140509588136240 [label=MulBackward0]
|
||
|
140509588136432 -> 140509588136240
|
||
|
140509588136432 [label=CatBackward0]
|
||
|
140509588136528 -> 140509588136432
|
||
|
140509588136528 [label=UnsqueezeBackward0]
|
||
|
140509588136912 -> 140509588136528
|
||
|
140509588136912 [label=NativeLayerNormBackward0]
|
||
|
140509588137104 -> 140509588136912
|
||
|
140509588137104 [label=AddBackward0]
|
||
|
140509588137392 -> 140509588137104
|
||
|
140509588137392 [label=NativeDropoutBackward0]
|
||
|
140509588137488 -> 140509588137392
|
||
|
140509588137488 [label=ViewBackward0]
|
||
|
140509588137536 -> 140509588137488
|
||
|
140509588137536 [label=AddmmBackward0]
|
||
|
140509588137776 -> 140509588137536
|
||
|
140509588137776 [label=ToCopyBackward0]
|
||
|
140509588138016 -> 140509588137776
|
||
|
140509591341952 [label="encoder.layer.7.experts.experts.0.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591341952 -> 140509588138016
|
||
|
140509588138016 [label=AccumulateGrad]
|
||
|
140509588137872 -> 140509588137536
|
||
|
140509588137872 [label=ViewBackward0]
|
||
|
140509588138352 -> 140509588137872
|
||
|
140509588138352 [label=GeluBackward0]
|
||
|
140509588138544 -> 140509588138352
|
||
|
140509588138544 [label=ViewBackward0]
|
||
|
140509588138640 -> 140509588138544
|
||
|
140509588138640 [label=AddmmBackward0]
|
||
|
140509588138832 -> 140509588138640
|
||
|
140509588138832 [label=ToCopyBackward0]
|
||
|
140509588139120 -> 140509588138832
|
||
|
140509591342272 [label="encoder.layer.7.experts.experts.0.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591342272 -> 140509588139120
|
||
|
140509588139120 [label=AccumulateGrad]
|
||
|
140509588138496 -> 140509588138640
|
||
|
140509588138496 [label=ViewBackward0]
|
||
|
140509588138976 -> 140509588138496
|
||
|
140509588138976 [label=ToCopyBackward0]
|
||
|
140509588137056 -> 140509588138976
|
||
|
140509588137056 [label=SliceBackward0]
|
||
|
140509588139504 -> 140509588137056
|
||
|
140509588139504 [label=SliceBackward0]
|
||
|
140509588139600 -> 140509588139504
|
||
|
140509588139600 [label=SliceBackward0]
|
||
|
140509588139792 -> 140509588139600
|
||
|
140509588139792 [label=SliceBackward0]
|
||
|
140509588139984 -> 140509588139792
|
||
|
140509588139984 [label=SliceBackward0]
|
||
|
140509588139888 -> 140509588139984
|
||
|
140509588139888 [label=NativeLayerNormBackward0]
|
||
|
140509588164912 -> 140509588139888
|
||
|
140509588164912 [label=AddBackward0]
|
||
|
140509588165200 -> 140509588164912
|
||
|
140509588165200 [label=NativeDropoutBackward0]
|
||
|
140509588165296 -> 140509588165200
|
||
|
140509588165296 [label=ViewBackward0]
|
||
|
140509588165488 -> 140509588165296
|
||
|
140509588165488 [label=AddmmBackward0]
|
||
|
140509588165536 -> 140509588165488
|
||
|
140509588165536 [label=ToCopyBackward0]
|
||
|
140509588165968 -> 140509588165536
|
||
|
140509590839360 [label="encoder.layer.7.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590839360 -> 140509588165968
|
||
|
140509588165968 [label=AccumulateGrad]
|
||
|
140509588165680 -> 140509588165488
|
||
|
140509588165680 [label=ViewBackward0]
|
||
|
140509588166160 -> 140509588165680
|
||
|
140509588166160 [label=ViewBackward0]
|
||
|
140509588166352 -> 140509588166160
|
||
|
140509588166352 [label=CloneBackward0]
|
||
|
140509588166544 -> 140509588166352
|
||
|
140509588166544 [label=PermuteBackward0]
|
||
|
140509588166640 -> 140509588166544
|
||
|
140509588166640 [label=UnsafeViewBackward0]
|
||
|
140509588166832 -> 140509588166640
|
||
|
140509588166832 [label=BmmBackward0]
|
||
|
140509588167024 -> 140509588166832
|
||
|
140509588167024 [label=ReshapeAliasBackward0]
|
||
|
140509588166976 -> 140509588167024
|
||
|
140509588166976 [label=ExpandBackward0]
|
||
|
140509588167216 -> 140509588166976
|
||
|
140509588167216 [label=ToCopyBackward0]
|
||
|
140509588167408 -> 140509588167216
|
||
|
140509588167408 [label=NativeDropoutBackward0]
|
||
|
140509588167456 -> 140509588167408
|
||
|
140509588167456 [label=SoftmaxBackward0]
|
||
|
140509588167696 -> 140509588167456
|
||
|
140509588167696 [label=AddBackward0]
|
||
|
140509588167888 -> 140509588167696
|
||
|
140509588167888 [label=DivBackward0]
|
||
|
140509588167936 -> 140509588167888
|
||
|
140509588167936 [label=UnsafeViewBackward0]
|
||
|
140509588168176 -> 140509588167936
|
||
|
140509588168176 [label=BmmBackward0]
|
||
|
140509588168368 -> 140509588168176
|
||
|
140509588168368 [label=UnsafeViewBackward0]
|
||
|
140509588168416 -> 140509588168368
|
||
|
140509588168416 [label=CloneBackward0]
|
||
|
140509588193584 -> 140509588168416
|
||
|
140509588193584 [label=ExpandBackward0]
|
||
|
140509588193680 -> 140509588193584
|
||
|
140509588193680 [label=PermuteBackward0]
|
||
|
140509588193872 -> 140509588193680
|
||
|
140509588193872 [label=ViewBackward0]
|
||
|
140509588194064 -> 140509588193872
|
||
|
140509588194064 [label=ViewBackward0]
|
||
|
140509588194160 -> 140509588194064
|
||
|
140509588194160 [label=AddmmBackward0]
|
||
|
140509588194352 -> 140509588194160
|
||
|
140509588194352 [label=ToCopyBackward0]
|
||
|
140509588194640 -> 140509588194352
|
||
|
140509590840320 [label="encoder.layer.7.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590840320 -> 140509588194640
|
||
|
140509588194640 [label=AccumulateGrad]
|
||
|
140509588194016 -> 140509588194160
|
||
|
140509588194016 [label=ViewBackward0]
|
||
|
140509588194496 -> 140509588194016
|
||
|
140509588194496 [label=ToCopyBackward0]
|
||
|
140509588165008 -> 140509588194496
|
||
|
140509588165008 [label=CatBackward0]
|
||
|
140509588195024 -> 140509588165008
|
||
|
140509588195024 [label=SumBackward1]
|
||
|
140509588194976 -> 140509588195024
|
||
|
140509588194976 [label=MulBackward0]
|
||
|
140509588195216 -> 140509588194976
|
||
|
140509588195216 [label=CatBackward0]
|
||
|
140509588195600 -> 140509588195216
|
||
|
140509588195600 [label=UnsqueezeBackward0]
|
||
|
140509588195696 -> 140509588195600
|
||
|
140509588195696 [label=NativeLayerNormBackward0]
|
||
|
140509588195888 -> 140509588195696
|
||
|
140509588195888 [label=AddBackward0]
|
||
|
140509588196176 -> 140509588195888
|
||
|
140509588196176 [label=NativeDropoutBackward0]
|
||
|
140509588196560 -> 140509588196176
|
||
|
140509588196560 [label=ViewBackward0]
|
||
|
140509588196752 -> 140509588196560
|
||
|
140509588196752 [label=AddmmBackward0]
|
||
|
140509588196944 -> 140509588196752
|
||
|
140509588196944 [label=ToCopyBackward0]
|
||
|
140509588197232 -> 140509588196944
|
||
|
140509590825776 [label="encoder.layer.6.experts.experts.0.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590825776 -> 140509588197232
|
||
|
140509588197232 [label=AccumulateGrad]
|
||
|
140509588196656 -> 140509588196752
|
||
|
140509588196656 [label=ViewBackward0]
|
||
|
140509588197040 -> 140509588196656
|
||
|
140509588197040 [label=GeluBackward0]
|
||
|
140509588196896 -> 140509588197040
|
||
|
140509588196896 [label=ViewBackward0]
|
||
|
140509587696464 -> 140509588196896
|
||
|
140509587696464 [label=AddmmBackward0]
|
||
|
140509587696368 -> 140509587696464
|
||
|
140509587696368 [label=ToCopyBackward0]
|
||
|
140509587693680 -> 140509587696368
|
||
|
140509590826256 [label="encoder.layer.6.experts.experts.0.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590826256 -> 140509587693680
|
||
|
140509587693680 [label=AccumulateGrad]
|
||
|
140509587696752 -> 140509587696464
|
||
|
140509587696752 [label=ViewBackward0]
|
||
|
140509587693728 -> 140509587696752
|
||
|
140509587693728 [label=ToCopyBackward0]
|
||
|
140509588196272 -> 140509587693728
|
||
|
140509588196272 [label=ViewBackward0]
|
||
|
140509587693872 -> 140509588196272
|
||
|
140509587693872 [label=CloneBackward0]
|
||
|
140509587694064 -> 140509587693872
|
||
|
140509587694064 [label=ExpandBackward0]
|
||
|
140509587694112 -> 140509587694064
|
||
|
140509587694112 [label=UnsqueezeBackward0]
|
||
|
140509587694352 -> 140509587694112
|
||
|
140509587694352 [label=SliceBackward0]
|
||
|
140509587694544 -> 140509587694352
|
||
|
140509587694544 [label=SliceBackward0]
|
||
|
140509587694592 -> 140509587694544
|
||
|
140509587694592 [label=NativeLayerNormBackward0]
|
||
|
140509587694832 -> 140509587694592
|
||
|
140509587694832 [label=AddBackward0]
|
||
|
140509587695072 -> 140509587694832
|
||
|
140509587695072 [label=NativeDropoutBackward0]
|
||
|
140509587695408 -> 140509587695072
|
||
|
140509587695408 [label=ViewBackward0]
|
||
|
140509587695600 -> 140509587695408
|
||
|
140509587695600 [label=AddmmBackward0]
|
||
|
140509587697232 -> 140509587695600
|
||
|
140509587697232 [label=ToCopyBackward0]
|
||
|
140509587696992 -> 140509587697232
|
||
|
140509590842480 [label="encoder.layer.6.crossattention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590842480 -> 140509587696992
|
||
|
140509587696992 [label=AccumulateGrad]
|
||
|
140509587697472 -> 140509587695600
|
||
|
140509587697472 [label=ViewBackward0]
|
||
|
140509587697616 -> 140509587697472
|
||
|
140509587697616 [label=ViewBackward0]
|
||
|
140509587696272 -> 140509587697616
|
||
|
140509587696272 [label=CloneBackward0]
|
||
|
140509587696944 -> 140509587696272
|
||
|
140509587696944 [label=PermuteBackward0]
|
||
|
140509587696512 -> 140509587696944
|
||
|
140509587696512 [label=UnsafeViewBackward0]
|
||
|
140509587695984 -> 140509587696512
|
||
|
140509587695984 [label=BmmBackward0]
|
||
|
140509587696032 -> 140509587695984
|
||
|
140509587696032 [label=ReshapeAliasBackward0]
|
||
|
140509587852640 -> 140509587696032
|
||
|
140509587852640 [label=ExpandBackward0]
|
||
|
140509587852544 -> 140509587852640
|
||
|
140509587852544 [label=ToCopyBackward0]
|
||
|
140509587852448 -> 140509587852544
|
||
|
140509587852448 [label=NativeDropoutBackward0]
|
||
|
140509587852352 -> 140509587852448
|
||
|
140509587852352 [label=SoftmaxBackward0]
|
||
|
140509587852256 -> 140509587852352
|
||
|
140509587852256 [label=AddBackward0]
|
||
|
140509587852160 -> 140509587852256
|
||
|
140509587852160 [label=DivBackward0]
|
||
|
140509587852064 -> 140509587852160
|
||
|
140509587852064 [label=UnsafeViewBackward0]
|
||
|
140509587851968 -> 140509587852064
|
||
|
140509587851968 [label=BmmBackward0]
|
||
|
140509587851872 -> 140509587851968
|
||
|
140509587851872 [label=ReshapeAliasBackward0]
|
||
|
140509587851824 -> 140509587851872
|
||
|
140509587851824 [label=ExpandBackward0]
|
||
|
140509587851728 -> 140509587851824
|
||
|
140509587851728 [label=PermuteBackward0]
|
||
|
140509587851632 -> 140509587851728
|
||
|
140509587851632 [label=ViewBackward0]
|
||
|
140509587851536 -> 140509587851632
|
||
|
140509587851536 [label=ViewBackward0]
|
||
|
140509587851440 -> 140509587851536
|
||
|
140509587851440 [label=AddmmBackward0]
|
||
|
140509587851344 -> 140509587851440
|
||
|
140509587851344 [label=ToCopyBackward0]
|
||
|
140509587851152 -> 140509587851344
|
||
|
140509590843200 [label="encoder.layer.6.crossattention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590843200 -> 140509587851152
|
||
|
140509587851152 [label=AccumulateGrad]
|
||
|
140509587851296 -> 140509587851440
|
||
|
140509587851296 [label=ViewBackward0]
|
||
|
140509587851008 -> 140509587851296
|
||
|
140509587851008 [label=ToCopyBackward0]
|
||
|
140509587695120 -> 140509587851008
|
||
|
140509587695120 [label=SliceBackward0]
|
||
|
140509587850960 -> 140509587695120
|
||
|
140509587850960 [label=SliceBackward0]
|
||
|
140509587850864 -> 140509587850960
|
||
|
140509587850864 [label=SliceBackward0]
|
||
|
140509587850768 -> 140509587850864
|
||
|
140509587850768 [label=NativeLayerNormBackward0]
|
||
|
140509587850672 -> 140509587850768
|
||
|
140509587850672 [label=AddBackward0]
|
||
|
140509587850480 -> 140509587850672
|
||
|
140509587850480 [label=NativeDropoutBackward0]
|
||
|
140509587850240 -> 140509587850480
|
||
|
140509587850240 [label=ViewBackward0]
|
||
|
140509587850144 -> 140509587850240
|
||
|
140509587850144 [label=AddmmBackward0]
|
||
|
140509587850048 -> 140509587850144
|
||
|
140509587850048 [label=ToCopyBackward0]
|
||
|
140509587849856 -> 140509587850048
|
||
|
140509590856064 [label="encoder.layer.6.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590856064 -> 140509587849856
|
||
|
140509587849856 [label=AccumulateGrad]
|
||
|
140509587850192 -> 140509587850144
|
||
|
140509587850192 [label=ViewBackward0]
|
||
|
140509587849904 -> 140509587850192
|
||
|
140509587849904 [label=ViewBackward0]
|
||
|
140509587849808 -> 140509587849904
|
||
|
140509587849808 [label=CloneBackward0]
|
||
|
140509587849712 -> 140509587849808
|
||
|
140509587849712 [label=PermuteBackward0]
|
||
|
140509587849616 -> 140509587849712
|
||
|
140509587849616 [label=UnsafeViewBackward0]
|
||
|
140509587849520 -> 140509587849616
|
||
|
140509587849520 [label=BmmBackward0]
|
||
|
140509587849424 -> 140509587849520
|
||
|
140509587849424 [label=ReshapeAliasBackward0]
|
||
|
140509587852976 -> 140509587849424
|
||
|
140509587852976 [label=ExpandBackward0]
|
||
|
140509587853072 -> 140509587852976
|
||
|
140509587853072 [label=ToCopyBackward0]
|
||
|
140509587853168 -> 140509587853072
|
||
|
140509587853168 [label=NativeDropoutBackward0]
|
||
|
140509587853264 -> 140509587853168
|
||
|
140509587853264 [label=SoftmaxBackward0]
|
||
|
140509587849280 -> 140509587853264
|
||
|
140509587849280 [label=AddBackward0]
|
||
|
140509587558608 -> 140509587849280
|
||
|
140509587558608 [label=DivBackward0]
|
||
|
140509587558704 -> 140509587558608
|
||
|
140509587558704 [label=UnsafeViewBackward0]
|
||
|
140509587558800 -> 140509587558704
|
||
|
140509587558800 [label=BmmBackward0]
|
||
|
140509587558896 -> 140509587558800
|
||
|
140509587558896 [label=ReshapeAliasBackward0]
|
||
|
140509587559040 -> 140509587558896
|
||
|
140509587559040 [label=ExpandBackward0]
|
||
|
140509587559136 -> 140509587559040
|
||
|
140509587559136 [label=PermuteBackward0]
|
||
|
140509587559232 -> 140509587559136
|
||
|
140509587559232 [label=ViewBackward0]
|
||
|
140509587559328 -> 140509587559232
|
||
|
140509587559328 [label=ViewBackward0]
|
||
|
140509587559424 -> 140509587559328
|
||
|
140509587559424 [label=AddmmBackward0]
|
||
|
140509587559520 -> 140509587559424
|
||
|
140509587559520 [label=ToCopyBackward0]
|
||
|
140509587559712 -> 140509587559520
|
||
|
140509590856784 [label="encoder.layer.6.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590856784 -> 140509587559712
|
||
|
140509587559712 [label=AccumulateGrad]
|
||
|
140509587559472 -> 140509587559424
|
||
|
140509587559472 [label=ViewBackward0]
|
||
|
140509587559760 -> 140509587559472
|
||
|
140509587559760 [label=ToCopyBackward0]
|
||
|
140509587850432 -> 140509587559760
|
||
|
140509587850432 [label=CatBackward0]
|
||
|
140509587559904 -> 140509587850432
|
||
|
140509587559904 [label=NativeLayerNormBackward0]
|
||
|
140509587560048 -> 140509587559904
|
||
|
140509587560048 [label=AddBackward0]
|
||
|
140509587560240 -> 140509587560048
|
||
|
140509587560240 [label=NativeDropoutBackward0]
|
||
|
140509587560384 -> 140509587560240
|
||
|
140509587560384 [label=ViewBackward0]
|
||
|
140509587560480 -> 140509587560384
|
||
|
140509587560480 [label=AddmmBackward0]
|
||
|
140509587560576 -> 140509587560480
|
||
|
140509587560576 [label=ToCopyBackward0]
|
||
|
140509587560768 -> 140509587560576
|
||
|
140509590857264 [label="encoder.layer.5.experts.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590857264 -> 140509587560768
|
||
|
140509587560768 [label=AccumulateGrad]
|
||
|
140509587560528 -> 140509587560480
|
||
|
140509587560528 [label=ViewBackward0]
|
||
|
140509587560816 -> 140509587560528
|
||
|
140509587560816 [label=GeluBackward0]
|
||
|
140509587560912 -> 140509587560816
|
||
|
140509587560912 [label=ViewBackward0]
|
||
|
140509587561008 -> 140509587560912
|
||
|
140509587561008 [label=AddmmBackward0]
|
||
|
140509587561104 -> 140509587561008
|
||
|
140509587561104 [label=ToCopyBackward0]
|
||
|
140509587561296 -> 140509587561104
|
||
|
140509590857504 [label="encoder.layer.5.experts.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590857504 -> 140509587561296
|
||
|
140509587561296 [label=AccumulateGrad]
|
||
|
140509587561056 -> 140509587561008
|
||
|
140509587561056 [label=ViewBackward0]
|
||
|
140509587561344 -> 140509587561056
|
||
|
140509587561344 [label=ToCopyBackward0]
|
||
|
140509587560192 -> 140509587561344
|
||
|
140509587560192 [label=SliceBackward0]
|
||
|
140509587561488 -> 140509587560192
|
||
|
140509587561488 [label=SliceBackward0]
|
||
|
140509587561584 -> 140509587561488
|
||
|
140509587561584 [label=SliceBackward0]
|
||
|
140509587561680 -> 140509587561584
|
||
|
140509587561680 [label=SliceBackward0]
|
||
|
140509587561776 -> 140509587561680
|
||
|
140509587561776 [label=SliceBackward0]
|
||
|
140509587561872 -> 140509587561776
|
||
|
140509587561872 [label=NativeLayerNormBackward0]
|
||
|
140509587561968 -> 140509587561872
|
||
|
140509587561968 [label=AddBackward0]
|
||
|
140509587562160 -> 140509587561968
|
||
|
140509587562160 [label=NativeDropoutBackward0]
|
||
|
140509587562304 -> 140509587562160
|
||
|
140509587562304 [label=ViewBackward0]
|
||
|
140509587562400 -> 140509587562304
|
||
|
140509587562400 [label=AddmmBackward0]
|
||
|
140509587562448 -> 140509587562400
|
||
|
140509587562448 [label=ToCopyBackward0]
|
||
|
140509587570944 -> 140509587562448
|
||
|
140509590859424 [label="encoder.layer.5.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590859424 -> 140509587570944
|
||
|
140509587570944 [label=AccumulateGrad]
|
||
|
140509587562208 -> 140509587562400
|
||
|
140509587562208 [label=ViewBackward0]
|
||
|
140509587570992 -> 140509587562208
|
||
|
140509587570992 [label=ViewBackward0]
|
||
|
140509587571136 -> 140509587570992
|
||
|
140509587571136 [label=CloneBackward0]
|
||
|
140509587571232 -> 140509587571136
|
||
|
140509587571232 [label=PermuteBackward0]
|
||
|
140509587571328 -> 140509587571232
|
||
|
140509587571328 [label=UnsafeViewBackward0]
|
||
|
140509587571424 -> 140509587571328
|
||
|
140509587571424 [label=BmmBackward0]
|
||
|
140509587571520 -> 140509587571424
|
||
|
140509587571520 [label=ReshapeAliasBackward0]
|
||
|
140509587571664 -> 140509587571520
|
||
|
140509587571664 [label=ExpandBackward0]
|
||
|
140509587571760 -> 140509587571664
|
||
|
140509587571760 [label=ToCopyBackward0]
|
||
|
140509587571856 -> 140509587571760
|
||
|
140509587571856 [label=NativeDropoutBackward0]
|
||
|
140509587571952 -> 140509587571856
|
||
|
140509587571952 [label=SoftmaxBackward0]
|
||
|
140509587572048 -> 140509587571952
|
||
|
140509587572048 [label=AddBackward0]
|
||
|
140509587572144 -> 140509587572048
|
||
|
140509587572144 [label=DivBackward0]
|
||
|
140509587572240 -> 140509587572144
|
||
|
140509587572240 [label=UnsafeViewBackward0]
|
||
|
140509587572336 -> 140509587572240
|
||
|
140509587572336 [label=BmmBackward0]
|
||
|
140509587572432 -> 140509587572336
|
||
|
140509587572432 [label=ReshapeAliasBackward0]
|
||
|
140509587572576 -> 140509587572432
|
||
|
140509587572576 [label=ExpandBackward0]
|
||
|
140509587572672 -> 140509587572576
|
||
|
140509587572672 [label=PermuteBackward0]
|
||
|
140509587572768 -> 140509587572672
|
||
|
140509587572768 [label=ViewBackward0]
|
||
|
140509587572864 -> 140509587572768
|
||
|
140509587572864 [label=ViewBackward0]
|
||
|
140509587572960 -> 140509587572864
|
||
|
140509587572960 [label=AddmmBackward0]
|
||
|
140509587573056 -> 140509587572960
|
||
|
140509587573056 [label=ToCopyBackward0]
|
||
|
140509587573248 -> 140509587573056
|
||
|
140509590872528 [label="encoder.layer.5.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590872528 -> 140509587573248
|
||
|
140509587573248 [label=AccumulateGrad]
|
||
|
140509587573008 -> 140509587572960
|
||
|
140509587573008 [label=ViewBackward0]
|
||
|
140509587573296 -> 140509587573008
|
||
|
140509587573296 [label=ToCopyBackward0]
|
||
|
140509587562112 -> 140509587573296
|
||
|
140509587562112 [label=CatBackward0]
|
||
|
140509587573440 -> 140509587562112
|
||
|
140509587573440 [label=NativeLayerNormBackward0]
|
||
|
140509587573584 -> 140509587573440
|
||
|
140509587573584 [label=AddBackward0]
|
||
|
140509587573776 -> 140509587573584
|
||
|
140509587573776 [label=NativeDropoutBackward0]
|
||
|
140509587573920 -> 140509587573776
|
||
|
140509587573920 [label=ViewBackward0]
|
||
|
140509587574016 -> 140509587573920
|
||
|
140509587574016 [label=AddmmBackward0]
|
||
|
140509587574112 -> 140509587574016
|
||
|
140509587574112 [label=ToCopyBackward0]
|
||
|
140509587574304 -> 140509587574112
|
||
|
140509590873008 [label="encoder.layer.4.experts.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590873008 -> 140509587574304
|
||
|
140509587574304 [label=AccumulateGrad]
|
||
|
140509587574064 -> 140509587574016
|
||
|
140509587574064 [label=ViewBackward0]
|
||
|
140509587574352 -> 140509587574064
|
||
|
140509587574352 [label=GeluBackward0]
|
||
|
140509587574448 -> 140509587574352
|
||
|
140509587574448 [label=ViewBackward0]
|
||
|
140509587574544 -> 140509587574448
|
||
|
140509587574544 [label=AddmmBackward0]
|
||
|
140509587574640 -> 140509587574544
|
||
|
140509587574640 [label=ToCopyBackward0]
|
||
|
140509587574736 -> 140509587574640
|
||
|
140509590873248 [label="encoder.layer.4.experts.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590873248 -> 140509587574736
|
||
|
140509587574736 [label=AccumulateGrad]
|
||
|
140509587574592 -> 140509587574544
|
||
|
140509587574592 [label=ViewBackward0]
|
||
|
140509587591232 -> 140509587574592
|
||
|
140509587591232 [label=ToCopyBackward0]
|
||
|
140509587573728 -> 140509587591232
|
||
|
140509587573728 [label=SliceBackward0]
|
||
|
140509587591472 -> 140509587573728
|
||
|
140509587591472 [label=SliceBackward0]
|
||
|
140509587591568 -> 140509587591472
|
||
|
140509587591568 [label=NativeLayerNormBackward0]
|
||
|
140509587591664 -> 140509587591568
|
||
|
140509587591664 [label=AddBackward0]
|
||
|
140509587591856 -> 140509587591664
|
||
|
140509587591856 [label=NativeDropoutBackward0]
|
||
|
140509587592000 -> 140509587591856
|
||
|
140509587592000 [label=ViewBackward0]
|
||
|
140509587592096 -> 140509587592000
|
||
|
140509587592096 [label=AddmmBackward0]
|
||
|
140509587592192 -> 140509587592096
|
||
|
140509587592192 [label=ToCopyBackward0]
|
||
|
140509587592384 -> 140509587592192
|
||
|
140509590875168 [label="encoder.layer.4.crossattention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590875168 -> 140509587592384
|
||
|
140509587592384 [label=AccumulateGrad]
|
||
|
140509587592144 -> 140509587592096
|
||
|
140509587592144 [label=ViewBackward0]
|
||
|
140509587592432 -> 140509587592144
|
||
|
140509587592432 [label=ViewBackward0]
|
||
|
140509587592528 -> 140509587592432
|
||
|
140509587592528 [label=CloneBackward0]
|
||
|
140509587592624 -> 140509587592528
|
||
|
140509587592624 [label=PermuteBackward0]
|
||
|
140509587592720 -> 140509587592624
|
||
|
140509587592720 [label=UnsafeViewBackward0]
|
||
|
140509587592816 -> 140509587592720
|
||
|
140509587592816 [label=BmmBackward0]
|
||
|
140509587592912 -> 140509587592816
|
||
|
140509587592912 [label=ReshapeAliasBackward0]
|
||
|
140509587593056 -> 140509587592912
|
||
|
140509587593056 [label=ExpandBackward0]
|
||
|
140509587593152 -> 140509587593056
|
||
|
140509587593152 [label=ToCopyBackward0]
|
||
|
140509587593248 -> 140509587593152
|
||
|
140509587593248 [label=NativeDropoutBackward0]
|
||
|
140509587593344 -> 140509587593248
|
||
|
140509587593344 [label=SoftmaxBackward0]
|
||
|
140509587593440 -> 140509587593344
|
||
|
140509587593440 [label=AddBackward0]
|
||
|
140509587593536 -> 140509587593440
|
||
|
140509587593536 [label=DivBackward0]
|
||
|
140509587593632 -> 140509587593536
|
||
|
140509587593632 [label=UnsafeViewBackward0]
|
||
|
140509587593728 -> 140509587593632
|
||
|
140509587593728 [label=BmmBackward0]
|
||
|
140509587593824 -> 140509587593728
|
||
|
140509587593824 [label=ReshapeAliasBackward0]
|
||
|
140509587593968 -> 140509587593824
|
||
|
140509587593968 [label=ExpandBackward0]
|
||
|
140509587594064 -> 140509587593968
|
||
|
140509587594064 [label=PermuteBackward0]
|
||
|
140509587594160 -> 140509587594064
|
||
|
140509587594160 [label=ViewBackward0]
|
||
|
140509587594256 -> 140509587594160
|
||
|
140509587594256 [label=ViewBackward0]
|
||
|
140509587594352 -> 140509587594256
|
||
|
140509587594352 [label=AddmmBackward0]
|
||
|
140509587594448 -> 140509587594352
|
||
|
140509587594448 [label=ToCopyBackward0]
|
||
|
140509587594640 -> 140509587594448
|
||
|
140509590875888 [label="encoder.layer.4.crossattention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590875888 -> 140509587594640
|
||
|
140509587594640 [label=AccumulateGrad]
|
||
|
140509587594400 -> 140509587594352
|
||
|
140509587594400 [label=ViewBackward0]
|
||
|
140509587594688 -> 140509587594400
|
||
|
140509587594688 [label=ToCopyBackward0]
|
||
|
140509587591808 -> 140509587594688
|
||
|
140509587591808 [label=SliceBackward0]
|
||
|
140509587594832 -> 140509587591808
|
||
|
140509587594832 [label=SliceBackward0]
|
||
|
140509587594928 -> 140509587594832
|
||
|
140509587594928 [label=SliceBackward0]
|
||
|
140509587595024 -> 140509587594928
|
||
|
140509587595024 [label=NativeLayerNormBackward0]
|
||
|
140509587595120 -> 140509587595024
|
||
|
140509587595120 [label=AddBackward0]
|
||
|
140509587595216 -> 140509587595120
|
||
|
140509587595216 [label=NativeDropoutBackward0]
|
||
|
140509587607808 -> 140509587595216
|
||
|
140509587607808 [label=ViewBackward0]
|
||
|
140509587607904 -> 140509587607808
|
||
|
140509587607904 [label=AddmmBackward0]
|
||
|
140509587608000 -> 140509587607904
|
||
|
140509587608000 [label=ToCopyBackward0]
|
||
|
140509587608192 -> 140509587608000
|
||
|
140509590892848 [label="encoder.layer.4.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590892848 -> 140509587608192
|
||
|
140509587608192 [label=AccumulateGrad]
|
||
|
140509587607952 -> 140509587607904
|
||
|
140509587607952 [label=ViewBackward0]
|
||
|
140509587608240 -> 140509587607952
|
||
|
140509587608240 [label=ViewBackward0]
|
||
|
140509587608336 -> 140509587608240
|
||
|
140509587608336 [label=CloneBackward0]
|
||
|
140509587608432 -> 140509587608336
|
||
|
140509587608432 [label=PermuteBackward0]
|
||
|
140509587608528 -> 140509587608432
|
||
|
140509587608528 [label=UnsafeViewBackward0]
|
||
|
140509587608624 -> 140509587608528
|
||
|
140509587608624 [label=BmmBackward0]
|
||
|
140509587608720 -> 140509587608624
|
||
|
140509587608720 [label=ReshapeAliasBackward0]
|
||
|
140509587608864 -> 140509587608720
|
||
|
140509587608864 [label=ExpandBackward0]
|
||
|
140509587608960 -> 140509587608864
|
||
|
140509587608960 [label=ToCopyBackward0]
|
||
|
140509587609056 -> 140509587608960
|
||
|
140509587609056 [label=NativeDropoutBackward0]
|
||
|
140509587609152 -> 140509587609056
|
||
|
140509587609152 [label=SoftmaxBackward0]
|
||
|
140509587609248 -> 140509587609152
|
||
|
140509587609248 [label=AddBackward0]
|
||
|
140509587609344 -> 140509587609248
|
||
|
140509587609344 [label=DivBackward0]
|
||
|
140509587609440 -> 140509587609344
|
||
|
140509587609440 [label=UnsafeViewBackward0]
|
||
|
140509587609536 -> 140509587609440
|
||
|
140509587609536 [label=BmmBackward0]
|
||
|
140509587609632 -> 140509587609536
|
||
|
140509587609632 [label=ReshapeAliasBackward0]
|
||
|
140509587609776 -> 140509587609632
|
||
|
140509587609776 [label=ExpandBackward0]
|
||
|
140509587609872 -> 140509587609776
|
||
|
140509587609872 [label=PermuteBackward0]
|
||
|
140509587609968 -> 140509587609872
|
||
|
140509587609968 [label=ViewBackward0]
|
||
|
140509587610064 -> 140509587609968
|
||
|
140509587610064 [label=ViewBackward0]
|
||
|
140509587610160 -> 140509587610064
|
||
|
140509587610160 [label=AddmmBackward0]
|
||
|
140509587610256 -> 140509587610160
|
||
|
140509587610256 [label=ToCopyBackward0]
|
||
|
140509587610448 -> 140509587610256
|
||
|
140509590893568 [label="encoder.layer.4.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590893568 -> 140509587610448
|
||
|
140509587610448 [label=AccumulateGrad]
|
||
|
140509587610208 -> 140509587610160
|
||
|
140509587610208 [label=ViewBackward0]
|
||
|
140509587610496 -> 140509587610208
|
||
|
140509587610496 [label=ToCopyBackward0]
|
||
|
140509587607664 -> 140509587610496
|
||
|
140509587607664 [label=CatBackward0]
|
||
|
140509587610640 -> 140509587607664
|
||
|
140509587610640 [label=NativeLayerNormBackward0]
|
||
|
140509587610784 -> 140509587610640
|
||
|
140509587610784 [label=AddBackward0]
|
||
|
140509587610976 -> 140509587610784
|
||
|
140509587610976 [label=NativeDropoutBackward0]
|
||
|
140509587611120 -> 140509587610976
|
||
|
140509587611120 [label=ViewBackward0]
|
||
|
140509587611216 -> 140509587611120
|
||
|
140509587611216 [label=AddmmBackward0]
|
||
|
140509587611312 -> 140509587611216
|
||
|
140509587611312 [label=ToCopyBackward0]
|
||
|
140509587611504 -> 140509587611312
|
||
|
140509590894048 [label="encoder.layer.3.experts.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590894048 -> 140509587611504
|
||
|
140509587611504 [label=AccumulateGrad]
|
||
|
140509587611264 -> 140509587611216
|
||
|
140509587611264 [label=ViewBackward0]
|
||
|
140509587611552 -> 140509587611264
|
||
|
140509587611552 [label=GeluBackward0]
|
||
|
140509587611408 -> 140509587611552
|
||
|
140509587611408 [label=ViewBackward0]
|
||
|
140509587624096 -> 140509587611408
|
||
|
140509587624096 [label=AddmmBackward0]
|
||
|
140509587624192 -> 140509587624096
|
||
|
140509587624192 [label=ToCopyBackward0]
|
||
|
140509587624384 -> 140509587624192
|
||
|
140509590894288 [label="encoder.layer.3.experts.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590894288 -> 140509587624384
|
||
|
140509587624384 [label=AccumulateGrad]
|
||
|
140509587624144 -> 140509587624096
|
||
|
140509587624144 [label=ViewBackward0]
|
||
|
140509587624432 -> 140509587624144
|
||
|
140509587624432 [label=ToCopyBackward0]
|
||
|
140509587610928 -> 140509587624432
|
||
|
140509587610928 [label=SliceBackward0]
|
||
|
140509587624576 -> 140509587610928
|
||
|
140509587624576 [label=SliceBackward0]
|
||
|
140509587624672 -> 140509587624576
|
||
|
140509587624672 [label=SliceBackward0]
|
||
|
140509587624768 -> 140509587624672
|
||
|
140509587624768 [label=SliceBackward0]
|
||
|
140509587624864 -> 140509587624768
|
||
|
140509587624864 [label=SliceBackward0]
|
||
|
140509587624960 -> 140509587624864
|
||
|
140509587624960 [label=NativeLayerNormBackward0]
|
||
|
140509587625056 -> 140509587624960
|
||
|
140509587625056 [label=AddBackward0]
|
||
|
140509587625248 -> 140509587625056
|
||
|
140509587625248 [label=NativeDropoutBackward0]
|
||
|
140509587625392 -> 140509587625248
|
||
|
140509587625392 [label=ViewBackward0]
|
||
|
140509587625488 -> 140509587625392
|
||
|
140509587625488 [label=AddmmBackward0]
|
||
|
140509587625584 -> 140509587625488
|
||
|
140509587625584 [label=ToCopyBackward0]
|
||
|
140509587625776 -> 140509587625584
|
||
|
140509590896208 [label="encoder.layer.3.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590896208 -> 140509587625776
|
||
|
140509587625776 [label=AccumulateGrad]
|
||
|
140509587625536 -> 140509587625488
|
||
|
140509587625536 [label=ViewBackward0]
|
||
|
140509587625824 -> 140509587625536
|
||
|
140509587625824 [label=ViewBackward0]
|
||
|
140509587625920 -> 140509587625824
|
||
|
140509587625920 [label=CloneBackward0]
|
||
|
140509587626016 -> 140509587625920
|
||
|
140509587626016 [label=PermuteBackward0]
|
||
|
140509587626112 -> 140509587626016
|
||
|
140509587626112 [label=UnsafeViewBackward0]
|
||
|
140509587626208 -> 140509587626112
|
||
|
140509587626208 [label=BmmBackward0]
|
||
|
140509587626304 -> 140509587626208
|
||
|
140509587626304 [label=ReshapeAliasBackward0]
|
||
|
140509587626448 -> 140509587626304
|
||
|
140509587626448 [label=ExpandBackward0]
|
||
|
140509587626544 -> 140509587626448
|
||
|
140509587626544 [label=ToCopyBackward0]
|
||
|
140509587626640 -> 140509587626544
|
||
|
140509587626640 [label=NativeDropoutBackward0]
|
||
|
140509587626736 -> 140509587626640
|
||
|
140509587626736 [label=SoftmaxBackward0]
|
||
|
140509587626832 -> 140509587626736
|
||
|
140509587626832 [label=AddBackward0]
|
||
|
140509587626928 -> 140509587626832
|
||
|
140509587626928 [label=DivBackward0]
|
||
|
140509587627024 -> 140509587626928
|
||
|
140509587627024 [label=UnsafeViewBackward0]
|
||
|
140509587627120 -> 140509587627024
|
||
|
140509587627120 [label=BmmBackward0]
|
||
|
140509587627216 -> 140509587627120
|
||
|
140509587627216 [label=ReshapeAliasBackward0]
|
||
|
140509587627360 -> 140509587627216
|
||
|
140509587627360 [label=ExpandBackward0]
|
||
|
140509587627456 -> 140509587627360
|
||
|
140509587627456 [label=PermuteBackward0]
|
||
|
140509587627552 -> 140509587627456
|
||
|
140509587627552 [label=ViewBackward0]
|
||
|
140509587627648 -> 140509587627552
|
||
|
140509587627648 [label=ViewBackward0]
|
||
|
140509587627744 -> 140509587627648
|
||
|
140509587627744 [label=AddmmBackward0]
|
||
|
140509587627840 -> 140509587627744
|
||
|
140509587627840 [label=ToCopyBackward0]
|
||
|
140509587627984 -> 140509587627840
|
||
|
140509590901120 [label="encoder.layer.3.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590901120 -> 140509587627984
|
||
|
140509587627984 [label=AccumulateGrad]
|
||
|
140509587627792 -> 140509587627744
|
||
|
140509587627792 [label=ViewBackward0]
|
||
|
140509587627936 -> 140509587627792
|
||
|
140509587627936 [label=ToCopyBackward0]
|
||
|
140509587625200 -> 140509587627936
|
||
|
140509587625200 [label=CatBackward0]
|
||
|
140509587640576 -> 140509587625200
|
||
|
140509587640576 [label=NativeLayerNormBackward0]
|
||
|
140509587640720 -> 140509587640576
|
||
|
140509587640720 [label=AddBackward0]
|
||
|
140509587640912 -> 140509587640720
|
||
|
140509587640912 [label=NativeDropoutBackward0]
|
||
|
140509587641056 -> 140509587640912
|
||
|
140509587641056 [label=ViewBackward0]
|
||
|
140509587641152 -> 140509587641056
|
||
|
140509587641152 [label=AddmmBackward0]
|
||
|
140509587641248 -> 140509587641152
|
||
|
140509587641248 [label=ToCopyBackward0]
|
||
|
140509587641440 -> 140509587641248
|
||
|
140509590901600 [label="encoder.layer.2.experts.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590901600 -> 140509587641440
|
||
|
140509587641440 [label=AccumulateGrad]
|
||
|
140509587641200 -> 140509587641152
|
||
|
140509587641200 [label=ViewBackward0]
|
||
|
140509587641488 -> 140509587641200
|
||
|
140509587641488 [label=GeluBackward0]
|
||
|
140509587641584 -> 140509587641488
|
||
|
140509587641584 [label=ViewBackward0]
|
||
|
140509587641680 -> 140509587641584
|
||
|
140509587641680 [label=AddmmBackward0]
|
||
|
140509587641776 -> 140509587641680
|
||
|
140509587641776 [label=ToCopyBackward0]
|
||
|
140509587641968 -> 140509587641776
|
||
|
140509590901840 [label="encoder.layer.2.experts.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590901840 -> 140509587641968
|
||
|
140509587641968 [label=AccumulateGrad]
|
||
|
140509587641728 -> 140509587641680
|
||
|
140509587641728 [label=ViewBackward0]
|
||
|
140509587642016 -> 140509587641728
|
||
|
140509587642016 [label=ToCopyBackward0]
|
||
|
140509587640864 -> 140509587642016
|
||
|
140509587640864 [label=SliceBackward0]
|
||
|
140509587642160 -> 140509587640864
|
||
|
140509587642160 [label=SliceBackward0]
|
||
|
140509587642256 -> 140509587642160
|
||
|
140509587642256 [label=NativeLayerNormBackward0]
|
||
|
140509587642352 -> 140509587642256
|
||
|
140509587642352 [label=AddBackward0]
|
||
|
140509587642544 -> 140509587642352
|
||
|
140509587642544 [label=NativeDropoutBackward0]
|
||
|
140509587642688 -> 140509587642544
|
||
|
140509587642688 [label=ViewBackward0]
|
||
|
140509587642784 -> 140509587642688
|
||
|
140509587642784 [label=AddmmBackward0]
|
||
|
140509587642880 -> 140509587642784
|
||
|
140509587642880 [label=ToCopyBackward0]
|
||
|
140509587643072 -> 140509587642880
|
||
|
140509590903760 [label="encoder.layer.2.crossattention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590903760 -> 140509587643072
|
||
|
140509587643072 [label=AccumulateGrad]
|
||
|
140509587642832 -> 140509587642784
|
||
|
140509587642832 [label=ViewBackward0]
|
||
|
140509587643120 -> 140509587642832
|
||
|
140509587643120 [label=ViewBackward0]
|
||
|
140509587643216 -> 140509587643120
|
||
|
140509587643216 [label=CloneBackward0]
|
||
|
140509587643312 -> 140509587643216
|
||
|
140509587643312 [label=PermuteBackward0]
|
||
|
140509587643408 -> 140509587643312
|
||
|
140509587643408 [label=UnsafeViewBackward0]
|
||
|
140509587643504 -> 140509587643408
|
||
|
140509587643504 [label=BmmBackward0]
|
||
|
140509587643600 -> 140509587643504
|
||
|
140509587643600 [label=ReshapeAliasBackward0]
|
||
|
140509587643744 -> 140509587643600
|
||
|
140509587643744 [label=ExpandBackward0]
|
||
|
140509587643840 -> 140509587643744
|
||
|
140509587643840 [label=ToCopyBackward0]
|
||
|
140509587643936 -> 140509587643840
|
||
|
140509587643936 [label=NativeDropoutBackward0]
|
||
|
140509587644032 -> 140509587643936
|
||
|
140509587644032 [label=SoftmaxBackward0]
|
||
|
140509587644128 -> 140509587644032
|
||
|
140509587644128 [label=AddBackward0]
|
||
|
140509587644224 -> 140509587644128
|
||
|
140509587644224 [label=DivBackward0]
|
||
|
140509587644320 -> 140509587644224
|
||
|
140509587644320 [label=UnsafeViewBackward0]
|
||
|
140509587644368 -> 140509587644320
|
||
|
140509587644368 [label=BmmBackward0]
|
||
|
140509587656864 -> 140509587644368
|
||
|
140509587656864 [label=ReshapeAliasBackward0]
|
||
|
140509587657008 -> 140509587656864
|
||
|
140509587657008 [label=ExpandBackward0]
|
||
|
140509587657104 -> 140509587657008
|
||
|
140509587657104 [label=PermuteBackward0]
|
||
|
140509587657200 -> 140509587657104
|
||
|
140509587657200 [label=ViewBackward0]
|
||
|
140509587657296 -> 140509587657200
|
||
|
140509587657296 [label=ViewBackward0]
|
||
|
140509587657392 -> 140509587657296
|
||
|
140509587657392 [label=AddmmBackward0]
|
||
|
140509587657488 -> 140509587657392
|
||
|
140509587657488 [label=ToCopyBackward0]
|
||
|
140509587657680 -> 140509587657488
|
||
|
140509590904480 [label="encoder.layer.2.crossattention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590904480 -> 140509587657680
|
||
|
140509587657680 [label=AccumulateGrad]
|
||
|
140509587657440 -> 140509587657392
|
||
|
140509587657440 [label=ViewBackward0]
|
||
|
140509587657728 -> 140509587657440
|
||
|
140509587657728 [label=ToCopyBackward0]
|
||
|
140509587642496 -> 140509587657728
|
||
|
140509587642496 [label=SliceBackward0]
|
||
|
140509587657872 -> 140509587642496
|
||
|
140509587657872 [label=SliceBackward0]
|
||
|
140509587657968 -> 140509587657872
|
||
|
140509587657968 [label=SliceBackward0]
|
||
|
140509587658064 -> 140509587657968
|
||
|
140509587658064 [label=NativeLayerNormBackward0]
|
||
|
140509587658160 -> 140509587658064
|
||
|
140509587658160 [label=AddBackward0]
|
||
|
140509587658352 -> 140509587658160
|
||
|
140509587658352 [label=NativeDropoutBackward0]
|
||
|
140509587658496 -> 140509587658352
|
||
|
140509587658496 [label=ViewBackward0]
|
||
|
140509587658592 -> 140509587658496
|
||
|
140509587658592 [label=AddmmBackward0]
|
||
|
140509587658688 -> 140509587658592
|
||
|
140509587658688 [label=ToCopyBackward0]
|
||
|
140509587658880 -> 140509587658688
|
||
|
140509590913248 [label="encoder.layer.2.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590913248 -> 140509587658880
|
||
|
140509587658880 [label=AccumulateGrad]
|
||
|
140509587658640 -> 140509587658592
|
||
|
140509587658640 [label=ViewBackward0]
|
||
|
140509587658928 -> 140509587658640
|
||
|
140509587658928 [label=ViewBackward0]
|
||
|
140509587659024 -> 140509587658928
|
||
|
140509587659024 [label=CloneBackward0]
|
||
|
140509587659120 -> 140509587659024
|
||
|
140509587659120 [label=PermuteBackward0]
|
||
|
140509587659216 -> 140509587659120
|
||
|
140509587659216 [label=UnsafeViewBackward0]
|
||
|
140509587659312 -> 140509587659216
|
||
|
140509587659312 [label=BmmBackward0]
|
||
|
140509587659408 -> 140509587659312
|
||
|
140509587659408 [label=ReshapeAliasBackward0]
|
||
|
140509587659552 -> 140509587659408
|
||
|
140509587659552 [label=ExpandBackward0]
|
||
|
140509587659648 -> 140509587659552
|
||
|
140509587659648 [label=ToCopyBackward0]
|
||
|
140509587659744 -> 140509587659648
|
||
|
140509587659744 [label=NativeDropoutBackward0]
|
||
|
140509587659840 -> 140509587659744
|
||
|
140509587659840 [label=SoftmaxBackward0]
|
||
|
140509587659936 -> 140509587659840
|
||
|
140509587659936 [label=AddBackward0]
|
||
|
140509587660032 -> 140509587659936
|
||
|
140509587660032 [label=DivBackward0]
|
||
|
140509587660128 -> 140509587660032
|
||
|
140509587660128 [label=UnsafeViewBackward0]
|
||
|
140509587660224 -> 140509587660128
|
||
|
140509587660224 [label=BmmBackward0]
|
||
|
140509587660320 -> 140509587660224
|
||
|
140509587660320 [label=ReshapeAliasBackward0]
|
||
|
140509587660464 -> 140509587660320
|
||
|
140509587660464 [label=ExpandBackward0]
|
||
|
140509587660560 -> 140509587660464
|
||
|
140509587660560 [label=PermuteBackward0]
|
||
|
140509587660656 -> 140509587660560
|
||
|
140509587660656 [label=ViewBackward0]
|
||
|
140509587660752 -> 140509587660656
|
||
|
140509587660752 [label=ViewBackward0]
|
||
|
140509587660368 -> 140509587660752
|
||
|
140509587660368 [label=AddmmBackward0]
|
||
|
140509587673296 -> 140509587660368
|
||
|
140509587673296 [label=ToCopyBackward0]
|
||
|
140509587673488 -> 140509587673296
|
||
|
140509590913968 [label="encoder.layer.2.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590913968 -> 140509587673488
|
||
|
140509587673488 [label=AccumulateGrad]
|
||
|
140509587673248 -> 140509587660368
|
||
|
140509587673248 [label=ViewBackward0]
|
||
|
140509587673536 -> 140509587673248
|
||
|
140509587673536 [label=ToCopyBackward0]
|
||
|
140509587658304 -> 140509587673536
|
||
|
140509587658304 [label=CatBackward0]
|
||
|
140509587673680 -> 140509587658304
|
||
|
140509587673680 [label=NativeLayerNormBackward0]
|
||
|
140509587673824 -> 140509587673680
|
||
|
140509587673824 [label=AddBackward0]
|
||
|
140509587674016 -> 140509587673824
|
||
|
140509587674016 [label=NativeDropoutBackward0]
|
||
|
140509587674160 -> 140509587674016
|
||
|
140509587674160 [label=ViewBackward0]
|
||
|
140509587674256 -> 140509587674160
|
||
|
140509587674256 [label=AddmmBackward0]
|
||
|
140509587674352 -> 140509587674256
|
||
|
140509587674352 [label=ToCopyBackward0]
|
||
|
140509587674544 -> 140509587674352
|
||
|
140509590914448 [label="encoder.layer.1.experts.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590914448 -> 140509587674544
|
||
|
140509587674544 [label=AccumulateGrad]
|
||
|
140509587674304 -> 140509587674256
|
||
|
140509587674304 [label=ViewBackward0]
|
||
|
140509587674592 -> 140509587674304
|
||
|
140509587674592 [label=GeluBackward0]
|
||
|
140509587674688 -> 140509587674592
|
||
|
140509587674688 [label=ViewBackward0]
|
||
|
140509587674784 -> 140509587674688
|
||
|
140509587674784 [label=AddmmBackward0]
|
||
|
140509587674880 -> 140509587674784
|
||
|
140509587674880 [label=ToCopyBackward0]
|
||
|
140509587675072 -> 140509587674880
|
||
|
140509590914688 [label="encoder.layer.1.experts.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590914688 -> 140509587675072
|
||
|
140509587675072 [label=AccumulateGrad]
|
||
|
140509587674832 -> 140509587674784
|
||
|
140509587674832 [label=ViewBackward0]
|
||
|
140509587675120 -> 140509587674832
|
||
|
140509587675120 [label=ToCopyBackward0]
|
||
|
140509587673968 -> 140509587675120
|
||
|
140509587673968 [label=SliceBackward0]
|
||
|
140509587675264 -> 140509587673968
|
||
|
140509587675264 [label=SliceBackward0]
|
||
|
140509587675360 -> 140509587675264
|
||
|
140509587675360 [label=SliceBackward0]
|
||
|
140509587675456 -> 140509587675360
|
||
|
140509587675456 [label=SliceBackward0]
|
||
|
140509587675552 -> 140509587675456
|
||
|
140509587675552 [label=SliceBackward0]
|
||
|
140509587675648 -> 140509587675552
|
||
|
140509587675648 [label=NativeLayerNormBackward0]
|
||
|
140509587675744 -> 140509587675648
|
||
|
140509587675744 [label=AddBackward0]
|
||
|
140509587675936 -> 140509587675744
|
||
|
140509587675936 [label=NativeDropoutBackward0]
|
||
|
140509587676080 -> 140509587675936
|
||
|
140509587676080 [label=ViewBackward0]
|
||
|
140509587676176 -> 140509587676080
|
||
|
140509587676176 [label=AddmmBackward0]
|
||
|
140509587676272 -> 140509587676176
|
||
|
140509587676272 [label=ToCopyBackward0]
|
||
|
140509587676464 -> 140509587676272
|
||
|
140509590916608 [label="encoder.layer.1.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590916608 -> 140509587676464
|
||
|
140509587676464 [label=AccumulateGrad]
|
||
|
140509587676224 -> 140509587676176
|
||
|
140509587676224 [label=ViewBackward0]
|
||
|
140509587676512 -> 140509587676224
|
||
|
140509587676512 [label=ViewBackward0]
|
||
|
140509587676608 -> 140509587676512
|
||
|
140509587676608 [label=CloneBackward0]
|
||
|
140509587676704 -> 140509587676608
|
||
|
140509587676704 [label=PermuteBackward0]
|
||
|
140509587676800 -> 140509587676704
|
||
|
140509587676800 [label=UnsafeViewBackward0]
|
||
|
140509587676896 -> 140509587676800
|
||
|
140509587676896 [label=BmmBackward0]
|
||
|
140509587676992 -> 140509587676896
|
||
|
140509587676992 [label=ReshapeAliasBackward0]
|
||
|
140509587677136 -> 140509587676992
|
||
|
140509587677136 [label=ExpandBackward0]
|
||
|
140509587677040 -> 140509587677136
|
||
|
140509587677040 [label=ToCopyBackward0]
|
||
|
140517615505616 -> 140509587677040
|
||
|
140517615505616 [label=NativeDropoutBackward0]
|
||
|
140517615505712 -> 140517615505616
|
||
|
140517615505712 [label=SoftmaxBackward0]
|
||
|
140517615505808 -> 140517615505712
|
||
|
140517615505808 [label=AddBackward0]
|
||
|
140517615505904 -> 140517615505808
|
||
|
140517615505904 [label=DivBackward0]
|
||
|
140517615506000 -> 140517615505904
|
||
|
140517615506000 [label=UnsafeViewBackward0]
|
||
|
140517615506096 -> 140517615506000
|
||
|
140517615506096 [label=BmmBackward0]
|
||
|
140517615506192 -> 140517615506096
|
||
|
140517615506192 [label=ReshapeAliasBackward0]
|
||
|
140517615506336 -> 140517615506192
|
||
|
140517615506336 [label=ExpandBackward0]
|
||
|
140517615506432 -> 140517615506336
|
||
|
140517615506432 [label=PermuteBackward0]
|
||
|
140517615506528 -> 140517615506432
|
||
|
140517615506528 [label=ViewBackward0]
|
||
|
140517615506624 -> 140517615506528
|
||
|
140517615506624 [label=ViewBackward0]
|
||
|
140517615506720 -> 140517615506624
|
||
|
140517615506720 [label=AddmmBackward0]
|
||
|
140517615506816 -> 140517615506720
|
||
|
140517615506816 [label=ToCopyBackward0]
|
||
|
140517615507008 -> 140517615506816
|
||
|
140509590933808 [label="encoder.layer.1.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590933808 -> 140517615507008
|
||
|
140517615507008 [label=AccumulateGrad]
|
||
|
140517615506768 -> 140517615506720
|
||
|
140517615506768 [label=ViewBackward0]
|
||
|
140517615507056 -> 140517615506768
|
||
|
140517615507056 [label=ToCopyBackward0]
|
||
|
140509587675888 -> 140517615507056
|
||
|
140509587675888 [label=CatBackward0]
|
||
|
140517615507200 -> 140509587675888
|
||
|
140517615507200 [label=NativeLayerNormBackward0]
|
||
|
140517615507344 -> 140517615507200
|
||
|
140517615507344 [label=AddBackward0]
|
||
|
140517615507536 -> 140517615507344
|
||
|
140517615507536 [label=NativeDropoutBackward0]
|
||
|
140517615507680 -> 140517615507536
|
||
|
140517615507680 [label=ViewBackward0]
|
||
|
140517615507776 -> 140517615507680
|
||
|
140517615507776 [label=AddmmBackward0]
|
||
|
140517615507872 -> 140517615507776
|
||
|
140517615507872 [label=ToCopyBackward0]
|
||
|
140517615508064 -> 140517615507872
|
||
|
140509590934288 [label="encoder.layer.0.experts.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590934288 -> 140517615508064
|
||
|
140517615508064 [label=AccumulateGrad]
|
||
|
140517615507824 -> 140517615507776
|
||
|
140517615507824 [label=ViewBackward0]
|
||
|
140517615508112 -> 140517615507824
|
||
|
140517615508112 [label=GeluBackward0]
|
||
|
140517615508208 -> 140517615508112
|
||
|
140517615508208 [label=ViewBackward0]
|
||
|
140517615508304 -> 140517615508208
|
||
|
140517615508304 [label=AddmmBackward0]
|
||
|
140517615508400 -> 140517615508304
|
||
|
140517615508400 [label=ToCopyBackward0]
|
||
|
140517615508592 -> 140517615508400
|
||
|
140509590934528 [label="encoder.layer.0.experts.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590934528 -> 140517615508592
|
||
|
140517615508592 [label=AccumulateGrad]
|
||
|
140517615508352 -> 140517615508304
|
||
|
140517615508352 [label=ViewBackward0]
|
||
|
140517615508640 -> 140517615508352
|
||
|
140517615508640 [label=ToCopyBackward0]
|
||
|
140517615507488 -> 140517615508640
|
||
|
140517615507488 [label=SliceBackward0]
|
||
|
140517615508784 -> 140517615507488
|
||
|
140517615508784 [label=SliceBackward0]
|
||
|
140517615508880 -> 140517615508784
|
||
|
140517615508880 [label=NativeLayerNormBackward0]
|
||
|
140517615508976 -> 140517615508880
|
||
|
140517615508976 [label=AddBackward0]
|
||
|
140517615509168 -> 140517615508976
|
||
|
140517615509168 [label=NativeDropoutBackward0]
|
||
|
140517615509312 -> 140517615509168
|
||
|
140517615509312 [label=ViewBackward0]
|
||
|
140517615509408 -> 140517615509312
|
||
|
140517615509408 [label=AddmmBackward0]
|
||
|
140517615509456 -> 140517615509408
|
||
|
140517615509456 [label=ToCopyBackward0]
|
||
|
140517615522048 -> 140517615509456
|
||
|
140509590936448 [label="encoder.layer.0.crossattention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590936448 -> 140517615522048
|
||
|
140517615522048 [label=AccumulateGrad]
|
||
|
140517615509216 -> 140517615509408
|
||
|
140517615509216 [label=ViewBackward0]
|
||
|
140517615522096 -> 140517615509216
|
||
|
140517615522096 [label=ViewBackward0]
|
||
|
140517615522192 -> 140517615522096
|
||
|
140517615522192 [label=CloneBackward0]
|
||
|
140517615522288 -> 140517615522192
|
||
|
140517615522288 [label=PermuteBackward0]
|
||
|
140517615522384 -> 140517615522288
|
||
|
140517615522384 [label=UnsafeViewBackward0]
|
||
|
140517615522480 -> 140517615522384
|
||
|
140517615522480 [label=BmmBackward0]
|
||
|
140517615522576 -> 140517615522480
|
||
|
140517615522576 [label=ReshapeAliasBackward0]
|
||
|
140517615522720 -> 140517615522576
|
||
|
140517615522720 [label=ExpandBackward0]
|
||
|
140517615522816 -> 140517615522720
|
||
|
140517615522816 [label=ToCopyBackward0]
|
||
|
140517615522912 -> 140517615522816
|
||
|
140517615522912 [label=NativeDropoutBackward0]
|
||
|
140517615523008 -> 140517615522912
|
||
|
140517615523008 [label=SoftmaxBackward0]
|
||
|
140517615523104 -> 140517615523008
|
||
|
140517615523104 [label=AddBackward0]
|
||
|
140517615523200 -> 140517615523104
|
||
|
140517615523200 [label=DivBackward0]
|
||
|
140517615523296 -> 140517615523200
|
||
|
140517615523296 [label=UnsafeViewBackward0]
|
||
|
140517615523392 -> 140517615523296
|
||
|
140517615523392 [label=BmmBackward0]
|
||
|
140517615523488 -> 140517615523392
|
||
|
140517615523488 [label=ReshapeAliasBackward0]
|
||
|
140517615523632 -> 140517615523488
|
||
|
140517615523632 [label=ExpandBackward0]
|
||
|
140517615523728 -> 140517615523632
|
||
|
140517615523728 [label=PermuteBackward0]
|
||
|
140517615523824 -> 140517615523728
|
||
|
140517615523824 [label=ViewBackward0]
|
||
|
140517615523920 -> 140517615523824
|
||
|
140517615523920 [label=ViewBackward0]
|
||
|
140517615524016 -> 140517615523920
|
||
|
140517615524016 [label=AddmmBackward0]
|
||
|
140517615524112 -> 140517615524016
|
||
|
140517615524112 [label=ToCopyBackward0]
|
||
|
140517615524304 -> 140517615524112
|
||
|
140509590937168 [label="encoder.layer.0.crossattention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590937168 -> 140517615524304
|
||
|
140517615524304 [label=AccumulateGrad]
|
||
|
140517615524064 -> 140517615524016
|
||
|
140517615524064 [label=ViewBackward0]
|
||
|
140517615524352 -> 140517615524064
|
||
|
140517615524352 [label=ToCopyBackward0]
|
||
|
140517615509120 -> 140517615524352
|
||
|
140517615509120 [label=SliceBackward0]
|
||
|
140517615524496 -> 140517615509120
|
||
|
140517615524496 [label=SliceBackward0]
|
||
|
140517615524592 -> 140517615524496
|
||
|
140517615524592 [label=SliceBackward0]
|
||
|
140517615524688 -> 140517615524592
|
||
|
140517615524688 [label=NativeLayerNormBackward0]
|
||
|
140517615524784 -> 140517615524688
|
||
|
140517615524784 [label=AddBackward0]
|
||
|
140517615524976 -> 140517615524784
|
||
|
140517615524976 [label=NativeDropoutBackward0]
|
||
|
140517615525120 -> 140517615524976
|
||
|
140517615525120 [label=ViewBackward0]
|
||
|
140517615525216 -> 140517615525120
|
||
|
140517615525216 [label=AddmmBackward0]
|
||
|
140517615525312 -> 140517615525216
|
||
|
140517615525312 [label=ToCopyBackward0]
|
||
|
140517615525504 -> 140517615525312
|
||
|
140509590945936 [label="encoder.layer.0.attention.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590945936 -> 140517615525504
|
||
|
140517615525504 [label=AccumulateGrad]
|
||
|
140517615525264 -> 140517615525216
|
||
|
140517615525264 [label=ViewBackward0]
|
||
|
140517615525552 -> 140517615525264
|
||
|
140517615525552 [label=ViewBackward0]
|
||
|
140517615525648 -> 140517615525552
|
||
|
140517615525648 [label=CloneBackward0]
|
||
|
140517615525744 -> 140517615525648
|
||
|
140517615525744 [label=PermuteBackward0]
|
||
|
140517615525840 -> 140517615525744
|
||
|
140517615525840 [label=UnsafeViewBackward0]
|
||
|
140517615525456 -> 140517615525840
|
||
|
140517615525456 [label=BmmBackward0]
|
||
|
140517615538384 -> 140517615525456
|
||
|
140517615538384 [label=ReshapeAliasBackward0]
|
||
|
140517615538528 -> 140517615538384
|
||
|
140517615538528 [label=ExpandBackward0]
|
||
|
140517615538624 -> 140517615538528
|
||
|
140517615538624 [label=ToCopyBackward0]
|
||
|
140517615538720 -> 140517615538624
|
||
|
140517615538720 [label=NativeDropoutBackward0]
|
||
|
140517615538816 -> 140517615538720
|
||
|
140517615538816 [label=SoftmaxBackward0]
|
||
|
140517615538912 -> 140517615538816
|
||
|
140517615538912 [label=AddBackward0]
|
||
|
140517615539008 -> 140517615538912
|
||
|
140517615539008 [label=DivBackward0]
|
||
|
140517615539104 -> 140517615539008
|
||
|
140517615539104 [label=UnsafeViewBackward0]
|
||
|
140517615539200 -> 140517615539104
|
||
|
140517615539200 [label=BmmBackward0]
|
||
|
140517615539296 -> 140517615539200
|
||
|
140517615539296 [label=ReshapeAliasBackward0]
|
||
|
140517615539440 -> 140517615539296
|
||
|
140517615539440 [label=ExpandBackward0]
|
||
|
140517615539536 -> 140517615539440
|
||
|
140517615539536 [label=PermuteBackward0]
|
||
|
140517615539632 -> 140517615539536
|
||
|
140517615539632 [label=ViewBackward0]
|
||
|
140517615539728 -> 140517615539632
|
||
|
140517615539728 [label=ViewBackward0]
|
||
|
140517615539824 -> 140517615539728
|
||
|
140517615539824 [label=AddmmBackward0]
|
||
|
140517615539920 -> 140517615539824
|
||
|
140517615539920 [label=ToCopyBackward0]
|
||
|
140517615540112 -> 140517615539920
|
||
|
140509590600896 [label="encoder.layer.0.attention.self.query.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590600896 -> 140517615540112
|
||
|
140517615540112 [label=AccumulateGrad]
|
||
|
140517615539872 -> 140517615539824
|
||
|
140517615539872 [label=ViewBackward0]
|
||
|
140517615540160 -> 140517615539872
|
||
|
140517615540160 [label=ToCopyBackward0]
|
||
|
140517615524928 -> 140517615540160
|
||
|
140517615524928 [label=NativeDropoutBackward0]
|
||
|
140517615540304 -> 140517615524928
|
||
|
140517615540304 [label=NativeLayerNormBackward0]
|
||
|
140517615540400 -> 140517615540304
|
||
|
140517615540400 [label=CatBackward0]
|
||
|
140517615540592 -> 140517615540400
|
||
|
140517615540592 [label=ExpandBackward0]
|
||
|
140517615540736 -> 140517615540592
|
||
|
140509590947296 [label="
|
||
|
(1, 32, 768)" fillcolor=lightblue]
|
||
|
140509590947296 -> 140517615540736
|
||
|
140517615540736 [label=AccumulateGrad]
|
||
|
140517615540544 -> 140517615540400
|
||
|
140517615540544 [label=AddBackward0]
|
||
|
140517615540784 -> 140517615540544
|
||
|
140517615540784 [label=EmbeddingBackward0]
|
||
|
140517615540928 -> 140517615540784
|
||
|
140509590947856 [label="embeddings.word_embeddings.weight
|
||
|
(30523, 768)" fillcolor=lightblue]
|
||
|
140509590947856 -> 140517615540928
|
||
|
140517615540928 [label=AccumulateGrad]
|
||
|
140517615540832 -> 140517615540544
|
||
|
140517615540832 [label=EmbeddingBackward0]
|
||
|
140517615540976 -> 140517615540832
|
||
|
140509939919504 [label="embeddings.position_embeddings.weight
|
||
|
(512, 768)" fillcolor=lightblue]
|
||
|
140509939919504 -> 140517615540976
|
||
|
140517615540976 [label=AccumulateGrad]
|
||
|
140517615540352 -> 140517615540304
|
||
|
140509590958304 [label="embeddings.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590958304 -> 140517615540352
|
||
|
140517615540352 [label=AccumulateGrad]
|
||
|
140517615540016 -> 140517615540304
|
||
|
140509590946656 [label="embeddings.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590946656 -> 140517615540016
|
||
|
140517615540016 [label=AccumulateGrad]
|
||
|
140517615539344 -> 140517615539824
|
||
|
140517615539344 [label=TBackward0]
|
||
|
140517615540064 -> 140517615539344
|
||
|
140517615540064 [label=ToCopyBackward0]
|
||
|
140517615540496 -> 140517615540064
|
||
|
140509986890912 [label="encoder.layer.0.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509986890912 -> 140517615540496
|
||
|
140517615540496 [label=AccumulateGrad]
|
||
|
140517615539248 -> 140517615539200
|
||
|
140517615539248 [label=ReshapeAliasBackward0]
|
||
|
140517615539584 -> 140517615539248
|
||
|
140517615539584 [label=ExpandBackward0]
|
||
|
140517615539776 -> 140517615539584
|
||
|
140517615539776 [label=TransposeBackward0]
|
||
|
140517615540256 -> 140517615539776
|
||
|
140517615540256 [label=PermuteBackward0]
|
||
|
140517615541024 -> 140517615540256
|
||
|
140517615541024 [label=ViewBackward0]
|
||
|
140517615540208 -> 140517615541024
|
||
|
140517615540208 [label=ViewBackward0]
|
||
|
140517615540640 -> 140517615540208
|
||
|
140517615540640 [label=AddmmBackward0]
|
||
|
140517615541120 -> 140517615540640
|
||
|
140517615541120 [label=ToCopyBackward0]
|
||
|
140517615541312 -> 140517615541120
|
||
|
140509590946096 [label="encoder.layer.0.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590946096 -> 140517615541312
|
||
|
140517615541312 [label=AccumulateGrad]
|
||
|
140517615540880 -> 140517615540640
|
||
|
140517615540880 [label=ViewBackward0]
|
||
|
140517615541360 -> 140517615540880
|
||
|
140517615541360 [label=ToCopyBackward0]
|
||
|
140517615524928 -> 140517615541360
|
||
|
140517615539392 -> 140517615540640
|
||
|
140517615539392 [label=TBackward0]
|
||
|
140517615541216 -> 140517615539392
|
||
|
140517615541216 [label=ToCopyBackward0]
|
||
|
140517615541504 -> 140517615541216
|
||
|
140509590600816 [label="encoder.layer.0.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590600816 -> 140517615541504
|
||
|
140517615541504 [label=AccumulateGrad]
|
||
|
140517615538336 -> 140517615525456
|
||
|
140517615538336 [label=ReshapeAliasBackward0]
|
||
|
140517615538672 -> 140517615538336
|
||
|
140517615538672 [label=ExpandBackward0]
|
||
|
140517615538864 -> 140517615538672
|
||
|
140517615538864 [label=PermuteBackward0]
|
||
|
140517615539056 -> 140517615538864
|
||
|
140517615539056 [label=ViewBackward0]
|
||
|
140517615538432 -> 140517615539056
|
||
|
140517615538432 [label=ViewBackward0]
|
||
|
140517615539680 -> 140517615538432
|
||
|
140517615539680 [label=AddmmBackward0]
|
||
|
140517615540448 -> 140517615539680
|
||
|
140517615540448 [label=ToCopyBackward0]
|
||
|
140517615541456 -> 140517615540448
|
||
|
140509590945856 [label="encoder.layer.0.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590945856 -> 140517615541456
|
||
|
140517615541456 [label=AccumulateGrad]
|
||
|
140517615539968 -> 140517615539680
|
||
|
140517615539968 [label=ViewBackward0]
|
||
|
140517615541264 -> 140517615539968
|
||
|
140517615541264 [label=ToCopyBackward0]
|
||
|
140517615524928 -> 140517615541264
|
||
|
140517615538480 -> 140517615539680
|
||
|
140517615538480 [label=TBackward0]
|
||
|
140517615541072 -> 140517615538480
|
||
|
140517615541072 [label=ToCopyBackward0]
|
||
|
140517615541408 -> 140517615541072
|
||
|
140509590946176 [label="encoder.layer.0.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590946176 -> 140517615541408
|
||
|
140517615541408 [label=AccumulateGrad]
|
||
|
140517615525024 -> 140517615525216
|
||
|
140517615525024 [label=TBackward0]
|
||
|
140517615525696 -> 140517615525024
|
||
|
140517615525696 [label=ToCopyBackward0]
|
||
|
140517615525792 -> 140517615525696
|
||
|
140509987117712 [label="encoder.layer.0.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509987117712 -> 140517615525792
|
||
|
140517615525792 [label=AccumulateGrad]
|
||
|
140517615524928 -> 140517615524784
|
||
|
140517615524736 -> 140517615524688
|
||
|
140509590937328 [label="encoder.layer.0.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590937328 -> 140517615524736
|
||
|
140517615524736 [label=AccumulateGrad]
|
||
|
140517615524208 -> 140517615524688
|
||
|
140509590937408 [label="encoder.layer.0.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590937408 -> 140517615524208
|
||
|
140517615524208 [label=AccumulateGrad]
|
||
|
140517615523536 -> 140517615524016
|
||
|
140517615523536 [label=TBackward0]
|
||
|
140517615524256 -> 140517615523536
|
||
|
140517615524256 [label=ToCopyBackward0]
|
||
|
140517615524640 -> 140517615524256
|
||
|
140509590937088 [label="encoder.layer.0.crossattention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590937088 -> 140517615524640
|
||
|
140517615524640 [label=AccumulateGrad]
|
||
|
140517615523440 -> 140517615523392
|
||
|
140517615523440 [label=ReshapeAliasBackward0]
|
||
|
140517615523776 -> 140517615523440
|
||
|
140517615523776 [label=ExpandBackward0]
|
||
|
140517615523968 -> 140517615523776
|
||
|
140517615523968 [label=TransposeBackward0]
|
||
|
140517615524448 -> 140517615523968
|
||
|
140517615524448 [label=PermuteBackward0]
|
||
|
140517615524880 -> 140517615524448
|
||
|
140517615524880 [label=ViewBackward0]
|
||
|
140517615524400 -> 140517615524880
|
||
|
140517615524400 [label=ViewBackward0]
|
||
|
140517615525168 -> 140517615524400
|
||
|
140517615525168 [label=AddmmBackward0]
|
||
|
140517615525408 -> 140517615525168
|
||
|
140517615525408 [label=ToCopyBackward0]
|
||
|
140517615538288 -> 140517615525408
|
||
|
140509590936928 [label="encoder.layer.0.crossattention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590936928 -> 140517615538288
|
||
|
140517615538288 [label=AccumulateGrad]
|
||
|
140517615525360 -> 140517615525168
|
||
|
140517615525360 [label=ViewBackward0]
|
||
|
140517615538768 -> 140517615525360
|
||
|
140517615538768 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140517615538768
|
||
|
140517615539152 [label=NativeLayerNormBackward0]
|
||
|
140517615540688 -> 140517615539152
|
||
|
140509590598736 [label="
|
||
|
(1408)" fillcolor=lightblue]
|
||
|
140509590598736 -> 140517615540688
|
||
|
140517615540688 [label=AccumulateGrad]
|
||
|
140517615539488 -> 140517615539152
|
||
|
140509590598976 [label="
|
||
|
(1408)" fillcolor=lightblue]
|
||
|
140509590598976 -> 140517615539488
|
||
|
140517615539488 [label=AccumulateGrad]
|
||
|
140517615523584 -> 140517615525168
|
||
|
140517615523584 [label=TBackward0]
|
||
|
140517615538240 -> 140517615523584
|
||
|
140517615538240 [label=ToCopyBackward0]
|
||
|
140517615541168 -> 140517615538240
|
||
|
140509590936848 [label="encoder.layer.0.crossattention.self.key.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590936848 -> 140517615541168
|
||
|
140517615541168 [label=AccumulateGrad]
|
||
|
140517615522528 -> 140517615522480
|
||
|
140517615522528 [label=ReshapeAliasBackward0]
|
||
|
140517615522864 -> 140517615522528
|
||
|
140517615522864 [label=ExpandBackward0]
|
||
|
140517615523056 -> 140517615522864
|
||
|
140517615523056 [label=PermuteBackward0]
|
||
|
140517615523248 -> 140517615523056
|
||
|
140517615523248 [label=ViewBackward0]
|
||
|
140517615522624 -> 140517615523248
|
||
|
140517615522624 [label=ViewBackward0]
|
||
|
140517615523872 -> 140517615522624
|
||
|
140517615523872 [label=AddmmBackward0]
|
||
|
140517615524544 -> 140517615523872
|
||
|
140517615524544 [label=ToCopyBackward0]
|
||
|
140517615525600 -> 140517615524544
|
||
|
140509590936688 [label="encoder.layer.0.crossattention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590936688 -> 140517615525600
|
||
|
140517615525600 [label=AccumulateGrad]
|
||
|
140517615524160 -> 140517615523872
|
||
|
140517615524160 [label=ViewBackward0]
|
||
|
140517615525072 -> 140517615524160
|
||
|
140517615525072 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140517615525072
|
||
|
140517615522672 -> 140517615523872
|
||
|
140517615522672 [label=TBackward0]
|
||
|
140517615538576 -> 140517615522672
|
||
|
140517615538576 [label=ToCopyBackward0]
|
||
|
140517615538960 -> 140517615538576
|
||
|
140509590936608 [label="encoder.layer.0.crossattention.self.value.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590936608 -> 140517615538960
|
||
|
140517615538960 [label=AccumulateGrad]
|
||
|
140517615521856 -> 140517615509408
|
||
|
140517615521856 [label=TBackward0]
|
||
|
140517615522240 -> 140517615521856
|
||
|
140517615522240 [label=ToCopyBackward0]
|
||
|
140517615522432 -> 140517615522240
|
||
|
140509590936368 [label="encoder.layer.0.crossattention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590936368 -> 140517615522432
|
||
|
140517615522432 [label=AccumulateGrad]
|
||
|
140517615509120 -> 140517615508976
|
||
|
140517615508928 -> 140517615508880
|
||
|
140509590936128 [label="encoder.layer.0.crossattention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590936128 -> 140517615508928
|
||
|
140517615508928 [label=AccumulateGrad]
|
||
|
140517615508496 -> 140517615508880
|
||
|
140509590936208 [label="encoder.layer.0.crossattention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590936208 -> 140517615508496
|
||
|
140517615508496 [label=AccumulateGrad]
|
||
|
140517615508016 -> 140517615508304
|
||
|
140517615508016 [label=TBackward0]
|
||
|
140517615508544 -> 140517615508016
|
||
|
140517615508544 [label=ToCopyBackward0]
|
||
|
140517615509024 -> 140517615508544
|
||
|
140509590934448 [label="encoder.layer.0.experts.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590934448 -> 140517615509024
|
||
|
140517615509024 [label=AccumulateGrad]
|
||
|
140517615507584 -> 140517615507776
|
||
|
140517615507584 [label=TBackward0]
|
||
|
140517615508256 -> 140517615507584
|
||
|
140517615508256 [label=ToCopyBackward0]
|
||
|
140517615508736 -> 140517615508256
|
||
|
140509590934208 [label="encoder.layer.0.experts.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590934208 -> 140517615508736
|
||
|
140517615508736 [label=AccumulateGrad]
|
||
|
140517615507488 -> 140517615507344
|
||
|
140517615507296 -> 140517615507200
|
||
|
140509590933968 [label="encoder.layer.0.experts.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590933968 -> 140517615507296
|
||
|
140517615507296 [label=AccumulateGrad]
|
||
|
140517615507248 -> 140517615507200
|
||
|
140509590934048 [label="encoder.layer.0.experts.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590934048 -> 140517615507248
|
||
|
140517615507248 [label=AccumulateGrad]
|
||
|
140517615506960 -> 140509587675888
|
||
|
140517615506960 [label=NativeLayerNormBackward0]
|
||
|
140517615507632 -> 140517615506960
|
||
|
140517615507632 [label=AddBackward0]
|
||
|
140517615508448 -> 140517615507632
|
||
|
140517615508448 [label=NativeDropoutBackward0]
|
||
|
140517615508160 -> 140517615508448
|
||
|
140517615508160 [label=ViewBackward0]
|
||
|
140517615508688 -> 140517615508160
|
||
|
140517615508688 [label=AddmmBackward0]
|
||
|
140517615509360 -> 140517615508688
|
||
|
140517615509360 [label=ToCopyBackward0]
|
||
|
140517615522000 -> 140517615509360
|
||
|
140509590935728 [label="encoder.layer.0.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590935728 -> 140517615522000
|
||
|
140517615522000 [label=AccumulateGrad]
|
||
|
140517615509264 -> 140517615508688
|
||
|
140517615509264 [label=ViewBackward0]
|
||
|
140517615522144 -> 140517615509264
|
||
|
140517615522144 [label=GeluBackward0]
|
||
|
140517615523152 -> 140517615522144
|
||
|
140517615523152 [label=ViewBackward0]
|
||
|
140517615523680 -> 140517615523152
|
||
|
140517615523680 [label=AddmmBackward0]
|
||
|
140517615524832 -> 140517615523680
|
||
|
140517615524832 [label=ToCopyBackward0]
|
||
|
140517615541552 -> 140517615524832
|
||
|
140509590935968 [label="encoder.layer.0.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590935968 -> 140517615541552
|
||
|
140517615541552 [label=AccumulateGrad]
|
||
|
140517615522768 -> 140517615523680
|
||
|
140517615522768 [label=ViewBackward0]
|
||
|
140517615541792 -> 140517615522768
|
||
|
140517615541792 [label=ToCopyBackward0]
|
||
|
140517615507968 -> 140517615541792
|
||
|
140517615507968 [label=SliceBackward0]
|
||
|
140517615541936 -> 140517615507968
|
||
|
140517615541936 [label=SliceBackward0]
|
||
|
140517615542032 -> 140517615541936
|
||
|
140517615542032 [label=SliceBackward0]
|
||
|
140517615524688 -> 140517615542032
|
||
|
140517615541696 -> 140517615523680
|
||
|
140517615541696 [label=TBackward0]
|
||
|
140517615541600 -> 140517615541696
|
||
|
140517615541600 [label=ToCopyBackward0]
|
||
|
140517615542128 -> 140517615541600
|
||
|
140509590935888 [label="encoder.layer.0.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590935888 -> 140517615542128
|
||
|
140517615542128 [label=AccumulateGrad]
|
||
|
140517615521904 -> 140517615508688
|
||
|
140517615521904 [label=TBackward0]
|
||
|
140517615523344 -> 140517615521904
|
||
|
140517615523344 [label=ToCopyBackward0]
|
||
|
140517615522960 -> 140517615523344
|
||
|
140509590935648 [label="encoder.layer.0.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590935648 -> 140517615522960
|
||
|
140517615522960 [label=AccumulateGrad]
|
||
|
140517615507968 -> 140517615507632
|
||
|
140517615507440 -> 140517615506960
|
||
|
140509590935408 [label="encoder.layer.0.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590935408 -> 140517615507440
|
||
|
140517615507440 [label=AccumulateGrad]
|
||
|
140517615507392 -> 140517615506960
|
||
|
140509590935488 [label="encoder.layer.0.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590935488 -> 140517615507392
|
||
|
140517615507392 [label=AccumulateGrad]
|
||
|
140517615506240 -> 140517615506720
|
||
|
140517615506240 [label=TBackward0]
|
||
|
140517615506912 -> 140517615506240
|
||
|
140517615506912 [label=ToCopyBackward0]
|
||
|
140517615507920 -> 140517615506912
|
||
|
140509590933728 [label="encoder.layer.1.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590933728 -> 140517615507920
|
||
|
140517615507920 [label=AccumulateGrad]
|
||
|
140517615506144 -> 140517615506096
|
||
|
140517615506144 [label=ReshapeAliasBackward0]
|
||
|
140517615506480 -> 140517615506144
|
||
|
140517615506480 [label=ExpandBackward0]
|
||
|
140517615506672 -> 140517615506480
|
||
|
140517615506672 [label=TransposeBackward0]
|
||
|
140517615507152 -> 140517615506672
|
||
|
140517615507152 [label=PermuteBackward0]
|
||
|
140517615509072 -> 140517615507152
|
||
|
140517615509072 [label=ViewBackward0]
|
||
|
140517615507104 -> 140517615509072
|
||
|
140517615507104 [label=ViewBackward0]
|
||
|
140517615522336 -> 140517615507104
|
||
|
140517615522336 [label=AddmmBackward0]
|
||
|
140517615506288 -> 140517615522336
|
||
|
140517615506288 [label=ToCopyBackward0]
|
||
|
140517615541840 -> 140517615506288
|
||
|
140509590917008 [label="encoder.layer.1.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590917008 -> 140517615541840
|
||
|
140517615541840 [label=AccumulateGrad]
|
||
|
140517615541744 -> 140517615522336
|
||
|
140517615541744 [label=ViewBackward0]
|
||
|
140517615542176 -> 140517615541744
|
||
|
140517615542176 [label=ToCopyBackward0]
|
||
|
140509587675888 -> 140517615542176
|
||
|
140517615541888 -> 140517615522336
|
||
|
140517615541888 [label=TBackward0]
|
||
|
140517615542080 -> 140517615541888
|
||
|
140517615542080 [label=ToCopyBackward0]
|
||
|
140517615542224 -> 140517615542080
|
||
|
140509590933568 [label="encoder.layer.1.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590933568 -> 140517615542224
|
||
|
140517615542224 [label=AccumulateGrad]
|
||
|
140509587676944 -> 140509587676896
|
||
|
140509587676944 [label=ReshapeAliasBackward0]
|
||
|
140509587677088 -> 140509587676944
|
||
|
140509587677088 [label=ExpandBackward0]
|
||
|
140517615505760 -> 140509587677088
|
||
|
140517615505760 [label=PermuteBackward0]
|
||
|
140517615505952 -> 140517615505760
|
||
|
140517615505952 [label=ViewBackward0]
|
||
|
140517615505472 -> 140517615505952
|
||
|
140517615505472 [label=ViewBackward0]
|
||
|
140517615506576 -> 140517615505472
|
||
|
140517615506576 [label=AddmmBackward0]
|
||
|
140517615507728 -> 140517615506576
|
||
|
140517615507728 [label=ToCopyBackward0]
|
||
|
140517615541648 -> 140517615507728
|
||
|
140509590916848 [label="encoder.layer.1.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590916848 -> 140517615541648
|
||
|
140517615541648 [label=AccumulateGrad]
|
||
|
140517615506864 -> 140517615506576
|
||
|
140517615506864 [label=ViewBackward0]
|
||
|
140517615521952 -> 140517615506864
|
||
|
140517615521952 [label=ToCopyBackward0]
|
||
|
140509587675888 -> 140517615521952
|
||
|
140517615505520 -> 140517615506576
|
||
|
140517615505520 [label=TBackward0]
|
||
|
140517615541984 -> 140517615505520
|
||
|
140517615541984 [label=ToCopyBackward0]
|
||
|
140517615591632 -> 140517615541984
|
||
|
140509590916768 [label="encoder.layer.1.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590916768 -> 140517615591632
|
||
|
140517615591632 [label=AccumulateGrad]
|
||
|
140509587675984 -> 140509587676176
|
||
|
140509587675984 [label=TBackward0]
|
||
|
140509587676656 -> 140509587675984
|
||
|
140509587676656 [label=ToCopyBackward0]
|
||
|
140509587676848 -> 140509587676656
|
||
|
140509590916528 [label="encoder.layer.1.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590916528 -> 140509587676848
|
||
|
140509587676848 [label=AccumulateGrad]
|
||
|
140509587675888 -> 140509587675744
|
||
|
140509587675696 -> 140509587675648
|
||
|
140509590916288 [label="encoder.layer.1.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590916288 -> 140509587675696
|
||
|
140509587675696 [label=AccumulateGrad]
|
||
|
140509587674976 -> 140509587675648
|
||
|
140509590916368 [label="encoder.layer.1.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590916368 -> 140509587674976
|
||
|
140509587674976 [label=AccumulateGrad]
|
||
|
140509587674496 -> 140509587674784
|
||
|
140509587674496 [label=TBackward0]
|
||
|
140509587675024 -> 140509587674496
|
||
|
140509587675024 [label=ToCopyBackward0]
|
||
|
140509587675408 -> 140509587675024
|
||
|
140509590914608 [label="encoder.layer.1.experts.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590914608 -> 140509587675408
|
||
|
140509587675408 [label=AccumulateGrad]
|
||
|
140509587674064 -> 140509587674256
|
||
|
140509587674064 [label=TBackward0]
|
||
|
140509587674736 -> 140509587674064
|
||
|
140509587674736 [label=ToCopyBackward0]
|
||
|
140509587675216 -> 140509587674736
|
||
|
140509590914368 [label="encoder.layer.1.experts.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590914368 -> 140509587675216
|
||
|
140509587675216 [label=AccumulateGrad]
|
||
|
140509587673968 -> 140509587673824
|
||
|
140509587673776 -> 140509587673680
|
||
|
140509590914128 [label="encoder.layer.1.experts.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590914128 -> 140509587673776
|
||
|
140509587673776 [label=AccumulateGrad]
|
||
|
140509587673728 -> 140509587673680
|
||
|
140509590914208 [label="encoder.layer.1.experts.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590914208 -> 140509587673728
|
||
|
140509587673728 [label=AccumulateGrad]
|
||
|
140509587673440 -> 140509587658304
|
||
|
140509587673440 [label=NativeLayerNormBackward0]
|
||
|
140509587674112 -> 140509587673440
|
||
|
140509587674112 [label=AddBackward0]
|
||
|
140509587674928 -> 140509587674112
|
||
|
140509587674928 [label=NativeDropoutBackward0]
|
||
|
140509587674640 -> 140509587674928
|
||
|
140509587674640 [label=ViewBackward0]
|
||
|
140509587675168 -> 140509587674640
|
||
|
140509587675168 [label=AddmmBackward0]
|
||
|
140509587675840 -> 140509587675168
|
||
|
140509587675840 [label=ToCopyBackward0]
|
||
|
140509587676368 -> 140509587675840
|
||
|
140509590915888 [label="encoder.layer.1.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590915888 -> 140509587676368
|
||
|
140509587676368 [label=AccumulateGrad]
|
||
|
140509587675792 -> 140509587675168
|
||
|
140509587675792 [label=ViewBackward0]
|
||
|
140509587676752 -> 140509587675792
|
||
|
140509587676752 [label=GeluBackward0]
|
||
|
140509587676560 -> 140509587676752
|
||
|
140509587676560 [label=ViewBackward0]
|
||
|
140509587676320 -> 140509587676560
|
||
|
140509587676320 [label=AddmmBackward0]
|
||
|
140517615506048 -> 140509587676320
|
||
|
140517615506048 [label=ToCopyBackward0]
|
||
|
140517615508832 -> 140517615506048
|
||
|
140509590916128 [label="encoder.layer.1.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590916128 -> 140517615508832
|
||
|
140517615508832 [label=AccumulateGrad]
|
||
|
140517615505856 -> 140509587676320
|
||
|
140517615505856 [label=ViewBackward0]
|
||
|
140517615591728 -> 140517615505856
|
||
|
140517615591728 [label=ToCopyBackward0]
|
||
|
140509587674448 -> 140517615591728
|
||
|
140509587674448 [label=SliceBackward0]
|
||
|
140517615591776 -> 140509587674448
|
||
|
140517615591776 [label=SliceBackward0]
|
||
|
140517615591872 -> 140517615591776
|
||
|
140517615591872 [label=SliceBackward0]
|
||
|
140509587675648 -> 140517615591872
|
||
|
140517615505568 -> 140509587676320
|
||
|
140517615505568 [label=TBackward0]
|
||
|
140517615591536 -> 140517615505568
|
||
|
140517615591536 [label=ToCopyBackward0]
|
||
|
140517615591968 -> 140517615591536
|
||
|
140509590916048 [label="encoder.layer.1.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590916048 -> 140517615591968
|
||
|
140517615591968 [label=AccumulateGrad]
|
||
|
140509587675600 -> 140509587675168
|
||
|
140509587675600 [label=TBackward0]
|
||
|
140509587676128 -> 140509587675600
|
||
|
140509587676128 [label=ToCopyBackward0]
|
||
|
140517615506384 -> 140509587676128
|
||
|
140509590915808 [label="encoder.layer.1.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590915808 -> 140517615506384
|
||
|
140517615506384 [label=AccumulateGrad]
|
||
|
140509587674448 -> 140509587674112
|
||
|
140509587673920 -> 140509587673440
|
||
|
140509590915568 [label="encoder.layer.1.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590915568 -> 140509587673920
|
||
|
140509587673920 [label=AccumulateGrad]
|
||
|
140509587673872 -> 140509587673440
|
||
|
140509590915648 [label="encoder.layer.1.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590915648 -> 140509587673872
|
||
|
140509587673872 [label=AccumulateGrad]
|
||
|
140509587673152 -> 140509587660368
|
||
|
140509587673152 [label=TBackward0]
|
||
|
140509587673392 -> 140509587673152
|
||
|
140509587673392 [label=ToCopyBackward0]
|
||
|
140509587674400 -> 140509587673392
|
||
|
140509590913888 [label="encoder.layer.2.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590913888 -> 140509587674400
|
||
|
140509587674400 [label=AccumulateGrad]
|
||
|
140509587660272 -> 140509587660224
|
||
|
140509587660272 [label=ReshapeAliasBackward0]
|
||
|
140509587660608 -> 140509587660272
|
||
|
140509587660608 [label=ExpandBackward0]
|
||
|
140509587660704 -> 140509587660608
|
||
|
140509587660704 [label=TransposeBackward0]
|
||
|
140509587673632 -> 140509587660704
|
||
|
140509587673632 [label=PermuteBackward0]
|
||
|
140509587675504 -> 140509587673632
|
||
|
140509587675504 [label=ViewBackward0]
|
||
|
140509587673584 -> 140509587675504
|
||
|
140509587673584 [label=ViewBackward0]
|
||
|
140509587676416 -> 140509587673584
|
||
|
140509587676416 [label=AddmmBackward0]
|
||
|
140517615505664 -> 140509587676416
|
||
|
140517615505664 [label=ToCopyBackward0]
|
||
|
140517615591680 -> 140517615505664
|
||
|
140509590913728 [label="encoder.layer.2.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590913728 -> 140517615591680
|
||
|
140517615591680 [label=AccumulateGrad]
|
||
|
140509587673200 -> 140509587676416
|
||
|
140509587673200 [label=ViewBackward0]
|
||
|
140517615592016 -> 140509587673200
|
||
|
140517615592016 [label=ToCopyBackward0]
|
||
|
140509587658304 -> 140517615592016
|
||
|
140517615591488 -> 140509587676416
|
||
|
140517615591488 [label=TBackward0]
|
||
|
140517615591584 -> 140517615591488
|
||
|
140517615591584 [label=ToCopyBackward0]
|
||
|
140517615592160 -> 140517615591584
|
||
|
140509590913648 [label="encoder.layer.2.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590913648 -> 140517615592160
|
||
|
140517615592160 [label=AccumulateGrad]
|
||
|
140509587659360 -> 140509587659312
|
||
|
140509587659360 [label=ReshapeAliasBackward0]
|
||
|
140509587659696 -> 140509587659360
|
||
|
140509587659696 [label=ExpandBackward0]
|
||
|
140509587659888 -> 140509587659696
|
||
|
140509587659888 [label=PermuteBackward0]
|
||
|
140509587660080 -> 140509587659888
|
||
|
140509587660080 [label=ViewBackward0]
|
||
|
140509587659456 -> 140509587660080
|
||
|
140509587659456 [label=ViewBackward0]
|
||
|
140509587660416 -> 140509587659456
|
||
|
140509587660416 [label=AddmmBackward0]
|
||
|
140509587659504 -> 140509587660416
|
||
|
140509587659504 [label=ToCopyBackward0]
|
||
|
140509587676032 -> 140509587659504
|
||
|
140509590913488 [label="encoder.layer.2.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590913488 -> 140509587676032
|
||
|
140509587676032 [label=AccumulateGrad]
|
||
|
140509587674208 -> 140509587660416
|
||
|
140509587674208 [label=ViewBackward0]
|
||
|
140517615591920 -> 140509587674208
|
||
|
140517615591920 [label=ToCopyBackward0]
|
||
|
140509587658304 -> 140517615591920
|
||
|
140509587673344 -> 140509587660416
|
||
|
140509587673344 [label=TBackward0]
|
||
|
140517615591824 -> 140509587673344
|
||
|
140517615591824 [label=ToCopyBackward0]
|
||
|
140517615592064 -> 140517615591824
|
||
|
140509590913408 [label="encoder.layer.2.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590913408 -> 140517615592064
|
||
|
140517615592064 [label=AccumulateGrad]
|
||
|
140509587658400 -> 140509587658592
|
||
|
140509587658400 [label=TBackward0]
|
||
|
140509587659072 -> 140509587658400
|
||
|
140509587659072 [label=ToCopyBackward0]
|
||
|
140509587659264 -> 140509587659072
|
||
|
140509590913168 [label="encoder.layer.2.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590913168 -> 140509587659264
|
||
|
140509587659264 [label=AccumulateGrad]
|
||
|
140509587658304 -> 140509587658160
|
||
|
140509587658112 -> 140509587658064
|
||
|
140509590904640 [label="encoder.layer.2.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590904640 -> 140509587658112
|
||
|
140509587658112 [label=AccumulateGrad]
|
||
|
140509587657584 -> 140509587658064
|
||
|
140509590904720 [label="encoder.layer.2.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590904720 -> 140509587657584
|
||
|
140509587657584 [label=AccumulateGrad]
|
||
|
140509587656912 -> 140509587657392
|
||
|
140509587656912 [label=TBackward0]
|
||
|
140509587657632 -> 140509587656912
|
||
|
140509587657632 [label=ToCopyBackward0]
|
||
|
140509587658016 -> 140509587657632
|
||
|
140509590904400 [label="encoder.layer.2.crossattention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590904400 -> 140509587658016
|
||
|
140509587658016 [label=AccumulateGrad]
|
||
|
140509587656816 -> 140509587644368
|
||
|
140509587656816 [label=ReshapeAliasBackward0]
|
||
|
140509587657152 -> 140509587656816
|
||
|
140509587657152 [label=ExpandBackward0]
|
||
|
140509587657344 -> 140509587657152
|
||
|
140509587657344 [label=TransposeBackward0]
|
||
|
140509587657824 -> 140509587657344
|
||
|
140509587657824 [label=PermuteBackward0]
|
||
|
140509587658256 -> 140509587657824
|
||
|
140509587658256 [label=ViewBackward0]
|
||
|
140509587657776 -> 140509587658256
|
||
|
140509587657776 [label=ViewBackward0]
|
||
|
140509587658544 -> 140509587657776
|
||
|
140509587658544 [label=AddmmBackward0]
|
||
|
140509587658784 -> 140509587658544
|
||
|
140509587658784 [label=ToCopyBackward0]
|
||
|
140509587658976 -> 140509587658784
|
||
|
140509590904240 [label="encoder.layer.2.crossattention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590904240 -> 140509587658976
|
||
|
140509587658976 [label=AccumulateGrad]
|
||
|
140509587658736 -> 140509587658544
|
||
|
140509587658736 [label=ViewBackward0]
|
||
|
140509587659792 -> 140509587658736
|
||
|
140509587659792 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140509587659792
|
||
|
140509587656960 -> 140509587658544
|
||
|
140509587656960 [label=TBackward0]
|
||
|
140509587659600 -> 140509587656960
|
||
|
140509587659600 [label=ToCopyBackward0]
|
||
|
140509587660512 -> 140509587659600
|
||
|
140509590904160 [label="encoder.layer.2.crossattention.self.key.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590904160 -> 140509587660512
|
||
|
140509587660512 [label=AccumulateGrad]
|
||
|
140509587643552 -> 140509587643504
|
||
|
140509587643552 [label=ReshapeAliasBackward0]
|
||
|
140509587643888 -> 140509587643552
|
||
|
140509587643888 [label=ExpandBackward0]
|
||
|
140509587644080 -> 140509587643888
|
||
|
140509587644080 [label=PermuteBackward0]
|
||
|
140509587644272 -> 140509587644080
|
||
|
140509587644272 [label=ViewBackward0]
|
||
|
140509587675312 -> 140509587644272
|
||
|
140509587675312 [label=ViewBackward0]
|
||
|
140509587643696 -> 140509587675312
|
||
|
140509587643696 [label=AddmmBackward0]
|
||
|
140509587657536 -> 140509587643696
|
||
|
140509587657536 [label=ToCopyBackward0]
|
||
|
140509587659168 -> 140509587657536
|
||
|
140509590904000 [label="encoder.layer.2.crossattention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590904000 -> 140509587659168
|
||
|
140509587659168 [label=AccumulateGrad]
|
||
|
140509587657248 -> 140509587643696
|
||
|
140509587657248 [label=ViewBackward0]
|
||
|
140509587660176 -> 140509587657248
|
||
|
140509587660176 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140509587660176
|
||
|
140509587656768 -> 140509587643696
|
||
|
140509587656768 [label=TBackward0]
|
||
|
140509587658208 -> 140509587656768
|
||
|
140509587658208 [label=ToCopyBackward0]
|
||
|
140509587658448 -> 140509587658208
|
||
|
140509590903920 [label="encoder.layer.2.crossattention.self.value.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590903920 -> 140509587658448
|
||
|
140509587658448 [label=AccumulateGrad]
|
||
|
140509587642592 -> 140509587642784
|
||
|
140509587642592 [label=TBackward0]
|
||
|
140509587643264 -> 140509587642592
|
||
|
140509587643264 [label=ToCopyBackward0]
|
||
|
140509587643456 -> 140509587643264
|
||
|
140509590903680 [label="encoder.layer.2.crossattention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590903680 -> 140509587643456
|
||
|
140509587643456 [label=AccumulateGrad]
|
||
|
140509587642496 -> 140509587642352
|
||
|
140509587642304 -> 140509587642256
|
||
|
140509590903440 [label="encoder.layer.2.crossattention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590903440 -> 140509587642304
|
||
|
140509587642304 [label=AccumulateGrad]
|
||
|
140509587641872 -> 140509587642256
|
||
|
140509590903520 [label="encoder.layer.2.crossattention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590903520 -> 140509587641872
|
||
|
140509587641872 [label=AccumulateGrad]
|
||
|
140509587641392 -> 140509587641680
|
||
|
140509587641392 [label=TBackward0]
|
||
|
140509587641920 -> 140509587641392
|
||
|
140509587641920 [label=ToCopyBackward0]
|
||
|
140509587642400 -> 140509587641920
|
||
|
140509590901760 [label="encoder.layer.2.experts.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590901760 -> 140509587642400
|
||
|
140509587642400 [label=AccumulateGrad]
|
||
|
140509587640960 -> 140509587641152
|
||
|
140509587640960 [label=TBackward0]
|
||
|
140509587641632 -> 140509587640960
|
||
|
140509587641632 [label=ToCopyBackward0]
|
||
|
140509587642112 -> 140509587641632
|
||
|
140509590901520 [label="encoder.layer.2.experts.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590901520 -> 140509587642112
|
||
|
140509587642112 [label=AccumulateGrad]
|
||
|
140509587640864 -> 140509587640720
|
||
|
140509587640672 -> 140509587640576
|
||
|
140509590901280 [label="encoder.layer.2.experts.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590901280 -> 140509587640672
|
||
|
140509587640672 [label=AccumulateGrad]
|
||
|
140509587640624 -> 140509587640576
|
||
|
140509590901360 [label="encoder.layer.2.experts.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590901360 -> 140509587640624
|
||
|
140509587640624 [label=AccumulateGrad]
|
||
|
140509587640480 -> 140509587625200
|
||
|
140509587640480 [label=NativeLayerNormBackward0]
|
||
|
140509587641008 -> 140509587640480
|
||
|
140509587641008 [label=AddBackward0]
|
||
|
140509587641824 -> 140509587641008
|
||
|
140509587641824 [label=NativeDropoutBackward0]
|
||
|
140509587641536 -> 140509587641824
|
||
|
140509587641536 [label=ViewBackward0]
|
||
|
140509587642064 -> 140509587641536
|
||
|
140509587642064 [label=AddmmBackward0]
|
||
|
140509587642928 -> 140509587642064
|
||
|
140509587642928 [label=ToCopyBackward0]
|
||
|
140509587643024 -> 140509587642928
|
||
|
140509590903040 [label="encoder.layer.2.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590903040 -> 140509587643024
|
||
|
140509587643024 [label=AccumulateGrad]
|
||
|
140509587642736 -> 140509587642064
|
||
|
140509587642736 [label=ViewBackward0]
|
||
|
140509587643168 -> 140509587642736
|
||
|
140509587643168 [label=GeluBackward0]
|
||
|
140509587644176 -> 140509587643168
|
||
|
140509587644176 [label=ViewBackward0]
|
||
|
140509587643648 -> 140509587644176
|
||
|
140509587643648 [label=AddmmBackward0]
|
||
|
140509587659984 -> 140509587643648
|
||
|
140509587659984 [label=ToCopyBackward0]
|
||
|
140517615592208 -> 140509587659984
|
||
|
140509590903280 [label="encoder.layer.2.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590903280 -> 140517615592208
|
||
|
140517615592208 [label=AccumulateGrad]
|
||
|
140509587657920 -> 140509587643648
|
||
|
140509587657920 [label=ViewBackward0]
|
||
|
140517615592304 -> 140509587657920
|
||
|
140517615592304 [label=ToCopyBackward0]
|
||
|
140509587641344 -> 140517615592304
|
||
|
140509587641344 [label=SliceBackward0]
|
||
|
140517615592448 -> 140509587641344
|
||
|
140517615592448 [label=SliceBackward0]
|
||
|
140517615592544 -> 140517615592448
|
||
|
140517615592544 [label=SliceBackward0]
|
||
|
140509587658064 -> 140517615592544
|
||
|
140509587657056 -> 140509587643648
|
||
|
140509587657056 [label=TBackward0]
|
||
|
140517615592112 -> 140509587657056
|
||
|
140517615592112 [label=ToCopyBackward0]
|
||
|
140517615592640 -> 140517615592112
|
||
|
140509590903200 [label="encoder.layer.2.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590903200 -> 140517615592640
|
||
|
140517615592640 [label=AccumulateGrad]
|
||
|
140509587642640 -> 140509587642064
|
||
|
140509587642640 [label=TBackward0]
|
||
|
140509587643792 -> 140509587642640
|
||
|
140509587643792 [label=ToCopyBackward0]
|
||
|
140509587658832 -> 140509587643792
|
||
|
140509590902960 [label="encoder.layer.2.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590902960 -> 140509587658832
|
||
|
140509587658832 [label=AccumulateGrad]
|
||
|
140509587641344 -> 140509587641008
|
||
|
140509587640816 -> 140509587640480
|
||
|
140509590902720 [label="encoder.layer.2.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590902720 -> 140509587640816
|
||
|
140509587640816 [label=AccumulateGrad]
|
||
|
140509587640768 -> 140509587640480
|
||
|
140509590902800 [label="encoder.layer.2.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590902800 -> 140509587640768
|
||
|
140509587640768 [label=AccumulateGrad]
|
||
|
140509587627264 -> 140509587627744
|
||
|
140509587627264 [label=TBackward0]
|
||
|
140509587640384 -> 140509587627264
|
||
|
140509587640384 [label=ToCopyBackward0]
|
||
|
140509587641296 -> 140509587640384
|
||
|
140509590901040 [label="encoder.layer.3.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590901040 -> 140509587641296
|
||
|
140509587641296 [label=AccumulateGrad]
|
||
|
140509587627168 -> 140509587627120
|
||
|
140509587627168 [label=ReshapeAliasBackward0]
|
||
|
140509587627504 -> 140509587627168
|
||
|
140509587627504 [label=ExpandBackward0]
|
||
|
140509587627696 -> 140509587627504
|
||
|
140509587627696 [label=TransposeBackward0]
|
||
|
140509587627888 -> 140509587627696
|
||
|
140509587627888 [label=PermuteBackward0]
|
||
|
140509587642448 -> 140509587627888
|
||
|
140509587642448 [label=ViewBackward0]
|
||
|
140509587640432 -> 140509587642448
|
||
|
140509587640432 [label=ViewBackward0]
|
||
|
140509587643360 -> 140509587640432
|
||
|
140509587643360 [label=AddmmBackward0]
|
||
|
140509587643984 -> 140509587643360
|
||
|
140509587643984 [label=ToCopyBackward0]
|
||
|
140517615592256 -> 140509587643984
|
||
|
140509590900880 [label="encoder.layer.3.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590900880 -> 140517615592256
|
||
|
140517615592256 [label=AccumulateGrad]
|
||
|
140509587640528 -> 140509587643360
|
||
|
140509587640528 [label=ViewBackward0]
|
||
|
140517615592688 -> 140509587640528
|
||
|
140517615592688 [label=ToCopyBackward0]
|
||
|
140509587625200 -> 140517615592688
|
||
|
140517615592352 -> 140509587643360
|
||
|
140517615592352 [label=TBackward0]
|
||
|
140517615592400 -> 140517615592352
|
||
|
140517615592400 [label=ToCopyBackward0]
|
||
|
140517615592832 -> 140517615592400
|
||
|
140509590900800 [label="encoder.layer.3.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590900800 -> 140517615592832
|
||
|
140517615592832 [label=AccumulateGrad]
|
||
|
140509587626256 -> 140509587626208
|
||
|
140509587626256 [label=ReshapeAliasBackward0]
|
||
|
140509587626592 -> 140509587626256
|
||
|
140509587626592 [label=ExpandBackward0]
|
||
|
140509587626784 -> 140509587626592
|
||
|
140509587626784 [label=PermuteBackward0]
|
||
|
140509587626976 -> 140509587626784
|
||
|
140509587626976 [label=ViewBackward0]
|
||
|
140509587626352 -> 140509587626976
|
||
|
140509587626352 [label=ViewBackward0]
|
||
|
140509587627600 -> 140509587626352
|
||
|
140509587627600 [label=AddmmBackward0]
|
||
|
140509587627312 -> 140509587627600
|
||
|
140509587627312 [label=ToCopyBackward0]
|
||
|
140509587642976 -> 140509587627312
|
||
|
140509590896448 [label="encoder.layer.3.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590896448 -> 140509587642976
|
||
|
140509587642976 [label=AccumulateGrad]
|
||
|
140509587626400 -> 140509587627600
|
||
|
140509587626400 [label=ViewBackward0]
|
||
|
140517615592592 -> 140509587626400
|
||
|
140517615592592 [label=ToCopyBackward0]
|
||
|
140509587625200 -> 140517615592592
|
||
|
140509587641104 -> 140509587627600
|
||
|
140509587641104 [label=TBackward0]
|
||
|
140517615592496 -> 140509587641104
|
||
|
140517615592496 [label=ToCopyBackward0]
|
||
|
140517615592736 -> 140517615592496
|
||
|
140509590896368 [label="encoder.layer.3.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590896368 -> 140517615592736
|
||
|
140517615592736 [label=AccumulateGrad]
|
||
|
140509587625296 -> 140509587625488
|
||
|
140509587625296 [label=TBackward0]
|
||
|
140509587625968 -> 140509587625296
|
||
|
140509587625968 [label=ToCopyBackward0]
|
||
|
140509587626160 -> 140509587625968
|
||
|
140509590896128 [label="encoder.layer.3.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590896128 -> 140509587626160
|
||
|
140509587626160 [label=AccumulateGrad]
|
||
|
140509587625200 -> 140509587625056
|
||
|
140509587625008 -> 140509587624960
|
||
|
140509590895888 [label="encoder.layer.3.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590895888 -> 140509587625008
|
||
|
140509587625008 [label=AccumulateGrad]
|
||
|
140509587624288 -> 140509587624960
|
||
|
140509590895968 [label="encoder.layer.3.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590895968 -> 140509587624288
|
||
|
140509587624288 [label=AccumulateGrad]
|
||
|
140509587624000 -> 140509587624096
|
||
|
140509587624000 [label=TBackward0]
|
||
|
140509587624336 -> 140509587624000
|
||
|
140509587624336 [label=ToCopyBackward0]
|
||
|
140509587624720 -> 140509587624336
|
||
|
140509590894208 [label="encoder.layer.3.experts.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590894208 -> 140509587624720
|
||
|
140509587624720 [label=AccumulateGrad]
|
||
|
140509587611024 -> 140509587611216
|
||
|
140509587611024 [label=TBackward0]
|
||
|
140509587611456 -> 140509587611024
|
||
|
140509587611456 [label=ToCopyBackward0]
|
||
|
140509587624528 -> 140509587611456
|
||
|
140509590893968 [label="encoder.layer.3.experts.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590893968 -> 140509587624528
|
||
|
140509587624528 [label=AccumulateGrad]
|
||
|
140509587610928 -> 140509587610784
|
||
|
140509587610736 -> 140509587610640
|
||
|
140509590893728 [label="encoder.layer.3.experts.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590893728 -> 140509587610736
|
||
|
140509587610736 [label=AccumulateGrad]
|
||
|
140509587610688 -> 140509587610640
|
||
|
140509590893808 [label="encoder.layer.3.experts.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590893808 -> 140509587610688
|
||
|
140509587610688 [label=AccumulateGrad]
|
||
|
140509587610400 -> 140509587607664
|
||
|
140509587610400 [label=NativeLayerNormBackward0]
|
||
|
140509587611072 -> 140509587610400
|
||
|
140509587611072 [label=AddBackward0]
|
||
|
140509587611600 -> 140509587611072
|
||
|
140509587611600 [label=NativeDropoutBackward0]
|
||
|
140509587624048 -> 140509587611600
|
||
|
140509587624048 [label=ViewBackward0]
|
||
|
140509587624480 -> 140509587624048
|
||
|
140509587624480 [label=AddmmBackward0]
|
||
|
140509587625152 -> 140509587624480
|
||
|
140509587625152 [label=ToCopyBackward0]
|
||
|
140509587625680 -> 140509587625152
|
||
|
140509590895488 [label="encoder.layer.3.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590895488 -> 140509587625680
|
||
|
140509587625680 [label=AccumulateGrad]
|
||
|
140509587625104 -> 140509587624480
|
||
|
140509587625104 [label=ViewBackward0]
|
||
|
140509587626064 -> 140509587625104
|
||
|
140509587626064 [label=GeluBackward0]
|
||
|
140509587625728 -> 140509587626064
|
||
|
140509587625728 [label=ViewBackward0]
|
||
|
140509587626688 -> 140509587625728
|
||
|
140509587626688 [label=AddmmBackward0]
|
||
|
140509587627072 -> 140509587626688
|
||
|
140509587627072 [label=ToCopyBackward0]
|
||
|
140509587642208 -> 140509587627072
|
||
|
140509590895728 [label="encoder.layer.3.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590895728 -> 140509587642208
|
||
|
140509587642208 [label=AccumulateGrad]
|
||
|
140509587626880 -> 140509587626688
|
||
|
140509587626880 [label=ViewBackward0]
|
||
|
140517615593024 -> 140509587626880
|
||
|
140517615593024 [label=ToCopyBackward0]
|
||
|
140509587611360 -> 140517615593024
|
||
|
140509587611360 [label=SliceBackward0]
|
||
|
140517615593072 -> 140509587611360
|
||
|
140517615593072 [label=SliceBackward0]
|
||
|
140517615593168 -> 140517615593072
|
||
|
140517615593168 [label=SliceBackward0]
|
||
|
140509587624960 -> 140517615593168
|
||
|
140509587625632 -> 140509587626688
|
||
|
140509587625632 [label=TBackward0]
|
||
|
140517615592784 -> 140509587625632
|
||
|
140517615592784 [label=ToCopyBackward0]
|
||
|
140517615593264 -> 140517615592784
|
||
|
140509590895648 [label="encoder.layer.3.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590895648 -> 140517615593264
|
||
|
140517615593264 [label=AccumulateGrad]
|
||
|
140509587624912 -> 140509587624480
|
||
|
140509587624912 [label=TBackward0]
|
||
|
140509587625872 -> 140509587624912
|
||
|
140509587625872 [label=ToCopyBackward0]
|
||
|
140509587627408 -> 140509587625872
|
||
|
140509590895408 [label="encoder.layer.3.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590895408 -> 140509587627408
|
||
|
140509587627408 [label=AccumulateGrad]
|
||
|
140509587611360 -> 140509587611072
|
||
|
140509587610880 -> 140509587610400
|
||
|
140509590895168 [label="encoder.layer.3.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590895168 -> 140509587610880
|
||
|
140509587610880 [label=AccumulateGrad]
|
||
|
140509587610832 -> 140509587610400
|
||
|
140509590895248 [label="encoder.layer.3.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590895248 -> 140509587610832
|
||
|
140509587610832 [label=AccumulateGrad]
|
||
|
140509587609680 -> 140509587610160
|
||
|
140509587609680 [label=TBackward0]
|
||
|
140509587610352 -> 140509587609680
|
||
|
140509587610352 [label=ToCopyBackward0]
|
||
|
140509587611168 -> 140509587610352
|
||
|
140509590893488 [label="encoder.layer.4.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590893488 -> 140509587611168
|
||
|
140509587611168 [label=AccumulateGrad]
|
||
|
140509587609584 -> 140509587609536
|
||
|
140509587609584 [label=ReshapeAliasBackward0]
|
||
|
140509587609920 -> 140509587609584
|
||
|
140509587609920 [label=ExpandBackward0]
|
||
|
140509587610112 -> 140509587609920
|
||
|
140509587610112 [label=TransposeBackward0]
|
||
|
140509587610592 -> 140509587610112
|
||
|
140509587610592 [label=PermuteBackward0]
|
||
|
140509587610544 -> 140509587610592
|
||
|
140509587610544 [label=ViewBackward0]
|
||
|
140509587624240 -> 140509587610544
|
||
|
140509587624240 [label=ViewBackward0]
|
||
|
140509587625440 -> 140509587624240
|
||
|
140509587625440 [label=AddmmBackward0]
|
||
|
140509587626496 -> 140509587625440
|
||
|
140509587626496 [label=ToCopyBackward0]
|
||
|
140517615592976 -> 140509587626496
|
||
|
140509590893328 [label="encoder.layer.4.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590893328 -> 140517615592976
|
||
|
140517615592976 [label=AccumulateGrad]
|
||
|
140509587624816 -> 140509587625440
|
||
|
140509587624816 [label=ViewBackward0]
|
||
|
140517615593312 -> 140509587624816
|
||
|
140517615593312 [label=ToCopyBackward0]
|
||
|
140509587607664 -> 140517615593312
|
||
|
140517615592880 -> 140509587625440
|
||
|
140517615592880 [label=TBackward0]
|
||
|
140517615592928 -> 140517615592880
|
||
|
140517615592928 [label=ToCopyBackward0]
|
||
|
140517615593456 -> 140517615592928
|
||
|
140509590893248 [label="encoder.layer.4.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590893248 -> 140517615593456
|
||
|
140517615593456 [label=AccumulateGrad]
|
||
|
140509587608672 -> 140509587608624
|
||
|
140509587608672 [label=ReshapeAliasBackward0]
|
||
|
140509587609008 -> 140509587608672
|
||
|
140509587609008 [label=ExpandBackward0]
|
||
|
140509587609200 -> 140509587609008
|
||
|
140509587609200 [label=PermuteBackward0]
|
||
|
140509587609392 -> 140509587609200
|
||
|
140509587609392 [label=ViewBackward0]
|
||
|
140509587608768 -> 140509587609392
|
||
|
140509587608768 [label=ViewBackward0]
|
||
|
140509587610016 -> 140509587608768
|
||
|
140509587610016 [label=AddmmBackward0]
|
||
|
140509587609728 -> 140509587610016
|
||
|
140509587609728 [label=ToCopyBackward0]
|
||
|
140509587625344 -> 140509587609728
|
||
|
140509590893088 [label="encoder.layer.4.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590893088 -> 140509587625344
|
||
|
140509587625344 [label=AccumulateGrad]
|
||
|
140509587610304 -> 140509587610016
|
||
|
140509587610304 [label=ViewBackward0]
|
||
|
140517615593216 -> 140509587610304
|
||
|
140517615593216 [label=ToCopyBackward0]
|
||
|
140509587607664 -> 140517615593216
|
||
|
140509587608816 -> 140509587610016
|
||
|
140509587608816 [label=TBackward0]
|
||
|
140517615593120 -> 140509587608816
|
||
|
140517615593120 [label=ToCopyBackward0]
|
||
|
140517615593360 -> 140517615593120
|
||
|
140509590893008 [label="encoder.layer.4.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590893008 -> 140517615593360
|
||
|
140517615593360 [label=AccumulateGrad]
|
||
|
140509587607712 -> 140509587607904
|
||
|
140509587607712 [label=TBackward0]
|
||
|
140509587608384 -> 140509587607712
|
||
|
140509587608384 [label=ToCopyBackward0]
|
||
|
140509587608576 -> 140509587608384
|
||
|
140509590892768 [label="encoder.layer.4.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590892768 -> 140509587608576
|
||
|
140509587608576 [label=AccumulateGrad]
|
||
|
140509587607664 -> 140509587595120
|
||
|
140509587595072 -> 140509587595024
|
||
|
140509590892608 [label="encoder.layer.4.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590892608 -> 140509587595072
|
||
|
140509587595072 [label=AccumulateGrad]
|
||
|
140509587594544 -> 140509587595024
|
||
|
140509590876048 [label="encoder.layer.4.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590876048 -> 140509587594544
|
||
|
140509587594544 [label=AccumulateGrad]
|
||
|
140509587593872 -> 140509587594352
|
||
|
140509587593872 [label=TBackward0]
|
||
|
140509587594592 -> 140509587593872
|
||
|
140509587594592 [label=ToCopyBackward0]
|
||
|
140509587594976 -> 140509587594592
|
||
|
140509590875808 [label="encoder.layer.4.crossattention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590875808 -> 140509587594976
|
||
|
140509587594976 [label=AccumulateGrad]
|
||
|
140509587593776 -> 140509587593728
|
||
|
140509587593776 [label=ReshapeAliasBackward0]
|
||
|
140509587594112 -> 140509587593776
|
||
|
140509587594112 [label=ExpandBackward0]
|
||
|
140509587594304 -> 140509587594112
|
||
|
140509587594304 [label=TransposeBackward0]
|
||
|
140509587594784 -> 140509587594304
|
||
|
140509587594784 [label=PermuteBackward0]
|
||
|
140509587595168 -> 140509587594784
|
||
|
140509587595168 [label=ViewBackward0]
|
||
|
140509587594736 -> 140509587595168
|
||
|
140509587594736 [label=ViewBackward0]
|
||
|
140509587607856 -> 140509587594736
|
||
|
140509587607856 [label=AddmmBackward0]
|
||
|
140509587608096 -> 140509587607856
|
||
|
140509587608096 [label=ToCopyBackward0]
|
||
|
140509587608288 -> 140509587608096
|
||
|
140509590875648 [label="encoder.layer.4.crossattention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590875648 -> 140509587608288
|
||
|
140509587608288 [label=AccumulateGrad]
|
||
|
140509587608048 -> 140509587607856
|
||
|
140509587608048 [label=ViewBackward0]
|
||
|
140509587609104 -> 140509587608048
|
||
|
140509587609104 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140509587609104
|
||
|
140509587607616 -> 140509587607856
|
||
|
140509587607616 [label=TBackward0]
|
||
|
140509587608912 -> 140509587607616
|
||
|
140509587608912 [label=ToCopyBackward0]
|
||
|
140509587609824 -> 140509587608912
|
||
|
140509590875568 [label="encoder.layer.4.crossattention.self.key.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590875568 -> 140509587609824
|
||
|
140509587609824 [label=AccumulateGrad]
|
||
|
140509587592864 -> 140509587592816
|
||
|
140509587592864 [label=ReshapeAliasBackward0]
|
||
|
140509587593200 -> 140509587592864
|
||
|
140509587593200 [label=ExpandBackward0]
|
||
|
140509587593392 -> 140509587593200
|
||
|
140509587593392 [label=PermuteBackward0]
|
||
|
140509587593584 -> 140509587593392
|
||
|
140509587593584 [label=ViewBackward0]
|
||
|
140509587592960 -> 140509587593584
|
||
|
140509587592960 [label=ViewBackward0]
|
||
|
140509587594208 -> 140509587592960
|
||
|
140509587594208 [label=AddmmBackward0]
|
||
|
140509587594880 -> 140509587594208
|
||
|
140509587594880 [label=ToCopyBackward0]
|
||
|
140509587624624 -> 140509587594880
|
||
|
140509590875408 [label="encoder.layer.4.crossattention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590875408 -> 140509587624624
|
||
|
140509587624624 [label=AccumulateGrad]
|
||
|
140509587594496 -> 140509587594208
|
||
|
140509587594496 [label=ViewBackward0]
|
||
|
140509587609488 -> 140509587594496
|
||
|
140509587609488 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140509587609488
|
||
|
140509587593008 -> 140509587594208
|
||
|
140509587593008 [label=TBackward0]
|
||
|
140509587607760 -> 140509587593008
|
||
|
140509587607760 [label=ToCopyBackward0]
|
||
|
140509587608480 -> 140509587607760
|
||
|
140509590875328 [label="encoder.layer.4.crossattention.self.value.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590875328 -> 140509587608480
|
||
|
140509587608480 [label=AccumulateGrad]
|
||
|
140509587591904 -> 140509587592096
|
||
|
140509587591904 [label=TBackward0]
|
||
|
140509587592576 -> 140509587591904
|
||
|
140509587592576 [label=ToCopyBackward0]
|
||
|
140509587592768 -> 140509587592576
|
||
|
140509590875088 [label="encoder.layer.4.crossattention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590875088 -> 140509587592768
|
||
|
140509587592768 [label=AccumulateGrad]
|
||
|
140509587591808 -> 140509587591664
|
||
|
140509587591616 -> 140509587591568
|
||
|
140509590874848 [label="encoder.layer.4.crossattention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590874848 -> 140509587591616
|
||
|
140509587591616 [label=AccumulateGrad]
|
||
|
140509587591376 -> 140509587591568
|
||
|
140509590874928 [label="encoder.layer.4.crossattention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590874928 -> 140509587591376
|
||
|
140509587591376 [label=AccumulateGrad]
|
||
|
140509587574256 -> 140509587574544
|
||
|
140509587574256 [label=TBackward0]
|
||
|
140509587591328 -> 140509587574256
|
||
|
140509587591328 [label=ToCopyBackward0]
|
||
|
140509587591712 -> 140509587591328
|
||
|
140509590873168 [label="encoder.layer.4.experts.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590873168 -> 140509587591712
|
||
|
140509587591712 [label=AccumulateGrad]
|
||
|
140509587573824 -> 140509587574016
|
||
|
140509587573824 [label=TBackward0]
|
||
|
140509587574496 -> 140509587573824
|
||
|
140509587574496 [label=ToCopyBackward0]
|
||
|
140509587574688 -> 140509587574496
|
||
|
140509590872928 [label="encoder.layer.4.experts.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590872928 -> 140509587574688
|
||
|
140509587574688 [label=AccumulateGrad]
|
||
|
140509587573728 -> 140509587573584
|
||
|
140509587573536 -> 140509587573440
|
||
|
140509590872688 [label="encoder.layer.4.experts.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590872688 -> 140509587573536
|
||
|
140509587573536 [label=AccumulateGrad]
|
||
|
140509587573488 -> 140509587573440
|
||
|
140509590872768 [label="encoder.layer.4.experts.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590872768 -> 140509587573488
|
||
|
140509587573488 [label=AccumulateGrad]
|
||
|
140509587573200 -> 140509587562112
|
||
|
140509587573200 [label=NativeLayerNormBackward0]
|
||
|
140509587573872 -> 140509587573200
|
||
|
140509587573872 [label=AddBackward0]
|
||
|
140509587574400 -> 140509587573872
|
||
|
140509587574400 [label=NativeDropoutBackward0]
|
||
|
140509587591424 -> 140509587574400
|
||
|
140509587591424 [label=ViewBackward0]
|
||
|
140509587591280 -> 140509587591424
|
||
|
140509587591280 [label=AddmmBackward0]
|
||
|
140509587592240 -> 140509587591280
|
||
|
140509587592240 [label=ToCopyBackward0]
|
||
|
140509587592336 -> 140509587592240
|
||
|
140509590874448 [label="encoder.layer.4.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590874448 -> 140509587592336
|
||
|
140509587592336 [label=AccumulateGrad]
|
||
|
140509587592048 -> 140509587591280
|
||
|
140509587592048 [label=ViewBackward0]
|
||
|
140509587592480 -> 140509587592048
|
||
|
140509587592480 [label=GeluBackward0]
|
||
|
140509587593488 -> 140509587592480
|
||
|
140509587593488 [label=ViewBackward0]
|
||
|
140509587594016 -> 140509587593488
|
||
|
140509587594016 [label=AddmmBackward0]
|
||
|
140509587593920 -> 140509587594016
|
||
|
140509587593920 [label=ToCopyBackward0]
|
||
|
140517615593504 -> 140509587593920
|
||
|
140509590874688 [label="encoder.layer.4.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590874688 -> 140517615593504
|
||
|
140517615593504 [label=AccumulateGrad]
|
||
|
140509587593104 -> 140509587594016
|
||
|
140509587593104 [label=ViewBackward0]
|
||
|
140517615593600 -> 140509587593104
|
||
|
140517615593600 [label=ToCopyBackward0]
|
||
|
140509587574208 -> 140517615593600
|
||
|
140509587574208 [label=SliceBackward0]
|
||
|
140517615593744 -> 140509587574208
|
||
|
140517615593744 [label=SliceBackward0]
|
||
|
140517615593840 -> 140517615593744
|
||
|
140517615593840 [label=SliceBackward0]
|
||
|
140509587595024 -> 140517615593840
|
||
|
140509587609296 -> 140509587594016
|
||
|
140509587609296 [label=TBackward0]
|
||
|
140517615593408 -> 140509587609296
|
||
|
140517615593408 [label=ToCopyBackward0]
|
||
|
140517615593936 -> 140517615593408
|
||
|
140509590874608 [label="encoder.layer.4.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590874608 -> 140517615593936
|
||
|
140517615593936 [label=AccumulateGrad]
|
||
|
140509587591952 -> 140509587591280
|
||
|
140509587591952 [label=TBackward0]
|
||
|
140509587593680 -> 140509587591952
|
||
|
140509587593680 [label=ToCopyBackward0]
|
||
|
140509587608144 -> 140509587593680
|
||
|
140509590874368 [label="encoder.layer.4.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590874368 -> 140509587608144
|
||
|
140509587608144 [label=AccumulateGrad]
|
||
|
140509587574208 -> 140509587573872
|
||
|
140509587573680 -> 140509587573200
|
||
|
140509590874128 [label="encoder.layer.4.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590874128 -> 140509587573680
|
||
|
140509587573680 [label=AccumulateGrad]
|
||
|
140509587573632 -> 140509587573200
|
||
|
140509590874208 [label="encoder.layer.4.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590874208 -> 140509587573632
|
||
|
140509587573632 [label=AccumulateGrad]
|
||
|
140509587572480 -> 140509587572960
|
||
|
140509587572480 [label=TBackward0]
|
||
|
140509587573152 -> 140509587572480
|
||
|
140509587573152 [label=ToCopyBackward0]
|
||
|
140509587574160 -> 140509587573152
|
||
|
140509590872448 [label="encoder.layer.5.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590872448 -> 140509587574160
|
||
|
140509587574160 [label=AccumulateGrad]
|
||
|
140509587572384 -> 140509587572336
|
||
|
140509587572384 [label=ReshapeAliasBackward0]
|
||
|
140509587572720 -> 140509587572384
|
||
|
140509587572720 [label=ExpandBackward0]
|
||
|
140509587572912 -> 140509587572720
|
||
|
140509587572912 [label=TransposeBackward0]
|
||
|
140509587573392 -> 140509587572912
|
||
|
140509587573392 [label=PermuteBackward0]
|
||
|
140509587573344 -> 140509587573392
|
||
|
140509587573344 [label=ViewBackward0]
|
||
|
140509587572528 -> 140509587573344
|
||
|
140509587572528 [label=ViewBackward0]
|
||
|
140509587592672 -> 140509587572528
|
||
|
140509587592672 [label=AddmmBackward0]
|
||
|
140509587593296 -> 140509587592672
|
||
|
140509587593296 [label=ToCopyBackward0]
|
||
|
140517615593552 -> 140509587593296
|
||
|
140509590872288 [label="encoder.layer.5.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590872288 -> 140517615593552
|
||
|
140517615593552 [label=AccumulateGrad]
|
||
|
140509587591760 -> 140509587592672
|
||
|
140509587591760 [label=ViewBackward0]
|
||
|
140517615593984 -> 140509587591760
|
||
|
140517615593984 [label=ToCopyBackward0]
|
||
|
140509587562112 -> 140517615593984
|
||
|
140517615593648 -> 140509587592672
|
||
|
140517615593648 [label=TBackward0]
|
||
|
140517615593696 -> 140517615593648
|
||
|
140517615593696 [label=ToCopyBackward0]
|
||
|
140517615594128 -> 140517615593696
|
||
|
140509590872208 [label="encoder.layer.5.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590872208 -> 140517615594128
|
||
|
140517615594128 [label=AccumulateGrad]
|
||
|
140509587571472 -> 140509587571424
|
||
|
140509587571472 [label=ReshapeAliasBackward0]
|
||
|
140509587571808 -> 140509587571472
|
||
|
140509587571808 [label=ExpandBackward0]
|
||
|
140509587572000 -> 140509587571808
|
||
|
140509587572000 [label=PermuteBackward0]
|
||
|
140509587572192 -> 140509587572000
|
||
|
140509587572192 [label=ViewBackward0]
|
||
|
140509587571568 -> 140509587572192
|
||
|
140509587571568 [label=ViewBackward0]
|
||
|
140509587572816 -> 140509587571568
|
||
|
140509587572816 [label=AddmmBackward0]
|
||
|
140509587573968 -> 140509587572816
|
||
|
140509587573968 [label=ToCopyBackward0]
|
||
|
140509587592288 -> 140509587573968
|
||
|
140509590859664 [label="encoder.layer.5.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590859664 -> 140509587592288
|
||
|
140509587592288 [label=AccumulateGrad]
|
||
|
140509587573104 -> 140509587572816
|
||
|
140509587573104 [label=ViewBackward0]
|
||
|
140517615593888 -> 140509587573104
|
||
|
140517615593888 [label=ToCopyBackward0]
|
||
|
140509587562112 -> 140517615593888
|
||
|
140509587571616 -> 140509587572816
|
||
|
140509587571616 [label=TBackward0]
|
||
|
140517615593792 -> 140509587571616
|
||
|
140517615593792 [label=ToCopyBackward0]
|
||
|
140517615594032 -> 140517615593792
|
||
|
140509590859584 [label="encoder.layer.5.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590859584 -> 140517615594032
|
||
|
140517615594032 [label=AccumulateGrad]
|
||
|
140509587570752 -> 140509587562400
|
||
|
140509587570752 [label=TBackward0]
|
||
|
140509587571184 -> 140509587570752
|
||
|
140509587571184 [label=ToCopyBackward0]
|
||
|
140509587571376 -> 140509587571184
|
||
|
140509590859344 [label="encoder.layer.5.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590859344 -> 140509587571376
|
||
|
140509587571376 [label=AccumulateGrad]
|
||
|
140509587562112 -> 140509587561968
|
||
|
140509587561920 -> 140509587561872
|
||
|
140509590859104 [label="encoder.layer.5.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590859104 -> 140509587561920
|
||
|
140509587561920 [label=AccumulateGrad]
|
||
|
140509587561200 -> 140509587561872
|
||
|
140509590859184 [label="encoder.layer.5.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590859184 -> 140509587561200
|
||
|
140509587561200 [label=AccumulateGrad]
|
||
|
140509587560720 -> 140509587561008
|
||
|
140509587560720 [label=TBackward0]
|
||
|
140509587561248 -> 140509587560720
|
||
|
140509587561248 [label=ToCopyBackward0]
|
||
|
140509587561632 -> 140509587561248
|
||
|
140509590857424 [label="encoder.layer.5.experts.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590857424 -> 140509587561632
|
||
|
140509587561632 [label=AccumulateGrad]
|
||
|
140509587560288 -> 140509587560480
|
||
|
140509587560288 [label=TBackward0]
|
||
|
140509587560960 -> 140509587560288
|
||
|
140509587560960 [label=ToCopyBackward0]
|
||
|
140509587561440 -> 140509587560960
|
||
|
140509590857184 [label="encoder.layer.5.experts.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590857184 -> 140509587561440
|
||
|
140509587561440 [label=AccumulateGrad]
|
||
|
140509587560192 -> 140509587560048
|
||
|
140509587560000 -> 140509587559904
|
||
|
140509590856944 [label="encoder.layer.5.experts.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590856944 -> 140509587560000
|
||
|
140509587560000 [label=AccumulateGrad]
|
||
|
140509587559952 -> 140509587559904
|
||
|
140509590857024 [label="encoder.layer.5.experts.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590857024 -> 140509587559952
|
||
|
140509587559952 [label=AccumulateGrad]
|
||
|
140509587559664 -> 140509587850432
|
||
|
140509587559664 [label=NativeLayerNormBackward0]
|
||
|
140509587560336 -> 140509587559664
|
||
|
140509587560336 [label=AddBackward0]
|
||
|
140509587561152 -> 140509587560336
|
||
|
140509587561152 [label=NativeDropoutBackward0]
|
||
|
140509587560864 -> 140509587561152
|
||
|
140509587560864 [label=ViewBackward0]
|
||
|
140509587561392 -> 140509587560864
|
||
|
140509587561392 [label=AddmmBackward0]
|
||
|
140509587562064 -> 140509587561392
|
||
|
140509587562064 [label=ToCopyBackward0]
|
||
|
140509587562352 -> 140509587562064
|
||
|
140509590858704 [label="encoder.layer.5.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590858704 -> 140509587562352
|
||
|
140509587562352 [label=AccumulateGrad]
|
||
|
140509587562016 -> 140509587561392
|
||
|
140509587562016 [label=ViewBackward0]
|
||
|
140509587571280 -> 140509587562016
|
||
|
140509587571280 [label=GeluBackward0]
|
||
|
140509587570848 -> 140509587571280
|
||
|
140509587570848 [label=ViewBackward0]
|
||
|
140509587571904 -> 140509587570848
|
||
|
140509587571904 [label=AddmmBackward0]
|
||
|
140509587572288 -> 140509587571904
|
||
|
140509587572288 [label=ToCopyBackward0]
|
||
|
140509587591520 -> 140509587572288
|
||
|
140509590858944 [label="encoder.layer.5.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590858944 -> 140509587591520
|
||
|
140509587591520 [label=AccumulateGrad]
|
||
|
140509587572096 -> 140509587571904
|
||
|
140509587572096 [label=ViewBackward0]
|
||
|
140517615594320 -> 140509587572096
|
||
|
140517615594320 [label=ToCopyBackward0]
|
||
|
140509587560672 -> 140517615594320
|
||
|
140509587560672 [label=SliceBackward0]
|
||
|
140517615594368 -> 140509587560672
|
||
|
140517615594368 [label=SliceBackward0]
|
||
|
140517615594464 -> 140517615594368
|
||
|
140517615594464 [label=SliceBackward0]
|
||
|
140509587561872 -> 140517615594464
|
||
|
140509587571088 -> 140509587571904
|
||
|
140509587571088 [label=TBackward0]
|
||
|
140517615594080 -> 140509587571088
|
||
|
140517615594080 [label=ToCopyBackward0]
|
||
|
140517615594560 -> 140517615594080
|
||
|
140509590858864 [label="encoder.layer.5.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590858864 -> 140517615594560
|
||
|
140517615594560 [label=AccumulateGrad]
|
||
|
140509587561824 -> 140509587561392
|
||
|
140509587561824 [label=TBackward0]
|
||
|
140509587571040 -> 140509587561824
|
||
|
140509587571040 [label=ToCopyBackward0]
|
||
|
140509587572624 -> 140509587571040
|
||
|
140509590858624 [label="encoder.layer.5.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590858624 -> 140509587572624
|
||
|
140509587572624 [label=AccumulateGrad]
|
||
|
140509587560672 -> 140509587560336
|
||
|
140509587560144 -> 140509587559664
|
||
|
140509590858384 [label="encoder.layer.5.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590858384 -> 140509587560144
|
||
|
140509587560144 [label=AccumulateGrad]
|
||
|
140509587560096 -> 140509587559664
|
||
|
140509590858464 [label="encoder.layer.5.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590858464 -> 140509587560096
|
||
|
140509587560096 [label=AccumulateGrad]
|
||
|
140509587558944 -> 140509587559424
|
||
|
140509587558944 [label=TBackward0]
|
||
|
140509587559616 -> 140509587558944
|
||
|
140509587559616 [label=ToCopyBackward0]
|
||
|
140509587560624 -> 140509587559616
|
||
|
140509590856704 [label="encoder.layer.6.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590856704 -> 140509587560624
|
||
|
140509587560624 [label=AccumulateGrad]
|
||
|
140509587558848 -> 140509587558800
|
||
|
140509587558848 [label=ReshapeAliasBackward0]
|
||
|
140509587559184 -> 140509587558848
|
||
|
140509587559184 [label=ExpandBackward0]
|
||
|
140509587559376 -> 140509587559184
|
||
|
140509587559376 [label=TransposeBackward0]
|
||
|
140509587559856 -> 140509587559376
|
||
|
140509587559856 [label=PermuteBackward0]
|
||
|
140509587561728 -> 140509587559856
|
||
|
140509587561728 [label=ViewBackward0]
|
||
|
140509587559808 -> 140509587561728
|
||
|
140509587559808 [label=ViewBackward0]
|
||
|
140509587562256 -> 140509587559808
|
||
|
140509587562256 [label=AddmmBackward0]
|
||
|
140509587571712 -> 140509587562256
|
||
|
140509587571712 [label=ToCopyBackward0]
|
||
|
140517615594272 -> 140509587571712
|
||
|
140509590856544 [label="encoder.layer.6.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590856544 -> 140517615594272
|
||
|
140517615594272 [label=AccumulateGrad]
|
||
|
140509587570800 -> 140509587562256
|
||
|
140509587570800 [label=ViewBackward0]
|
||
|
140517615594608 -> 140509587570800
|
||
|
140517615594608 [label=ToCopyBackward0]
|
||
|
140509587850432 -> 140517615594608
|
||
|
140517615594176 -> 140509587562256
|
||
|
140517615594176 [label=TBackward0]
|
||
|
140517615594224 -> 140517615594176
|
||
|
140517615594224 [label=ToCopyBackward0]
|
||
|
140517615594752 -> 140517615594224
|
||
|
140509590856464 [label="encoder.layer.6.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590856464 -> 140517615594752
|
||
|
140517615594752 [label=AccumulateGrad]
|
||
|
140509587849376 -> 140509587849520
|
||
|
140509587849376 [label=ReshapeAliasBackward0]
|
||
|
140509587853120 -> 140509587849376
|
||
|
140509587853120 [label=ExpandBackward0]
|
||
|
140509587853216 -> 140509587853120
|
||
|
140509587853216 [label=PermuteBackward0]
|
||
|
140509587558656 -> 140509587853216
|
||
|
140509587558656 [label=ViewBackward0]
|
||
|
140509587558464 -> 140509587558656
|
||
|
140509587558464 [label=ViewBackward0]
|
||
|
140509587559280 -> 140509587558464
|
||
|
140509587559280 [label=AddmmBackward0]
|
||
|
140509587560432 -> 140509587559280
|
||
|
140509587560432 [label=ToCopyBackward0]
|
||
|
140509587558992 -> 140509587560432
|
||
|
140509590856304 [label="encoder.layer.6.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590856304 -> 140509587558992
|
||
|
140509587558992 [label=AccumulateGrad]
|
||
|
140509587559568 -> 140509587559280
|
||
|
140509587559568 [label=ViewBackward0]
|
||
|
140517615594512 -> 140509587559568
|
||
|
140517615594512 [label=ToCopyBackward0]
|
||
|
140509587850432 -> 140517615594512
|
||
|
140509587558512 -> 140509587559280
|
||
|
140509587558512 [label=TBackward0]
|
||
|
140517615594416 -> 140509587558512
|
||
|
140517615594416 [label=ToCopyBackward0]
|
||
|
140517615594656 -> 140517615594416
|
||
|
140509590856224 [label="encoder.layer.6.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590856224 -> 140517615594656
|
||
|
140517615594656 [label=AccumulateGrad]
|
||
|
140509587850336 -> 140509587850144
|
||
|
140509587850336 [label=TBackward0]
|
||
|
140509587849664 -> 140509587850336
|
||
|
140509587849664 [label=ToCopyBackward0]
|
||
|
140509587849472 -> 140509587849664
|
||
|
140509590855984 [label="encoder.layer.6.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590855984 -> 140509587849472
|
||
|
140509587849472 [label=AccumulateGrad]
|
||
|
140509587850432 -> 140509587850672
|
||
|
140509587850624 -> 140509587850768
|
||
|
140509590855744 [label="encoder.layer.6.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590855744 -> 140509587850624
|
||
|
140509587850624 [label=AccumulateGrad]
|
||
|
140509587851248 -> 140509587850768
|
||
|
140509590855824 [label="encoder.layer.6.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590855824 -> 140509587851248
|
||
|
140509587851248 [label=AccumulateGrad]
|
||
|
140509587851920 -> 140509587851440
|
||
|
140509587851920 [label=TBackward0]
|
||
|
140509587851104 -> 140509587851920
|
||
|
140509587851104 [label=ToCopyBackward0]
|
||
|
140509587850720 -> 140509587851104
|
||
|
140509590843120 [label="encoder.layer.6.crossattention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590843120 -> 140509587850720
|
||
|
140509587850720 [label=AccumulateGrad]
|
||
|
140509587852016 -> 140509587851968
|
||
|
140509587852016 [label=ReshapeAliasBackward0]
|
||
|
140509587851584 -> 140509587852016
|
||
|
140509587851584 [label=ExpandBackward0]
|
||
|
140509587851392 -> 140509587851584
|
||
|
140509587851392 [label=TransposeBackward0]
|
||
|
140509587850912 -> 140509587851392
|
||
|
140509587850912 [label=PermuteBackward0]
|
||
|
140509587850576 -> 140509587850912
|
||
|
140509587850576 [label=ViewBackward0]
|
||
|
140509587851056 -> 140509587850576
|
||
|
140509587851056 [label=ViewBackward0]
|
||
|
140509587850288 -> 140509587851056
|
||
|
140509587850288 [label=AddmmBackward0]
|
||
|
140509587849952 -> 140509587850288
|
||
|
140509587849952 [label=ToCopyBackward0]
|
||
|
140509587849760 -> 140509587849952
|
||
|
140509590842960 [label="encoder.layer.6.crossattention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590842960 -> 140509587849760
|
||
|
140509587849760 [label=AccumulateGrad]
|
||
|
140509587850096 -> 140509587850288
|
||
|
140509587850096 [label=ViewBackward0]
|
||
|
140509587853024 -> 140509587850096
|
||
|
140509587853024 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140509587853024
|
||
|
140509587851776 -> 140509587850288
|
||
|
140509587851776 [label=TBackward0]
|
||
|
140509587850000 -> 140509587851776
|
||
|
140509587850000 [label=ToCopyBackward0]
|
||
|
140509587559088 -> 140509587850000
|
||
|
140509590842880 [label="encoder.layer.6.crossattention.self.key.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590842880 -> 140509587559088
|
||
|
140509587559088 [label=AccumulateGrad]
|
||
|
140509587852880 -> 140509587695984
|
||
|
140509587852880 [label=ReshapeAliasBackward0]
|
||
|
140509587852592 -> 140509587852880
|
||
|
140509587852592 [label=ExpandBackward0]
|
||
|
140509587852400 -> 140509587852592
|
||
|
140509587852400 [label=PermuteBackward0]
|
||
|
140509587852208 -> 140509587852400
|
||
|
140509587852208 [label=ViewBackward0]
|
||
|
140509587852736 -> 140509587852208
|
||
|
140509587852736 [label=ViewBackward0]
|
||
|
140509587851488 -> 140509587852736
|
||
|
140509587851488 [label=AddmmBackward0]
|
||
|
140509587850816 -> 140509587851488
|
||
|
140509587850816 [label=ToCopyBackward0]
|
||
|
140509587849328 -> 140509587850816
|
||
|
140509590842720 [label="encoder.layer.6.crossattention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590842720 -> 140509587849328
|
||
|
140509587849328 [label=AccumulateGrad]
|
||
|
140509587851200 -> 140509587851488
|
||
|
140509587851200 [label=ViewBackward0]
|
||
|
140509587850384 -> 140509587851200
|
||
|
140509587850384 [label=ToCopyBackward0]
|
||
|
140517615539152 -> 140509587850384
|
||
|
140509587852784 -> 140509587851488
|
||
|
140509587852784 [label=TBackward0]
|
||
|
140509587849568 -> 140509587852784
|
||
|
140509587849568 [label=ToCopyBackward0]
|
||
|
140509587561536 -> 140509587849568
|
||
|
140509590842640 [label="encoder.layer.6.crossattention.self.value.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509590842640 -> 140509587561536
|
||
|
140509587561536 [label=AccumulateGrad]
|
||
|
140509587695216 -> 140509587695600
|
||
|
140509587695216 [label=TBackward0]
|
||
|
140509587697520 -> 140509587695216
|
||
|
140509587697520 [label=ToCopyBackward0]
|
||
|
140509587695552 -> 140509587697520
|
||
|
140509590842400 [label="encoder.layer.6.crossattention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590842400 -> 140509587695552
|
||
|
140509587695552 [label=AccumulateGrad]
|
||
|
140509587695120 -> 140509587694832
|
||
|
140509587696080 -> 140509587694592
|
||
|
140509590842160 [label="encoder.layer.6.crossattention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590842160 -> 140509587696080
|
||
|
140509587696080 [label=AccumulateGrad]
|
||
|
140509587697040 -> 140509587694592
|
||
|
140509590842240 [label="encoder.layer.6.crossattention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590842240 -> 140509587697040
|
||
|
140509587697040 [label=AccumulateGrad]
|
||
|
140509587697328 -> 140509587696464
|
||
|
140509587697328 [label=TBackward0]
|
||
|
140509587693632 -> 140509587697328
|
||
|
140509587693632 [label=ToCopyBackward0]
|
||
|
140509587694256 -> 140509587693632
|
||
|
140509590826016 [label="encoder.layer.6.experts.experts.0.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590826016 -> 140509587694256
|
||
|
140509587694256 [label=AccumulateGrad]
|
||
|
140509588196464 -> 140509588196752
|
||
|
140509588196464 [label=TBackward0]
|
||
|
140509588197136 -> 140509588196464
|
||
|
140509588197136 [label=ToCopyBackward0]
|
||
|
140509587693968 -> 140509588197136
|
||
|
140509590826176 [label="encoder.layer.6.experts.experts.0.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590826176 -> 140509587693968
|
||
|
140509587693968 [label=AccumulateGrad]
|
||
|
140509588196272 -> 140509588195888
|
||
|
140509588195984 -> 140509588195696
|
||
|
140509590825696 [label="encoder.layer.6.experts.experts.0.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590825696 -> 140509588195984
|
||
|
140509588195984 [label=AccumulateGrad]
|
||
|
140509588195456 -> 140509588195696
|
||
|
140509590826496 [label="encoder.layer.6.experts.experts.0.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590826496 -> 140509588195456
|
||
|
140509588195456 [label=AccumulateGrad]
|
||
|
140509588195408 -> 140509588195216
|
||
|
140509588195408 [label=UnsqueezeBackward0]
|
||
|
140509588195936 -> 140509588195408
|
||
|
140509588195936 [label=NativeLayerNormBackward0]
|
||
|
140509588196416 -> 140509588195936
|
||
|
140509588196416 [label=AddBackward0]
|
||
|
140509587694640 -> 140509588196416
|
||
|
140509587694640 [label=NativeDropoutBackward0]
|
||
|
140509587697424 -> 140509587694640
|
||
|
140509587697424 [label=ViewBackward0]
|
||
|
140509587693776 -> 140509587697424
|
||
|
140509587693776 [label=AddmmBackward0]
|
||
|
140509587694928 -> 140509587693776
|
||
|
140509587694928 [label=ToCopyBackward0]
|
||
|
140509587696848 -> 140509587694928
|
||
|
140509590825936 [label="encoder.layer.6.experts.experts.1.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590825936 -> 140509587696848
|
||
|
140509587696848 [label=AccumulateGrad]
|
||
|
140509587694736 -> 140509587693776
|
||
|
140509587694736 [label=ViewBackward0]
|
||
|
140509587695888 -> 140509587694736
|
||
|
140509587695888 [label=GeluBackward0]
|
||
|
140509587696176 -> 140509587695888
|
||
|
140509587696176 [label=ViewBackward0]
|
||
|
140509587695504 -> 140509587696176
|
||
|
140509587695504 [label=AddmmBackward0]
|
||
|
140509587852304 -> 140509587695504
|
||
|
140509587852304 [label=ToCopyBackward0]
|
||
|
140509587850528 -> 140509587852304
|
||
|
140509590825456 [label="encoder.layer.6.experts.experts.1.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590825456 -> 140509587850528
|
||
|
140509587850528 [label=AccumulateGrad]
|
||
|
140509587852496 -> 140509587695504
|
||
|
140509587852496 [label=ViewBackward0]
|
||
|
140509587558560 -> 140509587852496
|
||
|
140509587558560 [label=ToCopyBackward0]
|
||
|
140509588196272 -> 140509587558560
|
||
|
140509587852688 -> 140509587695504
|
||
|
140509587852688 [label=TBackward0]
|
||
|
140509587851680 -> 140509587852688
|
||
|
140509587851680 [label=ToCopyBackward0]
|
||
|
140517615594800 -> 140509587851680
|
||
|
140509590825536 [label="encoder.layer.6.experts.experts.1.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590825536 -> 140517615594800
|
||
|
140517615594800 [label=AccumulateGrad]
|
||
|
140509587697136 -> 140509587693776
|
||
|
140509587697136 [label=TBackward0]
|
||
|
140509587695312 -> 140509587697136
|
||
|
140509587695312 [label=ToCopyBackward0]
|
||
|
140509587558752 -> 140509587695312
|
||
|
140509590825296 [label="encoder.layer.6.experts.experts.1.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590825296 -> 140509587558752
|
||
|
140509587558752 [label=AccumulateGrad]
|
||
|
140509588196272 -> 140509588196416
|
||
|
140509588196368 -> 140509588195936
|
||
|
140509590825056 [label="encoder.layer.6.experts.experts.1.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590825056 -> 140509588196368
|
||
|
140509588196368 [label=AccumulateGrad]
|
||
|
140509588195792 -> 140509588195936
|
||
|
140509590824976 [label="encoder.layer.6.experts.experts.1.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590824976 -> 140509588195792
|
||
|
140509588195792 [label=AccumulateGrad]
|
||
|
140509588195312 -> 140509588194976
|
||
|
140509588195312 [label=UnsqueezeBackward0]
|
||
|
140509588196848 -> 140509588195312
|
||
|
140509588196848 [label=UnsqueezeBackward0]
|
||
|
140509588195504 -> 140509588196848
|
||
|
140509588195504 [label=MulBackward0]
|
||
|
140509587695024 -> 140509588195504
|
||
|
140509587695024 [label=ViewBackward0]
|
||
|
140509587696656 -> 140509587695024
|
||
|
140509587696656 [label=CloneBackward0]
|
||
|
140509587852832 -> 140509587696656
|
||
|
140509587852832 [label=ExpandBackward0]
|
||
|
140517615594896 -> 140509587852832
|
||
|
140517615594896 [label=UnsqueezeBackward0]
|
||
|
140517615594992 -> 140517615594896
|
||
|
140517615594992 [label=SoftmaxBackward0]
|
||
|
140517615595088 -> 140517615594992
|
||
|
140517615595088 [label=MmBackward0]
|
||
|
140517615595184 -> 140517615595088
|
||
|
140517615595184 [label=ToCopyBackward0]
|
||
|
140517615595328 -> 140517615595184
|
||
|
140517615595328 [label=DivBackward0]
|
||
|
140517615595424 -> 140517615595328
|
||
|
140517615595424 [label=SumBackward1]
|
||
|
140517615595472 -> 140517615595424
|
||
|
140517615595472 [label=MulBackward0]
|
||
|
140509587694352 -> 140517615595472
|
||
|
140517615595136 -> 140517615595088
|
||
|
140517615595136 [label=TBackward0]
|
||
|
140517615595232 -> 140517615595136
|
||
|
140517615595232 [label=ToCopyBackward0]
|
||
|
140517615595280 -> 140517615595232
|
||
|
140509590839840 [label="encoder.layer.6.experts.gate.weight
|
||
|
(2, 768)" fillcolor=lightblue]
|
||
|
140509590839840 -> 140517615595280
|
||
|
140517615595280 [label=AccumulateGrad]
|
||
|
140509588194448 -> 140509588165008
|
||
|
140509588194448 [label=ViewBackward0]
|
||
|
140509588196080 -> 140509588194448
|
||
|
140509588196080 [label=CloneBackward0]
|
||
|
140509588195120 -> 140509588196080
|
||
|
140509588195120 [label=ExpandBackward0]
|
||
|
140509587852112 -> 140509588195120
|
||
|
140509587852112 [label=UnsqueezeBackward0]
|
||
|
140509587694160 -> 140509587852112
|
||
|
140509587694160 [label=NativeLayerNormBackward0]
|
||
|
140517615594848 -> 140509587694160
|
||
|
140517615594848 [label=AddBackward0]
|
||
|
140517615726656 -> 140517615594848
|
||
|
140517615726656 [label=NativeDropoutBackward0]
|
||
|
140517615726896 -> 140517615726656
|
||
|
140517615726896 [label=ViewBackward0]
|
||
|
140517615726992 -> 140517615726896
|
||
|
140517615726992 [label=AddmmBackward0]
|
||
|
140517615727088 -> 140517615726992
|
||
|
140517615727088 [label=ToCopyBackward0]
|
||
|
140517615727280 -> 140517615727088
|
||
|
140509590841760 [label="encoder.layer.6.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590841760 -> 140517615727280
|
||
|
140517615727280 [label=AccumulateGrad]
|
||
|
140517615727040 -> 140517615726992
|
||
|
140517615727040 [label=ViewBackward0]
|
||
|
140517615727328 -> 140517615727040
|
||
|
140517615727328 [label=GeluBackward0]
|
||
|
140517615727424 -> 140517615727328
|
||
|
140517615727424 [label=ViewBackward0]
|
||
|
140517615727520 -> 140517615727424
|
||
|
140517615727520 [label=AddmmBackward0]
|
||
|
140517615727616 -> 140517615727520
|
||
|
140517615727616 [label=ToCopyBackward0]
|
||
|
140517615727808 -> 140517615727616
|
||
|
140509590842000 [label="encoder.layer.6.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590842000 -> 140517615727808
|
||
|
140517615727808 [label=AccumulateGrad]
|
||
|
140517615727568 -> 140517615727520
|
||
|
140517615727568 [label=ViewBackward0]
|
||
|
140517615727856 -> 140517615727568
|
||
|
140517615727856 [label=ToCopyBackward0]
|
||
|
140517615726800 -> 140517615727856
|
||
|
140517615726800 [label=SliceBackward0]
|
||
|
140517615728000 -> 140517615726800
|
||
|
140517615728000 [label=SliceBackward0]
|
||
|
140517615728096 -> 140517615728000
|
||
|
140517615728096 [label=SliceBackward0]
|
||
|
140509587850768 -> 140517615728096
|
||
|
140517615727232 -> 140517615727520
|
||
|
140517615727232 [label=TBackward0]
|
||
|
140517615727760 -> 140517615727232
|
||
|
140517615727760 [label=ToCopyBackward0]
|
||
|
140517615728192 -> 140517615727760
|
||
|
140509590841920 [label="encoder.layer.6.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509590841920 -> 140517615728192
|
||
|
140517615728192 [label=AccumulateGrad]
|
||
|
140517615726752 -> 140517615726992
|
||
|
140517615726752 [label=TBackward0]
|
||
|
140517615727472 -> 140517615726752
|
||
|
140517615727472 [label=ToCopyBackward0]
|
||
|
140517615727952 -> 140517615727472
|
||
|
140509590841680 [label="encoder.layer.6.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590841680 -> 140517615727952
|
||
|
140517615727952 [label=AccumulateGrad]
|
||
|
140517615726800 -> 140517615594848
|
||
|
140517615595040 -> 140509587694160
|
||
|
140509590841440 [label="encoder.layer.6.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590841440 -> 140517615595040
|
||
|
140517615595040 [label=AccumulateGrad]
|
||
|
140517615594944 -> 140509587694160
|
||
|
140509590841520 [label="encoder.layer.6.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590841520 -> 140517615594944
|
||
|
140517615594944 [label=AccumulateGrad]
|
||
|
140509588193344 -> 140509588194160
|
||
|
140509588193344 [label=TBackward0]
|
||
|
140509588194544 -> 140509588193344
|
||
|
140509588194544 [label=ToCopyBackward0]
|
||
|
140509588194928 -> 140509588194544
|
||
|
140509590840000 [label="encoder.layer.7.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590840000 -> 140509588194928
|
||
|
140509588194928 [label=AccumulateGrad]
|
||
|
140509588168464 -> 140509588168176
|
||
|
140509588168464 [label=UnsafeViewBackward0]
|
||
|
140509588168560 -> 140509588168464
|
||
|
140509588168560 [label=CloneBackward0]
|
||
|
140509588193776 -> 140509588168560
|
||
|
140509588193776 [label=ExpandBackward0]
|
||
|
140509588194256 -> 140509588193776
|
||
|
140509588194256 [label=TransposeBackward0]
|
||
|
140509588194832 -> 140509588194256
|
||
|
140509588194832 [label=PermuteBackward0]
|
||
|
140509587694448 -> 140509588194832
|
||
|
140509587694448 [label=ViewBackward0]
|
||
|
140517615595376 -> 140509587694448
|
||
|
140517615595376 [label=ViewBackward0]
|
||
|
140509588193392 -> 140517615595376
|
||
|
140509588193392 [label=AddmmBackward0]
|
||
|
140517615727136 -> 140509588193392
|
||
|
140517615727136 [label=ToCopyBackward0]
|
||
|
140517615728048 -> 140517615727136
|
||
|
140509590840560 [label="encoder.layer.7.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590840560 -> 140517615728048
|
||
|
140517615728048 [label=AccumulateGrad]
|
||
|
140517615726944 -> 140509588193392
|
||
|
140517615726944 [label=ViewBackward0]
|
||
|
140517615727376 -> 140517615726944
|
||
|
140517615727376 [label=ToCopyBackward0]
|
||
|
140509588165008 -> 140517615727376
|
||
|
140517615726704 -> 140509588193392
|
||
|
140517615726704 [label=TBackward0]
|
||
|
140517615727664 -> 140517615726704
|
||
|
140517615727664 [label=ToCopyBackward0]
|
||
|
140517615728240 -> 140517615727664
|
||
|
140509590840240 [label="encoder.layer.7.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590840240 -> 140517615728240
|
||
|
140517615728240 [label=AccumulateGrad]
|
||
|
140509588166736 -> 140509588166832
|
||
|
140509588166736 [label=UnsafeViewBackward0]
|
||
|
140509588167504 -> 140509588166736
|
||
|
140509588167504 [label=CloneBackward0]
|
||
|
140509588167792 -> 140509588167504
|
||
|
140509588167792 [label=ExpandBackward0]
|
||
|
140509588168080 -> 140509588167792
|
||
|
140509588168080 [label=PermuteBackward0]
|
||
|
140509588166928 -> 140509588168080
|
||
|
140509588166928 [label=ViewBackward0]
|
||
|
140509588167120 -> 140509588166928
|
||
|
140509588167120 [label=ViewBackward0]
|
||
|
140509588194736 -> 140509588167120
|
||
|
140509588194736 [label=AddmmBackward0]
|
||
|
140517615594704 -> 140509588194736
|
||
|
140517615594704 [label=ToCopyBackward0]
|
||
|
140517615727712 -> 140517615594704
|
||
|
140509590839760 [label="encoder.layer.7.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590839760 -> 140517615727712
|
||
|
140517615727712 [label=AccumulateGrad]
|
||
|
140509587695792 -> 140509588194736
|
||
|
140509587695792 [label=ViewBackward0]
|
||
|
140517615728336 -> 140509587695792
|
||
|
140517615728336 [label=ToCopyBackward0]
|
||
|
140509588165008 -> 140517615728336
|
||
|
140509588193488 -> 140509588194736
|
||
|
140509588193488 [label=TBackward0]
|
||
|
140517615727184 -> 140509588193488
|
||
|
140517615727184 [label=ToCopyBackward0]
|
||
|
140517615728384 -> 140517615727184
|
||
|
140509590840480 [label="encoder.layer.7.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590840480 -> 140517615728384
|
||
|
140517615728384 [label=AccumulateGrad]
|
||
|
140509588165056 -> 140509588165488
|
||
|
140509588165056 [label=TBackward0]
|
||
|
140509588166256 -> 140509588165056
|
||
|
140509588166256 [label=ToCopyBackward0]
|
||
|
140509588166496 -> 140509588166256
|
||
|
140509590839600 [label="encoder.layer.7.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590839600 -> 140509588166496
|
||
|
140509588166496 [label=AccumulateGrad]
|
||
|
140509588165008 -> 140509588164912
|
||
|
140509588164720 -> 140509588139888
|
||
|
140509590839520 [label="encoder.layer.7.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590839520 -> 140509588164720
|
||
|
140509588164720 [label=AccumulateGrad]
|
||
|
140509588164672 -> 140509588139888
|
||
|
140509985419152 [label="encoder.layer.7.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509985419152 -> 140509588164672
|
||
|
140509588164672 [label=AccumulateGrad]
|
||
|
140509588138160 -> 140509588138640
|
||
|
140509588138160 [label=TBackward0]
|
||
|
140509588138928 -> 140509588138160
|
||
|
140509588138928 [label=ToCopyBackward0]
|
||
|
140509588139456 -> 140509588138928
|
||
|
140509591342032 [label="encoder.layer.7.experts.experts.0.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591342032 -> 140509588139456
|
||
|
140509588139456 [label=AccumulateGrad]
|
||
|
140509588137296 -> 140509588137536
|
||
|
140509588137296 [label=TBackward0]
|
||
|
140509588138448 -> 140509588137296
|
||
|
140509588138448 [label=ToCopyBackward0]
|
||
|
140509588139216 -> 140509588138448
|
||
|
140509591341712 [label="encoder.layer.7.experts.experts.0.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591341712 -> 140509588139216
|
||
|
140509588139216 [label=AccumulateGrad]
|
||
|
140509588137056 -> 140509588137104
|
||
|
140509588136816 -> 140509588136912
|
||
|
140509591341472 [label="encoder.layer.7.experts.experts.0.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591341472 -> 140509588136816
|
||
|
140509588136816 [label=AccumulateGrad]
|
||
|
140509588136720 -> 140509588136912
|
||
|
140509591341792 [label="encoder.layer.7.experts.experts.0.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591341792 -> 140509588136720
|
||
|
140509588136720 [label=AccumulateGrad]
|
||
|
140509588136624 -> 140509588136432
|
||
|
140509588136624 [label=UnsqueezeBackward0]
|
||
|
140509588137200 -> 140509588136624
|
||
|
140509588137200 [label=NativeLayerNormBackward0]
|
||
|
140509588137680 -> 140509588137200
|
||
|
140509588137680 [label=AddBackward0]
|
||
|
140509588139024 -> 140509588137680
|
||
|
140509588139024 [label=NativeDropoutBackward0]
|
||
|
140509588138256 -> 140509588139024
|
||
|
140509588138256 [label=ViewBackward0]
|
||
|
140509588139312 -> 140509588138256
|
||
|
140509588139312 [label=AddmmBackward0]
|
||
|
140509588137968 -> 140509588139312
|
||
|
140509588137968 [label=ToCopyBackward0]
|
||
|
140509588165776 -> 140509588137968
|
||
|
140509591342192 [label="encoder.layer.7.experts.experts.1.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591342192 -> 140509588165776
|
||
|
140509588165776 [label=AccumulateGrad]
|
||
|
140509588165104 -> 140509588139312
|
||
|
140509588165104 [label=ViewBackward0]
|
||
|
140509588166448 -> 140509588165104
|
||
|
140509588166448 [label=GeluBackward0]
|
||
|
140509588166064 -> 140509588166448
|
||
|
140509588166064 [label=ViewBackward0]
|
||
|
140509588167600 -> 140509588166064
|
||
|
140509588167600 [label=AddmmBackward0]
|
||
|
140509588168272 -> 140509588167600
|
||
|
140509588168272 [label=ToCopyBackward0]
|
||
|
140509588193968 -> 140509588168272
|
||
|
140509591341552 [label="encoder.layer.7.experts.experts.1.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591341552 -> 140509588193968
|
||
|
140509588193968 [label=AccumulateGrad]
|
||
|
140509588167984 -> 140509588167600
|
||
|
140509588167984 [label=ViewBackward0]
|
||
|
140517615727904 -> 140509588167984
|
||
|
140517615727904 [label=ToCopyBackward0]
|
||
|
140509588137056 -> 140517615727904
|
||
|
140509588165872 -> 140509588167600
|
||
|
140509588165872 [label=TBackward0]
|
||
|
140517615726848 -> 140509588165872
|
||
|
140517615726848 [label=ToCopyBackward0]
|
||
|
140517615728288 -> 140517615726848
|
||
|
140509591341232 [label="encoder.layer.7.experts.experts.1.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591341232 -> 140517615728288
|
||
|
140517615728288 [label=AccumulateGrad]
|
||
|
140509588164816 -> 140509588139312
|
||
|
140509588164816 [label=TBackward0]
|
||
|
140509588166016 -> 140509588164816
|
||
|
140509588166016 [label=ToCopyBackward0]
|
||
|
140509588193536 -> 140509588166016
|
||
|
140509591340992 [label="encoder.layer.7.experts.experts.1.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591340992 -> 140509588193536
|
||
|
140509588193536 [label=AccumulateGrad]
|
||
|
140509588137056 -> 140509588137680
|
||
|
140509588137584 -> 140509588137200
|
||
|
140509591340752 [label="encoder.layer.7.experts.experts.1.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591340752 -> 140509588137584
|
||
|
140509588137584 [label=AccumulateGrad]
|
||
|
140509588136576 -> 140509588137200
|
||
|
140509591341072 [label="encoder.layer.7.experts.experts.1.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591341072 -> 140509588136576
|
||
|
140509588136576 [label=AccumulateGrad]
|
||
|
140509588136096 -> 140509588136240
|
||
|
140509588136096 [label=UnsqueezeBackward0]
|
||
|
140509588138064 -> 140509588136096
|
||
|
140509588138064 [label=UnsqueezeBackward0]
|
||
|
140509588139408 -> 140509588138064
|
||
|
140509588139408 [label=MulBackward0]
|
||
|
140509588139696 -> 140509588139408
|
||
|
140509588139696 [label=SoftmaxBackward0]
|
||
|
140509588167312 -> 140509588139696
|
||
|
140509588167312 [label=MmBackward0]
|
||
|
140509588165392 -> 140509588167312
|
||
|
140509588165392 [label=ToCopyBackward0]
|
||
|
140517615728480 -> 140509588165392
|
||
|
140517615728480 [label=DivBackward0]
|
||
|
140517615728672 -> 140517615728480
|
||
|
140517615728672 [label=SumBackward1]
|
||
|
140517615728768 -> 140517615728672
|
||
|
140517615728768 [label=MulBackward0]
|
||
|
140509588137056 -> 140517615728768
|
||
|
140517615728144 -> 140509588167312
|
||
|
140517615728144 [label=TBackward0]
|
||
|
140517615728720 -> 140517615728144
|
||
|
140517615728720 [label=ToCopyBackward0]
|
||
|
140517615728816 -> 140517615728720
|
||
|
140509590823376 [label="encoder.layer.7.experts.gate.weight
|
||
|
(2, 768)" fillcolor=lightblue]
|
||
|
140509590823376 -> 140517615728816
|
||
|
140517615728816 [label=AccumulateGrad]
|
||
|
140509588106928 -> 140509588077488
|
||
|
140509588106928 [label=IndexBackward0]
|
||
|
140509588137008 -> 140509588106928
|
||
|
140509588137008 [label=NativeLayerNormBackward0]
|
||
|
140509588136336 -> 140509588137008
|
||
|
140509588136336 [label=AddBackward0]
|
||
|
140517615728864 -> 140509588136336
|
||
|
140517615728864 [label=NativeDropoutBackward0]
|
||
|
140517615728528 -> 140517615728864
|
||
|
140517615728528 [label=ViewBackward0]
|
||
|
140517615729008 -> 140517615728528
|
||
|
140517615729008 [label=AddmmBackward0]
|
||
|
140517615729104 -> 140517615729008
|
||
|
140517615729104 [label=ToCopyBackward0]
|
||
|
140517615729296 -> 140517615729104
|
||
|
140509590826656 [label="encoder.layer.7.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590826656 -> 140517615729296
|
||
|
140517615729296 [label=AccumulateGrad]
|
||
|
140517615729056 -> 140517615729008
|
||
|
140517615729056 [label=ViewBackward0]
|
||
|
140517615729344 -> 140517615729056
|
||
|
140517615729344 [label=GeluBackward0]
|
||
|
140517615729440 -> 140517615729344
|
||
|
140517615729440 [label=ViewBackward0]
|
||
|
140517615729536 -> 140517615729440
|
||
|
140517615729536 [label=AddmmBackward0]
|
||
|
140517615729632 -> 140517615729536
|
||
|
140517615729632 [label=ToCopyBackward0]
|
||
|
140517615729824 -> 140517615729632
|
||
|
140509590826896 [label="encoder.layer.7.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509590826896 -> 140517615729824
|
||
|
140517615729824 [label=AccumulateGrad]
|
||
|
140517615729584 -> 140517615729536
|
||
|
140517615729584 [label=ViewBackward0]
|
||
|
140517615729872 -> 140517615729584
|
||
|
140517615729872 [label=ToCopyBackward0]
|
||
|
140517615728624 -> 140517615729872
|
||
|
140517615728624 [label=SliceBackward0]
|
||
|
140517615730016 -> 140517615728624
|
||
|
140517615730016 [label=SliceBackward0]
|
||
|
140517615730112 -> 140517615730016
|
||
|
140517615730112 [label=SliceBackward0]
|
||
|
140509588139888 -> 140517615730112
|
||
|
140517615729248 -> 140517615729536
|
||
|
140517615729248 [label=TBackward0]
|
||
|
140517615729776 -> 140517615729248
|
||
|
140517615729776 [label=ToCopyBackward0]
|
||
|
140517615730208 -> 140517615729776
|
||
|
140509985417872 [label="encoder.layer.7.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509985417872 -> 140517615730208
|
||
|
140517615730208 [label=AccumulateGrad]
|
||
|
140517615728912 -> 140517615729008
|
||
|
140517615728912 [label=TBackward0]
|
||
|
140517615729488 -> 140517615728912
|
||
|
140517615729488 [label=ToCopyBackward0]
|
||
|
140517615729968 -> 140517615729488
|
||
|
140509590826416 [label="encoder.layer.7.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509590826416 -> 140517615729968
|
||
|
140517615729968 [label=AccumulateGrad]
|
||
|
140517615728624 -> 140509588136336
|
||
|
140509588138736 -> 140509588137008
|
||
|
140509590826736 [label="encoder.layer.7.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590826736 -> 140509588138736
|
||
|
140509588138736 [label=AccumulateGrad]
|
||
|
140509588136048 -> 140509588137008
|
||
|
140509590824496 [label="encoder.layer.7.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590824496 -> 140509588136048
|
||
|
140509588136048 [label=AccumulateGrad]
|
||
|
140509588105392 -> 140509588106352
|
||
|
140509588105392 [label=TBackward0]
|
||
|
140509588106640 -> 140509588105392
|
||
|
140509588106640 [label=ToCopyBackward0]
|
||
|
140509588165584 -> 140509588106640
|
||
|
140509590823616 [label="encoder.layer.8.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590823616 -> 140509588165584
|
||
|
140509588165584 [label=AccumulateGrad]
|
||
|
140509588105200 -> 140509588105296
|
||
|
140509588105200 [label=UnsafeViewBackward0]
|
||
|
140509588136144 -> 140509588105200
|
||
|
140509588136144 [label=CloneBackward0]
|
||
|
140509588106064 -> 140509588136144
|
||
|
140509588106064 [label=ExpandBackward0]
|
||
|
140509588106448 -> 140509588106064
|
||
|
140509588106448 [label=TransposeBackward0]
|
||
|
140509588107216 -> 140509588106448
|
||
|
140509588107216 [label=PermuteBackward0]
|
||
|
140509588106880 -> 140509588107216
|
||
|
140509588106880 [label=ViewBackward0]
|
||
|
140517615728960 -> 140509588106880
|
||
|
140517615728960 [label=ViewBackward0]
|
||
|
140517615729200 -> 140517615728960
|
||
|
140517615729200 [label=AddmmBackward0]
|
||
|
140517615729728 -> 140517615729200
|
||
|
140517615729728 [label=ToCopyBackward0]
|
||
|
140517615729920 -> 140517615729728
|
||
|
140509590823776 [label="encoder.layer.8.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590823776 -> 140517615729920
|
||
|
140517615729920 [label=AccumulateGrad]
|
||
|
140517615729680 -> 140517615729200
|
||
|
140517615729680 [label=ViewBackward0]
|
||
|
140517615730256 -> 140517615729680
|
||
|
140517615730256 [label=ToCopyBackward0]
|
||
|
140509588077488 -> 140517615730256
|
||
|
140517615728432 -> 140517615729200
|
||
|
140517615728432 [label=TBackward0]
|
||
|
140517615729392 -> 140517615728432
|
||
|
140517615729392 [label=ToCopyBackward0]
|
||
|
140517615730400 -> 140517615729392
|
||
|
140509590823856 [label="encoder.layer.8.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590823856 -> 140517615730400
|
||
|
140517615730400 [label=AccumulateGrad]
|
||
|
140509588103856 -> 140509588103520
|
||
|
140509588103856 [label=UnsafeViewBackward0]
|
||
|
140509588104240 -> 140509588103856
|
||
|
140509588104240 [label=CloneBackward0]
|
||
|
140509588104480 -> 140509588104240
|
||
|
140509588104480 [label=ExpandBackward0]
|
||
|
140509588104912 -> 140509588104480
|
||
|
140509588104912 [label=PermuteBackward0]
|
||
|
140509588104048 -> 140509588104912
|
||
|
140509588104048 [label=ViewBackward0]
|
||
|
140509588105968 -> 140509588104048
|
||
|
140509588105968 [label=ViewBackward0]
|
||
|
140509588106736 -> 140509588105968
|
||
|
140509588106736 [label=AddmmBackward0]
|
||
|
140509588105584 -> 140509588106736
|
||
|
140509588105584 [label=ToCopyBackward0]
|
||
|
140517615730160 -> 140509588105584
|
||
|
140509590824016 [label="encoder.layer.8.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590824016 -> 140517615730160
|
||
|
140517615730160 [label=AccumulateGrad]
|
||
|
140509588103952 -> 140509588106736
|
||
|
140509588103952 [label=ViewBackward0]
|
||
|
140517615730496 -> 140509588103952
|
||
|
140517615730496 [label=ToCopyBackward0]
|
||
|
140509588077488 -> 140517615730496
|
||
|
140517615728576 -> 140509588106736
|
||
|
140517615728576 [label=TBackward0]
|
||
|
140517615730064 -> 140517615728576
|
||
|
140517615730064 [label=ToCopyBackward0]
|
||
|
140517615730544 -> 140517615730064
|
||
|
140509590824096 [label="encoder.layer.8.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590824096 -> 140517615730544
|
||
|
140517615730544 [label=AccumulateGrad]
|
||
|
140509588077584 -> 140509588077968
|
||
|
140509588077584 [label=TBackward0]
|
||
|
140509588078256 -> 140509588077584
|
||
|
140509588078256 [label=ToCopyBackward0]
|
||
|
140509588103664 -> 140509588078256
|
||
|
140509590823296 [label="encoder.layer.8.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509590823296 -> 140509588103664
|
||
|
140509588103664 [label=AccumulateGrad]
|
||
|
140509588077488 -> 140509588076960
|
||
|
140509588077104 -> 140509588076912
|
||
|
140509590823136 [label="encoder.layer.8.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509590823136 -> 140509588077104
|
||
|
140509588077104 [label=AccumulateGrad]
|
||
|
140509588076000 -> 140509588076912
|
||
|
140509591342912 [label="encoder.layer.8.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591342912 -> 140509588076000
|
||
|
140509588076000 [label=AccumulateGrad]
|
||
|
140509588074800 -> 140509588075760
|
||
|
140509588074800 [label=TBackward0]
|
||
|
140509588076336 -> 140509588074800
|
||
|
140509588076336 [label=ToCopyBackward0]
|
||
|
140509588077008 -> 140509588076336
|
||
|
140509591342992 [label="encoder.layer.8.crossattention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591342992 -> 140509588077008
|
||
|
140509588077008 [label=AccumulateGrad]
|
||
|
140509588074704 -> 140509588074656
|
||
|
140509588074704 [label=UnsafeViewBackward0]
|
||
|
140509588075376 -> 140509588074704
|
||
|
140509588075376 [label=CloneBackward0]
|
||
|
140509588075664 -> 140509588075376
|
||
|
140509588075664 [label=ExpandBackward0]
|
||
|
140509588076144 -> 140509588075664
|
||
|
140509588076144 [label=TransposeBackward0]
|
||
|
140509588076816 -> 140509588076144
|
||
|
140509588076816 [label=PermuteBackward0]
|
||
|
140509588077296 -> 140509588076816
|
||
|
140509588077296 [label=ViewBackward0]
|
||
|
140509588077440 -> 140509588077296
|
||
|
140509588077440 [label=ViewBackward0]
|
||
|
140509588077920 -> 140509588077440
|
||
|
140509588077920 [label=AddmmBackward0]
|
||
|
140509588078544 -> 140509588077920
|
||
|
140509588078544 [label=ToCopyBackward0]
|
||
|
140509588104432 -> 140509588078544
|
||
|
140509591342752 [label="encoder.layer.8.crossattention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591342752 -> 140509588104432
|
||
|
140509588104432 [label=AccumulateGrad]
|
||
|
140509588075088 -> 140509588077920
|
||
|
140509588075088 [label=ViewBackward0]
|
||
|
140509588104720 -> 140509588075088
|
||
|
140509588104720 [label=ToCopyBackward0]
|
||
|
140509588105776 -> 140509588104720
|
||
|
140509588105776 [label=ViewBackward0]
|
||
|
140509588106256 -> 140509588105776
|
||
|
140509588106256 [label=CloneBackward0]
|
||
|
140517615730352 -> 140509588106256
|
||
|
140517615730352 [label=ExpandBackward0]
|
||
|
140517615730592 -> 140517615730352
|
||
|
140517615730592 [label=UnsqueezeBackward0]
|
||
|
140517615539152 -> 140517615730592
|
||
|
140509588103568 -> 140509588077920
|
||
|
140509588103568 [label=TBackward0]
|
||
|
140509588103280 -> 140509588103568
|
||
|
140509588103280 [label=ToCopyBackward0]
|
||
|
140509588104960 -> 140509588103280
|
||
|
140509591342672 [label="encoder.layer.8.crossattention.self.key.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509591342672 -> 140509588104960
|
||
|
140509588104960 [label=AccumulateGrad]
|
||
|
140509588048624 -> 140509588048432
|
||
|
140509588048624 [label=UnsafeViewBackward0]
|
||
|
140509588048960 -> 140509588048624
|
||
|
140509588048960 [label=CloneBackward0]
|
||
|
140509588049392 -> 140509588048960
|
||
|
140509588049392 [label=ExpandBackward0]
|
||
|
140509588048816 -> 140509588049392
|
||
|
140509588048816 [label=PermuteBackward0]
|
||
|
140509588048720 -> 140509588048816
|
||
|
140509588048720 [label=ViewBackward0]
|
||
|
140509588075568 -> 140509588048720
|
||
|
140509588075568 [label=ViewBackward0]
|
||
|
140509588076624 -> 140509588075568
|
||
|
140509588076624 [label=AddmmBackward0]
|
||
|
140509588076432 -> 140509588076624
|
||
|
140509588076432 [label=ToCopyBackward0]
|
||
|
140509588103376 -> 140509588076432
|
||
|
140509591340592 [label="encoder.layer.8.crossattention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591340592 -> 140509588103376
|
||
|
140509588103376 [label=AccumulateGrad]
|
||
|
140509588077200 -> 140509588076624
|
||
|
140509588077200 [label=ViewBackward0]
|
||
|
140509588104000 -> 140509588077200
|
||
|
140509588104000 [label=ToCopyBackward0]
|
||
|
140509588105776 -> 140509588104000
|
||
|
140509588074608 -> 140509588076624
|
||
|
140509588074608 [label=TBackward0]
|
||
|
140517615730640 -> 140509588074608
|
||
|
140517615730640 [label=ToCopyBackward0]
|
||
|
140517615730448 -> 140517615730640
|
||
|
140509591342512 [label="encoder.layer.8.crossattention.self.value.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509591342512 -> 140517615730448
|
||
|
140517615730448 [label=AccumulateGrad]
|
||
|
140509588047088 -> 140509588047376
|
||
|
140509588047088 [label=TBackward0]
|
||
|
140509588048144 -> 140509588047088
|
||
|
140509588048144 [label=ToCopyBackward0]
|
||
|
140509588048528 -> 140509588048144
|
||
|
140509591340832 [label="encoder.layer.8.crossattention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591340832 -> 140509588048528
|
||
|
140509588048528 [label=AccumulateGrad]
|
||
|
140509588046896 -> 140509588046608
|
||
|
140509588046320 -> 140509588046416
|
||
|
140509591340512 [label="encoder.layer.8.crossattention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591340512 -> 140509588046320
|
||
|
140509588046320 [label=AccumulateGrad]
|
||
|
140509588045888 -> 140509588046416
|
||
|
140509591340272 [label="encoder.layer.8.crossattention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591340272 -> 140509588045888
|
||
|
140509588045888 [label=AccumulateGrad]
|
||
|
140509588024432 -> 140509588024912
|
||
|
140509588024432 [label=TBackward0]
|
||
|
140509588046128 -> 140509588024432
|
||
|
140509588046128 [label=ToCopyBackward0]
|
||
|
140509588046512 -> 140509588046128
|
||
|
140509591319952 [label="encoder.layer.8.experts.experts.0.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591319952 -> 140509588046512
|
||
|
140509588046512 [label=AccumulateGrad]
|
||
|
140509588023568 -> 140509588023856
|
||
|
140509588023568 [label=TBackward0]
|
||
|
140509588024576 -> 140509588023568
|
||
|
140509588024576 [label=ToCopyBackward0]
|
||
|
140509588025008 -> 140509588024576
|
||
|
140509591320032 [label="encoder.layer.8.experts.experts.0.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591320032 -> 140509588025008
|
||
|
140509588025008 [label=AccumulateGrad]
|
||
|
140509588023376 -> 140509588023280
|
||
|
140509588023088 -> 140509588023184
|
||
|
140509591319792 [label="encoder.layer.8.experts.experts.0.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591319792 -> 140509588023088
|
||
|
140509588023088 [label=AccumulateGrad]
|
||
|
140509588022992 -> 140509588023184
|
||
|
140509591319712 [label="encoder.layer.8.experts.experts.0.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591319712 -> 140509588022992
|
||
|
140509588022992 [label=AccumulateGrad]
|
||
|
140509588022800 -> 140509588022704
|
||
|
140509588022800 [label=UnsqueezeBackward0]
|
||
|
140509588023472 -> 140509588022800
|
||
|
140509588023472 [label=NativeLayerNormBackward0]
|
||
|
140509588023952 -> 140509588023472
|
||
|
140509588023952 [label=AddBackward0]
|
||
|
140509588024528 -> 140509588023952
|
||
|
140509588024528 [label=NativeDropoutBackward0]
|
||
|
140509588046032 -> 140509588024528
|
||
|
140509588046032 [label=ViewBackward0]
|
||
|
140509588045936 -> 140509588046032
|
||
|
140509588045936 [label=AddmmBackward0]
|
||
|
140509588047472 -> 140509588045936
|
||
|
140509588047472 [label=ToCopyBackward0]
|
||
|
140509588047520 -> 140509588047472
|
||
|
140509591320512 [label="encoder.layer.8.experts.experts.1.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591320512 -> 140509588047520
|
||
|
140509588047520 [label=AccumulateGrad]
|
||
|
140509588047040 -> 140509588045936
|
||
|
140509588047040 [label=ViewBackward0]
|
||
|
140509588048048 -> 140509588047040
|
||
|
140509588048048 [label=GeluBackward0]
|
||
|
140509588049440 -> 140509588048048
|
||
|
140509588049440 [label=ViewBackward0]
|
||
|
140509588048912 -> 140509588049440
|
||
|
140509588048912 [label=AddmmBackward0]
|
||
|
140509588077680 -> 140509588048912
|
||
|
140509588077680 [label=ToCopyBackward0]
|
||
|
140517615729152 -> 140509588077680
|
||
|
140509591319472 [label="encoder.layer.8.experts.experts.1.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591319472 -> 140517615729152
|
||
|
140517615729152 [label=AccumulateGrad]
|
||
|
140509588075856 -> 140509588048912
|
||
|
140509588075856 [label=ViewBackward0]
|
||
|
140517615268000 -> 140509588075856
|
||
|
140517615268000 [label=ToCopyBackward0]
|
||
|
140509588023376 -> 140517615268000
|
||
|
140509588074560 -> 140509588048912
|
||
|
140509588074560 [label=TBackward0]
|
||
|
140517615267904 -> 140509588074560
|
||
|
140517615267904 [label=ToCopyBackward0]
|
||
|
140517615268144 -> 140517615267904
|
||
|
140509591319552 [label="encoder.layer.8.experts.experts.1.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591319552 -> 140517615268144
|
||
|
140517615268144 [label=AccumulateGrad]
|
||
|
140509588046992 -> 140509588045936
|
||
|
140509588046992 [label=TBackward0]
|
||
|
140509588075184 -> 140509588046992
|
||
|
140509588075184 [label=ToCopyBackward0]
|
||
|
140517615730304 -> 140509588075184
|
||
|
140509591319312 [label="encoder.layer.8.experts.experts.1.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591319312 -> 140517615730304
|
||
|
140517615730304 [label=AccumulateGrad]
|
||
|
140509588023376 -> 140509588023952
|
||
|
140509588023760 -> 140509588023472
|
||
|
140509591319072 [label="encoder.layer.8.experts.experts.1.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591319072 -> 140509588023760
|
||
|
140509588023760 [label=AccumulateGrad]
|
||
|
140509588022896 -> 140509588023472
|
||
|
140509591318992 [label="encoder.layer.8.experts.experts.1.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591318992 -> 140509588022896
|
||
|
140509588022896 [label=AccumulateGrad]
|
||
|
140509588022416 -> 140509588022512
|
||
|
140509588022416 [label=UnsqueezeBackward0]
|
||
|
140509588024240 -> 140509588022416
|
||
|
140509588024240 [label=UnsqueezeBackward0]
|
||
|
140509588024096 -> 140509588024240
|
||
|
140509588024096 [label=MulBackward0]
|
||
|
140509588047664 -> 140509588024096
|
||
|
140509588047664 [label=SoftmaxBackward0]
|
||
|
140509588049200 -> 140509588047664
|
||
|
140509588049200 [label=MmBackward0]
|
||
|
140509588046080 -> 140509588049200
|
||
|
140509588046080 [label=ToCopyBackward0]
|
||
|
140517615268048 -> 140509588046080
|
||
|
140517615268048 [label=DivBackward0]
|
||
|
140517615268336 -> 140517615268048
|
||
|
140517615268336 [label=SumBackward1]
|
||
|
140517615268432 -> 140517615268336
|
||
|
140517615268432 [label=MulBackward0]
|
||
|
140509588023376 -> 140517615268432
|
||
|
140517615267952 -> 140509588049200
|
||
|
140517615267952 [label=TBackward0]
|
||
|
140517615268384 -> 140517615267952
|
||
|
140517615268384 [label=ToCopyBackward0]
|
||
|
140517615268480 -> 140517615268384
|
||
|
140509591321392 [label="encoder.layer.8.experts.gate.weight
|
||
|
(2, 768)" fillcolor=lightblue]
|
||
|
140509591321392 -> 140517615268480
|
||
|
140517615268480 [label=AccumulateGrad]
|
||
|
140509588021840 -> 140509587963664
|
||
|
140509588021840 [label=IndexBackward0]
|
||
|
140509588023136 -> 140509588021840
|
||
|
140509588023136 [label=NativeLayerNormBackward0]
|
||
|
140509588022608 -> 140509588023136
|
||
|
140509588022608 [label=AddBackward0]
|
||
|
140517615268528 -> 140509588022608
|
||
|
140517615268528 [label=NativeDropoutBackward0]
|
||
|
140517615268192 -> 140517615268528
|
||
|
140517615268192 [label=ViewBackward0]
|
||
|
140517615268672 -> 140517615268192
|
||
|
140517615268672 [label=AddmmBackward0]
|
||
|
140517615268768 -> 140517615268672
|
||
|
140517615268768 [label=ToCopyBackward0]
|
||
|
140517615268960 -> 140517615268768
|
||
|
140509591339792 [label="encoder.layer.8.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591339792 -> 140517615268960
|
||
|
140517615268960 [label=AccumulateGrad]
|
||
|
140517615268720 -> 140517615268672
|
||
|
140517615268720 [label=ViewBackward0]
|
||
|
140517615269008 -> 140517615268720
|
||
|
140517615269008 [label=GeluBackward0]
|
||
|
140517615269104 -> 140517615269008
|
||
|
140517615269104 [label=ViewBackward0]
|
||
|
140517615269200 -> 140517615269104
|
||
|
140517615269200 [label=AddmmBackward0]
|
||
|
140517615269296 -> 140517615269200
|
||
|
140517615269296 [label=ToCopyBackward0]
|
||
|
140517615269488 -> 140517615269296
|
||
|
140509591340032 [label="encoder.layer.8.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591340032 -> 140517615269488
|
||
|
140517615269488 [label=AccumulateGrad]
|
||
|
140517615269248 -> 140517615269200
|
||
|
140517615269248 [label=ViewBackward0]
|
||
|
140517615269536 -> 140517615269248
|
||
|
140517615269536 [label=ToCopyBackward0]
|
||
|
140517615268288 -> 140517615269536
|
||
|
140517615268288 [label=SliceBackward0]
|
||
|
140517615269680 -> 140517615268288
|
||
|
140517615269680 [label=SliceBackward0]
|
||
|
140517615269776 -> 140517615269680
|
||
|
140517615269776 [label=SliceBackward0]
|
||
|
140509588076912 -> 140517615269776
|
||
|
140517615268912 -> 140517615269200
|
||
|
140517615268912 [label=TBackward0]
|
||
|
140517615269440 -> 140517615268912
|
||
|
140517615269440 [label=ToCopyBackward0]
|
||
|
140517615269872 -> 140517615269440
|
||
|
140509591340352 [label="encoder.layer.8.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591340352 -> 140517615269872
|
||
|
140517615269872 [label=AccumulateGrad]
|
||
|
140517615268576 -> 140517615268672
|
||
|
140517615268576 [label=TBackward0]
|
||
|
140517615269152 -> 140517615268576
|
||
|
140517615269152 [label=ToCopyBackward0]
|
||
|
140517615269632 -> 140517615269152
|
||
|
140509591340112 [label="encoder.layer.8.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591340112 -> 140517615269632
|
||
|
140517615269632 [label=AccumulateGrad]
|
||
|
140517615268288 -> 140509588022608
|
||
|
140509588022176 -> 140509588023136
|
||
|
140509591339872 [label="encoder.layer.8.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591339872 -> 140509588022176
|
||
|
140509588022176 [label=AccumulateGrad]
|
||
|
140509588046560 -> 140509588023136
|
||
|
140509591339552 [label="encoder.layer.8.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591339552 -> 140509588046560
|
||
|
140509588046560 [label=AccumulateGrad]
|
||
|
140509588021312 -> 140509587991520
|
||
|
140509588021312 [label=TBackward0]
|
||
|
140509588021648 -> 140509588021312
|
||
|
140509588021648 [label=ToCopyBackward0]
|
||
|
140509588048336 -> 140509588021648
|
||
|
140509591321632 [label="encoder.layer.9.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591321632 -> 140509588048336
|
||
|
140509588048336 [label=AccumulateGrad]
|
||
|
140509587991472 -> 140509587991568
|
||
|
140509587991472 [label=UnsafeViewBackward0]
|
||
|
140509587992144 -> 140509587991472
|
||
|
140509587992144 [label=CloneBackward0]
|
||
|
140509587992528 -> 140509587992144
|
||
|
140509587992528 [label=ExpandBackward0]
|
||
|
140509587991856 -> 140509587992528
|
||
|
140509587991856 [label=TransposeBackward0]
|
||
|
140509588022320 -> 140509587991856
|
||
|
140509588022320 [label=PermuteBackward0]
|
||
|
140509588021936 -> 140509588022320
|
||
|
140509588021936 [label=ViewBackward0]
|
||
|
140517615268624 -> 140509588021936
|
||
|
140517615268624 [label=ViewBackward0]
|
||
|
140517615268864 -> 140517615268624
|
||
|
140517615268864 [label=AddmmBackward0]
|
||
|
140517615269392 -> 140517615268864
|
||
|
140517615269392 [label=ToCopyBackward0]
|
||
|
140517615269584 -> 140517615269392
|
||
|
140509591322192 [label="encoder.layer.9.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591322192 -> 140517615269584
|
||
|
140517615269584 [label=AccumulateGrad]
|
||
|
140517615269344 -> 140517615268864
|
||
|
140517615269344 [label=ViewBackward0]
|
||
|
140517615269920 -> 140517615269344
|
||
|
140517615269920 [label=ToCopyBackward0]
|
||
|
140509587963664 -> 140517615269920
|
||
|
140517615268096 -> 140517615268864
|
||
|
140517615268096 [label=TBackward0]
|
||
|
140517615269056 -> 140517615268096
|
||
|
140517615269056 [label=ToCopyBackward0]
|
||
|
140517615270064 -> 140517615269056
|
||
|
140509591321872 [label="encoder.layer.9.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591321872 -> 140517615270064
|
||
|
140517615270064 [label=AccumulateGrad]
|
||
|
140509587990128 -> 140509587989840
|
||
|
140509587990128 [label=UnsafeViewBackward0]
|
||
|
140509587990512 -> 140509587990128
|
||
|
140509587990512 [label=CloneBackward0]
|
||
|
140509587990800 -> 140509587990512
|
||
|
140509587990800 [label=ExpandBackward0]
|
||
|
140509587991040 -> 140509587990800
|
||
|
140509587991040 [label=PermuteBackward0]
|
||
|
140509587990224 -> 140509587991040
|
||
|
140509587990224 [label=ViewBackward0]
|
||
|
140509587992336 -> 140509587990224
|
||
|
140509587992336 [label=ViewBackward0]
|
||
|
140509587990080 -> 140509587992336
|
||
|
140509587990080 [label=AddmmBackward0]
|
||
|
140509588021360 -> 140509587990080
|
||
|
140509588021360 [label=ToCopyBackward0]
|
||
|
140517615269824 -> 140509588021360
|
||
|
140509591322432 [label="encoder.layer.9.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591322432 -> 140517615269824
|
||
|
140517615269824 [label=AccumulateGrad]
|
||
|
140509588021744 -> 140509587990080
|
||
|
140509588021744 [label=ViewBackward0]
|
||
|
140517615270160 -> 140509588021744
|
||
|
140517615270160 [label=ToCopyBackward0]
|
||
|
140509587963664 -> 140517615270160
|
||
|
140517615268240 -> 140509587990080
|
||
|
140517615268240 [label=TBackward0]
|
||
|
140517615269728 -> 140517615268240
|
||
|
140517615269728 [label=ToCopyBackward0]
|
||
|
140517615270208 -> 140517615269728
|
||
|
140509591322112 [label="encoder.layer.9.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591322112 -> 140517615270208
|
||
|
140517615270208 [label=AccumulateGrad]
|
||
|
140509587988688 -> 140509587988784
|
||
|
140509587988688 [label=TBackward0]
|
||
|
140509587989648 -> 140509587988688
|
||
|
140509587989648 [label=ToCopyBackward0]
|
||
|
140509587989936 -> 140509587989648
|
||
|
140509591321712 [label="encoder.layer.9.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591321712 -> 140509587989936
|
||
|
140509587989936 [label=AccumulateGrad]
|
||
|
140509587963664 -> 140509587963280
|
||
|
140509587963376 -> 140509587963040
|
||
|
140509591321232 [label="encoder.layer.9.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591321232 -> 140509587963376
|
||
|
140509587963376 [label=AccumulateGrad]
|
||
|
140509587962032 -> 140509587963040
|
||
|
140509591321472 [label="encoder.layer.9.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591321472 -> 140509587962032
|
||
|
140509587962032 [label=AccumulateGrad]
|
||
|
140509587961120 -> 140509587961600
|
||
|
140509587961120 [label=TBackward0]
|
||
|
140509587962224 -> 140509587961120
|
||
|
140509587962224 [label=ToCopyBackward0]
|
||
|
140509587962896 -> 140509587962224
|
||
|
140509591311760 [label="encoder.layer.9.experts.experts.0.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591311760 -> 140509587962896
|
||
|
140509587962896 [label=AccumulateGrad]
|
||
|
140509587960688 -> 140509587960976
|
||
|
140509587960688 [label=TBackward0]
|
||
|
140509587961744 -> 140509587960688
|
||
|
140509587961744 [label=ToCopyBackward0]
|
||
|
140509587962608 -> 140509587961744
|
||
|
140509591311440 [label="encoder.layer.9.experts.experts.0.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591311440 -> 140509587962608
|
||
|
140509587962608 [label=AccumulateGrad]
|
||
|
140509587960496 -> 140509587960112
|
||
|
140509587960208 -> 140509588463424
|
||
|
140509591311200 [label="encoder.layer.9.experts.experts.0.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591311200 -> 140509587960208
|
||
|
140509587960208 [label=AccumulateGrad]
|
||
|
140509587960016 -> 140509588463424
|
||
|
140509591311520 [label="encoder.layer.9.experts.experts.0.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591311520 -> 140509587960016
|
||
|
140509587960016 [label=AccumulateGrad]
|
||
|
140509588463376 -> 140509588463184
|
||
|
140509588463376 [label=UnsqueezeBackward0]
|
||
|
140509587960160 -> 140509588463376
|
||
|
140509587960160 [label=NativeLayerNormBackward0]
|
||
|
140509587960640 -> 140509587960160
|
||
|
140509587960640 [label=AddBackward0]
|
||
|
140509587963184 -> 140509587960640
|
||
|
140509587963184 [label=NativeDropoutBackward0]
|
||
|
140509587961648 -> 140509587963184
|
||
|
140509587961648 [label=ViewBackward0]
|
||
|
140509587962320 -> 140509587961648
|
||
|
140509587962320 [label=AddmmBackward0]
|
||
|
140509587963472 -> 140509587962320
|
||
|
140509587963472 [label=ToCopyBackward0]
|
||
|
140509587989168 -> 140509587963472
|
||
|
140509591311920 [label="encoder.layer.9.experts.experts.1.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591311920 -> 140509587989168
|
||
|
140509587989168 [label=AccumulateGrad]
|
||
|
140509587963568 -> 140509587962320
|
||
|
140509587963568 [label=ViewBackward0]
|
||
|
140509587989744 -> 140509587963568
|
||
|
140509587989744 [label=GeluBackward0]
|
||
|
140509587989072 -> 140509587989744
|
||
|
140509587989072 [label=ViewBackward0]
|
||
|
140509587990560 -> 140509587989072
|
||
|
140509587990560 [label=AddmmBackward0]
|
||
|
140509587991280 -> 140509587990560
|
||
|
140509587991280 [label=ToCopyBackward0]
|
||
|
140509588022224 -> 140509587991280
|
||
|
140509591311280 [label="encoder.layer.9.experts.experts.1.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591311280 -> 140509588022224
|
||
|
140509588022224 [label=AccumulateGrad]
|
||
|
140509587990992 -> 140509587990560
|
||
|
140509587990992 [label=ViewBackward0]
|
||
|
140517615270016 -> 140509587990992
|
||
|
140517615270016 [label=ToCopyBackward0]
|
||
|
140509587960496 -> 140517615270016
|
||
|
140509587988880 -> 140509587990560
|
||
|
140509587988880 [label=TBackward0]
|
||
|
140517615268816 -> 140509587988880
|
||
|
140517615268816 [label=ToCopyBackward0]
|
||
|
140517615270112 -> 140517615268816
|
||
|
140509591310960 [label="encoder.layer.9.experts.experts.1.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591310960 -> 140517615270112
|
||
|
140517615270112 [label=AccumulateGrad]
|
||
|
140509587961264 -> 140509587962320
|
||
|
140509587961264 [label=TBackward0]
|
||
|
140509587989456 -> 140509587961264
|
||
|
140509587989456 [label=ToCopyBackward0]
|
||
|
140509587992048 -> 140509587989456
|
||
|
140509591310720 [label="encoder.layer.9.experts.experts.1.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591310720 -> 140509587992048
|
||
|
140509587992048 [label=AccumulateGrad]
|
||
|
140509587960496 -> 140509587960640
|
||
|
140509587960592 -> 140509587960160
|
||
|
140509591310480 [label="encoder.layer.9.experts.experts.1.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591310480 -> 140509587960592
|
||
|
140509587960592 [label=AccumulateGrad]
|
||
|
140509587959920 -> 140509587960160
|
||
|
140509591310800 [label="encoder.layer.9.experts.experts.1.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591310800 -> 140509587959920
|
||
|
140509587959920 [label=AccumulateGrad]
|
||
|
140509588463280 -> 140509588462944
|
||
|
140509588463280 [label=UnsqueezeBackward0]
|
||
|
140509588463472 -> 140509588463280
|
||
|
140509588463472 [label=UnsqueezeBackward0]
|
||
|
140509587962704 -> 140509588463472
|
||
|
140509587962704 [label=MulBackward0]
|
||
|
140509587963856 -> 140509587962704
|
||
|
140509587963856 [label=SoftmaxBackward0]
|
||
|
140509587990320 -> 140509587963856
|
||
|
140509587990320 [label=MmBackward0]
|
||
|
140509587960304 -> 140509587990320
|
||
|
140509587960304 [label=ToCopyBackward0]
|
||
|
140517615270304 -> 140509587960304
|
||
|
140517615270304 [label=DivBackward0]
|
||
|
140517615270496 -> 140517615270304
|
||
|
140517615270496 [label=SumBackward1]
|
||
|
140517615270592 -> 140517615270496
|
||
|
140517615270592 [label=MulBackward0]
|
||
|
140509587960496 -> 140517615270592
|
||
|
140517615269968 -> 140509587990320
|
||
|
140517615269968 [label=TBackward0]
|
||
|
140517615270544 -> 140517615269968
|
||
|
140517615270544 [label=ToCopyBackward0]
|
||
|
140517615270640 -> 140517615270544
|
||
|
140509591313200 [label="encoder.layer.9.experts.gate.weight
|
||
|
(2, 768)" fillcolor=lightblue]
|
||
|
140509591313200 -> 140517615270640
|
||
|
140517615270640 [label=AccumulateGrad]
|
||
|
140509588462416 -> 140509588428880
|
||
|
140509588462416 [label=IndexBackward0]
|
||
|
140509588462896 -> 140509588462416
|
||
|
140509588462896 [label=NativeLayerNormBackward0]
|
||
|
140509587963088 -> 140509588462896
|
||
|
140509587963088 [label=AddBackward0]
|
||
|
140517615270688 -> 140509587963088
|
||
|
140517615270688 [label=NativeDropoutBackward0]
|
||
|
140517615270352 -> 140517615270688
|
||
|
140517615270352 [label=ViewBackward0]
|
||
|
140517615270832 -> 140517615270352
|
||
|
140517615270832 [label=AddmmBackward0]
|
||
|
140517615270928 -> 140517615270832
|
||
|
140517615270928 [label=ToCopyBackward0]
|
||
|
140517615271120 -> 140517615270928
|
||
|
140509591320672 [label="encoder.layer.9.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591320672 -> 140517615271120
|
||
|
140517615271120 [label=AccumulateGrad]
|
||
|
140517615270880 -> 140517615270832
|
||
|
140517615270880 [label=ViewBackward0]
|
||
|
140517615271168 -> 140517615270880
|
||
|
140517615271168 [label=GeluBackward0]
|
||
|
140517615271264 -> 140517615271168
|
||
|
140517615271264 [label=ViewBackward0]
|
||
|
140517615271360 -> 140517615271264
|
||
|
140517615271360 [label=AddmmBackward0]
|
||
|
140517615271456 -> 140517615271360
|
||
|
140517615271456 [label=ToCopyBackward0]
|
||
|
140517615271648 -> 140517615271456
|
||
|
140509591320752 [label="encoder.layer.9.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591320752 -> 140517615271648
|
||
|
140517615271648 [label=AccumulateGrad]
|
||
|
140517615271408 -> 140517615271360
|
||
|
140517615271408 [label=ViewBackward0]
|
||
|
140517615271696 -> 140517615271408
|
||
|
140517615271696 [label=ToCopyBackward0]
|
||
|
140517615270448 -> 140517615271696
|
||
|
140517615270448 [label=SliceBackward0]
|
||
|
140517615271840 -> 140517615270448
|
||
|
140517615271840 [label=SliceBackward0]
|
||
|
140517615271888 -> 140517615271840
|
||
|
140517615271888 [label=SliceBackward0]
|
||
|
140509587963040 -> 140517615271888
|
||
|
140517615271072 -> 140517615271360
|
||
|
140517615271072 [label=TBackward0]
|
||
|
140517615271600 -> 140517615271072
|
||
|
140517615271600 [label=ToCopyBackward0]
|
||
|
140517615271552 -> 140517615271600
|
||
|
140509591320912 [label="encoder.layer.9.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591320912 -> 140517615271552
|
||
|
140517615271552 [label=AccumulateGrad]
|
||
|
140517615270736 -> 140517615270832
|
||
|
140517615270736 [label=TBackward0]
|
||
|
140517615271312 -> 140517615270736
|
||
|
140517615271312 [label=ToCopyBackward0]
|
||
|
140517615271792 -> 140517615271312
|
||
|
140509591320992 [label="encoder.layer.9.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591320992 -> 140517615271792
|
||
|
140517615271792 [label=AccumulateGrad]
|
||
|
140517615270448 -> 140509587963088
|
||
|
140509587962128 -> 140509588462896
|
||
|
140509591320432 [label="encoder.layer.9.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591320432 -> 140509587962128
|
||
|
140509587962128 [label=AccumulateGrad]
|
||
|
140509587961072 -> 140509588462896
|
||
|
140509591318592 [label="encoder.layer.9.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591318592 -> 140509587961072
|
||
|
140509587961072 [label=AccumulateGrad]
|
||
|
140509588461168 -> 140509588462128
|
||
|
140509588461168 [label=TBackward0]
|
||
|
140509588462512 -> 140509588461168
|
||
|
140509588462512 [label=ToCopyBackward0]
|
||
|
140509587988592 -> 140509588462512
|
||
|
140509591313440 [label="encoder.layer.10.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591313440 -> 140509587988592
|
||
|
140509587988592 [label=AccumulateGrad]
|
||
|
140509588461072 -> 140509588460784
|
||
|
140509588461072 [label=UnsafeViewBackward0]
|
||
|
140509588461456 -> 140509588461072
|
||
|
140509588461456 [label=CloneBackward0]
|
||
|
140509588461744 -> 140509588461456
|
||
|
140509588461744 [label=ExpandBackward0]
|
||
|
140509588462224 -> 140509588461744
|
||
|
140509588462224 [label=TransposeBackward0]
|
||
|
140509588463088 -> 140509588462224
|
||
|
140509588463088 [label=PermuteBackward0]
|
||
|
140509588462800 -> 140509588463088
|
||
|
140509588462800 [label=ViewBackward0]
|
||
|
140517615270784 -> 140509588462800
|
||
|
140517615270784 [label=ViewBackward0]
|
||
|
140517615271024 -> 140517615270784
|
||
|
140517615271024 [label=AddmmBackward0]
|
||
|
140517615271744 -> 140517615271024
|
||
|
140517615271744 [label=ToCopyBackward0]
|
||
|
140517615321248 -> 140517615271744
|
||
|
140509591313600 [label="encoder.layer.10.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591313600 -> 140517615321248
|
||
|
140517615321248 [label=AccumulateGrad]
|
||
|
140517615271504 -> 140517615271024
|
||
|
140517615271504 [label=ViewBackward0]
|
||
|
140517615321296 -> 140517615271504
|
||
|
140517615321296 [label=ToCopyBackward0]
|
||
|
140509588428880 -> 140517615321296
|
||
|
140517615270256 -> 140517615271024
|
||
|
140517615270256 [label=TBackward0]
|
||
|
140517615321152 -> 140517615270256
|
||
|
140517615321152 [label=ToCopyBackward0]
|
||
|
140517615321440 -> 140517615321152
|
||
|
140509591313680 [label="encoder.layer.10.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591313680 -> 140517615321440
|
||
|
140517615321440 [label=AccumulateGrad]
|
||
|
140509588429936 -> 140509588430704
|
||
|
140509588429936 [label=UnsafeViewBackward0]
|
||
|
140509588460112 -> 140509588429936
|
||
|
140509588460112 [label=CloneBackward0]
|
||
|
140509588460400 -> 140509588460112
|
||
|
140509588460400 [label=ExpandBackward0]
|
||
|
140509588460688 -> 140509588460400
|
||
|
140509588460688 [label=PermuteBackward0]
|
||
|
140509588459632 -> 140509588460688
|
||
|
140509588459632 [label=ViewBackward0]
|
||
|
140509588461504 -> 140509588459632
|
||
|
140509588461504 [label=ViewBackward0]
|
||
|
140509588462704 -> 140509588461504
|
||
|
140509588462704 [label=AddmmBackward0]
|
||
|
140509588461024 -> 140509588462704
|
||
|
140509588461024 [label=ToCopyBackward0]
|
||
|
140517615271216 -> 140509588461024
|
||
|
140509591313840 [label="encoder.layer.10.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591313840 -> 140517615271216
|
||
|
140517615271216 [label=AccumulateGrad]
|
||
|
140509588459680 -> 140509588462704
|
||
|
140509588459680 [label=ViewBackward0]
|
||
|
140517615321536 -> 140509588459680
|
||
|
140517615321536 [label=ToCopyBackward0]
|
||
|
140509588428880 -> 140517615321536
|
||
|
140517615270400 -> 140509588462704
|
||
|
140517615270400 [label=TBackward0]
|
||
|
140517615321392 -> 140517615270400
|
||
|
140517615321392 [label=ToCopyBackward0]
|
||
|
140517615321584 -> 140517615321392
|
||
|
140509591313920 [label="encoder.layer.10.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591313920 -> 140517615321584
|
||
|
140517615321584 [label=AccumulateGrad]
|
||
|
140509588428928 -> 140509588429360
|
||
|
140509588428928 [label=TBackward0]
|
||
|
140509588430128 -> 140509588428928
|
||
|
140509588430128 [label=ToCopyBackward0]
|
||
|
140509588430368 -> 140509588430128
|
||
|
140509591313120 [label="encoder.layer.10.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591313120 -> 140509588430368
|
||
|
140509588430368 [label=AccumulateGrad]
|
||
|
140509588428880 -> 140509588428784
|
||
|
140509588428448 -> 140509588428592
|
||
|
140509591312640 [label="encoder.layer.10.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591312640 -> 140509588428448
|
||
|
140509588428448 [label=AccumulateGrad]
|
||
|
140509588427824 -> 140509588428592
|
||
|
140509591312880 [label="encoder.layer.10.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591312880 -> 140509588427824
|
||
|
140509588427824 [label=AccumulateGrad]
|
||
|
140509588426816 -> 140509588427536
|
||
|
140509588426816 [label=TBackward0]
|
||
|
140509588427728 -> 140509588426816
|
||
|
140509588427728 [label=ToCopyBackward0]
|
||
|
140509588428400 -> 140509588427728
|
||
|
140509591312720 [label="encoder.layer.10.crossattention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591312720 -> 140509588428400
|
||
|
140509588428400 [label=AccumulateGrad]
|
||
|
140509588405840 -> 140509588405504
|
||
|
140509588405840 [label=UnsafeViewBackward0]
|
||
|
140509588406032 -> 140509588405840
|
||
|
140509588406032 [label=CloneBackward0]
|
||
|
140509588427008 -> 140509588406032
|
||
|
140509588427008 [label=ExpandBackward0]
|
||
|
140509588427488 -> 140509588427008
|
||
|
140509588427488 [label=TransposeBackward0]
|
||
|
140509588428208 -> 140509588427488
|
||
|
140509588428208 [label=PermuteBackward0]
|
||
|
140509588428688 -> 140509588428208
|
||
|
140509588428688 [label=ViewBackward0]
|
||
|
140509588429264 -> 140509588428688
|
||
|
140509588429264 [label=ViewBackward0]
|
||
|
140509588429744 -> 140509588429264
|
||
|
140509588429744 [label=AddmmBackward0]
|
||
|
140509588430320 -> 140509588429744
|
||
|
140509588430320 [label=ToCopyBackward0]
|
||
|
140509588460208 -> 140509588430320
|
||
|
140509591312480 [label="encoder.layer.10.crossattention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591312480 -> 140509588460208
|
||
|
140509588460208 [label=AccumulateGrad]
|
||
|
140509588429648 -> 140509588429744
|
||
|
140509588429648 [label=ViewBackward0]
|
||
|
140509588460592 -> 140509588429648
|
||
|
140509588460592 [label=ToCopyBackward0]
|
||
|
140509588461264 -> 140509588460592
|
||
|
140509588461264 [label=ViewBackward0]
|
||
|
140517615270976 -> 140509588461264
|
||
|
140517615270976 [label=CloneBackward0]
|
||
|
140509588459584 -> 140517615270976
|
||
|
140509588459584 [label=ExpandBackward0]
|
||
|
140517615321632 -> 140509588459584
|
||
|
140517615321632 [label=UnsqueezeBackward0]
|
||
|
140517615539152 -> 140517615321632
|
||
|
140509588426864 -> 140509588429744
|
||
|
140509588426864 [label=TBackward0]
|
||
|
140509588461936 -> 140509588426864
|
||
|
140509588461936 [label=ToCopyBackward0]
|
||
|
140509588460880 -> 140509588461936
|
||
|
140509591312400 [label="encoder.layer.10.crossattention.self.key.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509591312400 -> 140509588460880
|
||
|
140509588460880 [label=AccumulateGrad]
|
||
|
140509588404064 -> 140509588404208
|
||
|
140509588404064 [label=UnsafeViewBackward0]
|
||
|
140509588404880 -> 140509588404064
|
||
|
140509588404880 [label=CloneBackward0]
|
||
|
140509588405168 -> 140509588404880
|
||
|
140509588405168 [label=ExpandBackward0]
|
||
|
140509588405552 -> 140509588405168
|
||
|
140509588405552 [label=PermuteBackward0]
|
||
|
140509588404304 -> 140509588405552
|
||
|
140509588404304 [label=ViewBackward0]
|
||
|
140509588405936 -> 140509588404304
|
||
|
140509588405936 [label=ViewBackward0]
|
||
|
140509588427968 -> 140509588405936
|
||
|
140509588427968 [label=AddmmBackward0]
|
||
|
140509588428112 -> 140509588427968
|
||
|
140509588428112 [label=ToCopyBackward0]
|
||
|
140509588459920 -> 140509588428112
|
||
|
140509591310560 [label="encoder.layer.10.crossattention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591310560 -> 140509588459920
|
||
|
140509588459920 [label=AccumulateGrad]
|
||
|
140509588428976 -> 140509588427968
|
||
|
140509588428976 [label=ViewBackward0]
|
||
|
140509588429888 -> 140509588428976
|
||
|
140509588429888 [label=ToCopyBackward0]
|
||
|
140509588461264 -> 140509588429888
|
||
|
140509588426960 -> 140509588427968
|
||
|
140509588426960 [label=TBackward0]
|
||
|
140517615321680 -> 140509588426960
|
||
|
140517615321680 [label=ToCopyBackward0]
|
||
|
140517615321344 -> 140517615321680
|
||
|
140509591312240 [label="encoder.layer.10.crossattention.self.value.weight
|
||
|
(768, 1408)" fillcolor=lightblue]
|
||
|
140509591312240 -> 140517615321344
|
||
|
140517615321344 [label=AccumulateGrad]
|
||
|
140509588402576 -> 140509588402864
|
||
|
140509588402576 [label=TBackward0]
|
||
|
140509588403584 -> 140509588402576
|
||
|
140509588403584 [label=ToCopyBackward0]
|
||
|
140509588404016 -> 140509588403584
|
||
|
140509591311040 [label="encoder.layer.10.crossattention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591311040 -> 140509588404016
|
||
|
140509588404016 [label=AccumulateGrad]
|
||
|
140509588402384 -> 140509588373360
|
||
|
140509588372784 -> 140509588373456
|
||
|
140509591293760 [label="encoder.layer.10.crossattention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591293760 -> 140509588372784
|
||
|
140509588372784 [label=AccumulateGrad]
|
||
|
140509588402240 -> 140509588373456
|
||
|
140509591293520 [label="encoder.layer.10.crossattention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591293520 -> 140509588402240
|
||
|
140509588402240 [label=AccumulateGrad]
|
||
|
140509588372016 -> 140509588372496
|
||
|
140509588372016 [label=TBackward0]
|
||
|
140509588372688 -> 140509588372016
|
||
|
140509588372688 [label=ToCopyBackward0]
|
||
|
140509588373168 -> 140509588372688
|
||
|
140509591289920 [label="encoder.layer.10.experts.experts.0.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591289920 -> 140509588373168
|
||
|
140509588373168 [label=AccumulateGrad]
|
||
|
140509588371008 -> 140509588371440
|
||
|
140509588371008 [label=TBackward0]
|
||
|
140509588372208 -> 140509588371008
|
||
|
140509588372208 [label=ToCopyBackward0]
|
||
|
140509588372928 -> 140509588372208
|
||
|
140509591290240 [label="encoder.layer.10.experts.experts.0.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591290240 -> 140509588372928
|
||
|
140509588372928 [label=AccumulateGrad]
|
||
|
140509588370960 -> 140509588370864
|
||
|
140509588370528 -> 140509588370672
|
||
|
140509591285328 [label="encoder.layer.10.experts.experts.0.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591285328 -> 140509588370528
|
||
|
140509588370528 [label=AccumulateGrad]
|
||
|
140509588370576 -> 140509588370672
|
||
|
140509591285248 [label="encoder.layer.10.experts.experts.0.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591285248 -> 140509588370576
|
||
|
140509588370576 [label=AccumulateGrad]
|
||
|
140509588370384 -> 140509588370192
|
||
|
140509588370384 [label=UnsqueezeBackward0]
|
||
|
140509588371056 -> 140509588370384
|
||
|
140509588371056 [label=NativeLayerNormBackward0]
|
||
|
140509588371536 -> 140509588371056
|
||
|
140509588371536 [label=AddBackward0]
|
||
|
140509588373072 -> 140509588371536
|
||
|
140509588373072 [label=NativeDropoutBackward0]
|
||
|
140509588371968 -> 140509588373072
|
||
|
140509588371968 [label=ViewBackward0]
|
||
|
140509588402288 -> 140509588371968
|
||
|
140509588402288 [label=AddmmBackward0]
|
||
|
140509588403248 -> 140509588402288
|
||
|
140509588403248 [label=ToCopyBackward0]
|
||
|
140509588403440 -> 140509588403248
|
||
|
140509591284528 [label="encoder.layer.10.experts.experts.1.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591284528 -> 140509588403440
|
||
|
140509588403440 [label=AccumulateGrad]
|
||
|
140509588402960 -> 140509588402288
|
||
|
140509588402960 [label=ViewBackward0]
|
||
|
140509588403536 -> 140509588402960
|
||
|
140509588403536 [label=GeluBackward0]
|
||
|
140509588405360 -> 140509588403536
|
||
|
140509588405360 [label=ViewBackward0]
|
||
|
140509588404592 -> 140509588405360
|
||
|
140509588404592 [label=AddmmBackward0]
|
||
|
140509588429456 -> 140509588404592
|
||
|
140509588429456 [label=ToCopyBackward0]
|
||
|
140517615321776 -> 140509588429456
|
||
|
140509591284768 [label="encoder.layer.10.experts.experts.1.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591284768 -> 140517615321776
|
||
|
140517615321776 [label=AccumulateGrad]
|
||
|
140509588427248 -> 140509588404592
|
||
|
140509588427248 [label=ViewBackward0]
|
||
|
140517615321488 -> 140509588427248
|
||
|
140517615321488 [label=ToCopyBackward0]
|
||
|
140509588370960 -> 140517615321488
|
||
|
140509588404688 -> 140509588404592
|
||
|
140509588404688 [label=TBackward0]
|
||
|
140517615321728 -> 140509588404688
|
||
|
140517615321728 [label=ToCopyBackward0]
|
||
|
140517615321968 -> 140517615321728
|
||
|
140509591285088 [label="encoder.layer.10.experts.experts.1.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591285088 -> 140517615321968
|
||
|
140517615321968 [label=AccumulateGrad]
|
||
|
140509588402768 -> 140509588402288
|
||
|
140509588402768 [label=TBackward0]
|
||
|
140509588405648 -> 140509588402768
|
||
|
140509588405648 [label=ToCopyBackward0]
|
||
|
140509588405072 -> 140509588405648
|
||
|
140509591284848 [label="encoder.layer.10.experts.experts.1.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591284848 -> 140509588405072
|
||
|
140509588405072 [label=AccumulateGrad]
|
||
|
140509588370960 -> 140509588371536
|
||
|
140509588371344 -> 140509588371056
|
||
|
140509591284608 [label="encoder.layer.10.experts.experts.1.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591284608 -> 140509588371344
|
||
|
140509588371344 [label=AccumulateGrad]
|
||
|
140509588370480 -> 140509588371056
|
||
|
140509591285008 [label="encoder.layer.10.experts.experts.1.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591285008 -> 140509588370480
|
||
|
140509588370480 [label=AccumulateGrad]
|
||
|
140509588370000 -> 140509588370096
|
||
|
140509588370000 [label=UnsqueezeBackward0]
|
||
|
140509588371824 -> 140509588370000
|
||
|
140509588371824 [label=UnsqueezeBackward0]
|
||
|
140509588371728 -> 140509588371824
|
||
|
140509588371728 [label=MulBackward0]
|
||
|
140509588370048 -> 140509588371728
|
||
|
140509588370048 [label=SoftmaxBackward0]
|
||
|
140509588403824 -> 140509588370048
|
||
|
140509588403824 [label=MmBackward0]
|
||
|
140517615321824 -> 140509588403824
|
||
|
140517615321824 [label=ToCopyBackward0]
|
||
|
140517615321872 -> 140517615321824
|
||
|
140517615321872 [label=DivBackward0]
|
||
|
140517615322160 -> 140517615321872
|
||
|
140517615322160 [label=SumBackward1]
|
||
|
140517615322256 -> 140517615322160
|
||
|
140517615322256 [label=MulBackward0]
|
||
|
140509588370960 -> 140517615322256
|
||
|
140517615322064 -> 140509588403824
|
||
|
140517615322064 [label=TBackward0]
|
||
|
140517615322208 -> 140517615322064
|
||
|
140517615322208 [label=ToCopyBackward0]
|
||
|
140517615322304 -> 140517615322208
|
||
|
140509591291120 [label="encoder.layer.10.experts.gate.weight
|
||
|
(2, 768)" fillcolor=lightblue]
|
||
|
140509591291120 -> 140517615322304
|
||
|
140517615322304 [label=AccumulateGrad]
|
||
|
140509588369520 -> 140509588315344
|
||
|
140509588369520 [label=IndexBackward0]
|
||
|
140509588370768 -> 140509588369520
|
||
|
140509588370768 [label=NativeLayerNormBackward0]
|
||
|
140509588372448 -> 140509588370768
|
||
|
140509588372448 [label=AddBackward0]
|
||
|
140517615322352 -> 140509588372448
|
||
|
140517615322352 [label=NativeDropoutBackward0]
|
||
|
140517615322016 -> 140517615322352
|
||
|
140517615322016 [label=ViewBackward0]
|
||
|
140517615322496 -> 140517615322016
|
||
|
140517615322496 [label=AddmmBackward0]
|
||
|
140517615322592 -> 140517615322496
|
||
|
140517615322592 [label=ToCopyBackward0]
|
||
|
140517615322784 -> 140517615322592
|
||
|
140509591293040 [label="encoder.layer.10.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591293040 -> 140517615322784
|
||
|
140517615322784 [label=AccumulateGrad]
|
||
|
140517615322544 -> 140517615322496
|
||
|
140517615322544 [label=ViewBackward0]
|
||
|
140517615322832 -> 140517615322544
|
||
|
140517615322832 [label=GeluBackward0]
|
||
|
140517615322928 -> 140517615322832
|
||
|
140517615322928 [label=ViewBackward0]
|
||
|
140517615323024 -> 140517615322928
|
||
|
140517615323024 [label=AddmmBackward0]
|
||
|
140517615323120 -> 140517615323024
|
||
|
140517615323120 [label=ToCopyBackward0]
|
||
|
140517615323312 -> 140517615323120
|
||
|
140509591293280 [label="encoder.layer.10.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591293280 -> 140517615323312
|
||
|
140517615323312 [label=AccumulateGrad]
|
||
|
140517615323072 -> 140517615323024
|
||
|
140517615323072 [label=ViewBackward0]
|
||
|
140517615323360 -> 140517615323072
|
||
|
140517615323360 [label=ToCopyBackward0]
|
||
|
140517615322112 -> 140517615323360
|
||
|
140517615322112 [label=SliceBackward0]
|
||
|
140517615323504 -> 140517615322112
|
||
|
140517615323504 [label=SliceBackward0]
|
||
|
140517615323600 -> 140517615323504
|
||
|
140517615323600 [label=SliceBackward0]
|
||
|
140509588428592 -> 140517615323600
|
||
|
140517615322736 -> 140517615323024
|
||
|
140517615322736 [label=TBackward0]
|
||
|
140517615323264 -> 140517615322736
|
||
|
140517615323264 [label=ToCopyBackward0]
|
||
|
140517615323696 -> 140517615323264
|
||
|
140509591293600 [label="encoder.layer.10.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591293600 -> 140517615323696
|
||
|
140517615323696 [label=AccumulateGrad]
|
||
|
140517615322400 -> 140517615322496
|
||
|
140517615322400 [label=TBackward0]
|
||
|
140517615322976 -> 140517615322400
|
||
|
140517615322976 [label=ToCopyBackward0]
|
||
|
140517615323456 -> 140517615322976
|
||
|
140509591293360 [label="encoder.layer.10.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591293360 -> 140517615323456
|
||
|
140517615323456 [label=AccumulateGrad]
|
||
|
140517615322112 -> 140509588372448
|
||
|
140509588369808 -> 140509588370768
|
||
|
140509591293120 [label="encoder.layer.10.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591293120 -> 140509588369808
|
||
|
140509588369808 [label=AccumulateGrad]
|
||
|
140509588403104 -> 140509588370768
|
||
|
140509591292800 [label="encoder.layer.10.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591292800 -> 140509588403104
|
||
|
140509588403104 [label=AccumulateGrad]
|
||
|
140509588347344 -> 140509588348304
|
||
|
140509588347344 [label=TBackward0]
|
||
|
140509588348880 -> 140509588347344
|
||
|
140509588348880 [label=ToCopyBackward0]
|
||
|
140509588402480 -> 140509588348880
|
||
|
140509591291360 [label="encoder.layer.11.attention.self.query.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591291360 -> 140509588402480
|
||
|
140509588402480 [label=AccumulateGrad]
|
||
|
140509588347104 -> 140509588347248
|
||
|
140509588347104 [label=UnsafeViewBackward0]
|
||
|
140509588347920 -> 140509588347104
|
||
|
140509588347920 [label=CloneBackward0]
|
||
|
140509588348208 -> 140509588347920
|
||
|
140509588348208 [label=ExpandBackward0]
|
||
|
140509588348688 -> 140509588348208
|
||
|
140509588348688 [label=TransposeBackward0]
|
||
|
140509588347632 -> 140509588348688
|
||
|
140509588347632 [label=PermuteBackward0]
|
||
|
140509588369712 -> 140509588347632
|
||
|
140509588369712 [label=ViewBackward0]
|
||
|
140517615322448 -> 140509588369712
|
||
|
140517615322448 [label=ViewBackward0]
|
||
|
140517615322688 -> 140517615322448
|
||
|
140517615322688 [label=AddmmBackward0]
|
||
|
140517615323216 -> 140517615322688
|
||
|
140517615323216 [label=ToCopyBackward0]
|
||
|
140517615323408 -> 140517615323216
|
||
|
140509591291920 [label="encoder.layer.11.attention.self.key.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591291920 -> 140517615323408
|
||
|
140517615323408 [label=AccumulateGrad]
|
||
|
140517615323168 -> 140517615322688
|
||
|
140517615323168 [label=ViewBackward0]
|
||
|
140517615323744 -> 140517615323168
|
||
|
140517615323744 [label=ToCopyBackward0]
|
||
|
140509588315344 -> 140517615323744
|
||
|
140517615321200 -> 140517615322688
|
||
|
140517615321200 [label=TBackward0]
|
||
|
140517615322880 -> 140517615321200
|
||
|
140517615322880 [label=ToCopyBackward0]
|
||
|
140517615323888 -> 140517615322880
|
||
|
140509591291600 [label="encoder.layer.11.attention.self.key.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591291600 -> 140517615323888
|
||
|
140517615323888 [label=AccumulateGrad]
|
||
|
140509588345808 -> 140509588345616
|
||
|
140509588345808 [label=UnsafeViewBackward0]
|
||
|
140509588346144 -> 140509588345808
|
||
|
140509588346144 [label=CloneBackward0]
|
||
|
140509588346576 -> 140509588346144
|
||
|
140509588346576 [label=ExpandBackward0]
|
||
|
140509588346864 -> 140509588346576
|
||
|
140509588346864 [label=PermuteBackward0]
|
||
|
140509588346000 -> 140509588346864
|
||
|
140509588346000 [label=ViewBackward0]
|
||
|
140509588348112 -> 140509588346000
|
||
|
140509588348112 [label=ViewBackward0]
|
||
|
140509588348400 -> 140509588348112
|
||
|
140509588348400 [label=AddmmBackward0]
|
||
|
140509588369616 -> 140509588348400
|
||
|
140509588369616 [label=ToCopyBackward0]
|
||
|
140517615323648 -> 140509588369616
|
||
|
140509591292160 [label="encoder.layer.11.attention.self.value.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591292160 -> 140517615323648
|
||
|
140517615323648 [label=AccumulateGrad]
|
||
|
140509588369904 -> 140509588348400
|
||
|
140509588369904 [label=ViewBackward0]
|
||
|
140517615323984 -> 140509588369904
|
||
|
140517615323984 [label=ToCopyBackward0]
|
||
|
140509588315344 -> 140517615323984
|
||
|
140517615321920 -> 140509588348400
|
||
|
140517615321920 [label=TBackward0]
|
||
|
140517615323552 -> 140517615321920
|
||
|
140517615323552 [label=ToCopyBackward0]
|
||
|
140517615324032 -> 140517615323552
|
||
|
140509591291840 [label="encoder.layer.11.attention.self.value.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591291840 -> 140517615324032
|
||
|
140517615324032 [label=AccumulateGrad]
|
||
|
140509588315536 -> 140509588315824
|
||
|
140509588315536 [label=TBackward0]
|
||
|
140509588345328 -> 140509588315536
|
||
|
140509588345328 [label=ToCopyBackward0]
|
||
|
140509588345712 -> 140509588345328
|
||
|
140509591291440 [label="encoder.layer.11.attention.output.dense.weight
|
||
|
(768, 768)" fillcolor=lightblue]
|
||
|
140509591291440 -> 140509588345712
|
||
|
140509588345712 [label=AccumulateGrad]
|
||
|
140509588315344 -> 140509588314960
|
||
|
140509588315056 -> 140509588314768
|
||
|
140509591290960 [label="encoder.layer.11.attention.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591290960 -> 140509588315056
|
||
|
140509588315056 [label=AccumulateGrad]
|
||
|
140509588313568 -> 140509588314768
|
||
|
140509591291200 [label="encoder.layer.11.attention.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591291200 -> 140509588313568
|
||
|
140509588313568 [label=AccumulateGrad]
|
||
|
140509588312272 -> 140509588313328
|
||
|
140509588312272 [label=TBackward0]
|
||
|
140509588313904 -> 140509588312272
|
||
|
140509588313904 [label=ToCopyBackward0]
|
||
|
140509588314576 -> 140509588313904
|
||
|
140509591260912 [label="encoder.layer.11.experts.experts.0.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591260912 -> 140509588314576
|
||
|
140509588314576 [label=AccumulateGrad]
|
||
|
140509588312848 -> 140509588313232
|
||
|
140509588312848 [label=TBackward0]
|
||
|
140509588312128 -> 140509588312848
|
||
|
140509588312128 [label=ToCopyBackward0]
|
||
|
140509588314192 -> 140509588312128
|
||
|
140509591260592 [label="encoder.layer.11.experts.experts.0.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591260592 -> 140509588314192
|
||
|
140509588314192 [label=AccumulateGrad]
|
||
|
140509588312608 -> 140509591317376
|
||
|
140509591314832 -> 140509591314640
|
||
|
140509591260352 [label="encoder.layer.11.experts.experts.0.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591260352 -> 140509591314832
|
||
|
140509591314832 [label=AccumulateGrad]
|
||
|
140509591317568 -> 140509591314640
|
||
|
140509591260832 [label="encoder.layer.11.experts.experts.0.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591260832 -> 140509591317568
|
||
|
140509591317568 [label=AccumulateGrad]
|
||
|
140509591315408 -> 140509588282864
|
||
|
140509591315408 [label=UnsqueezeBackward0]
|
||
|
140509591268800 -> 140509591315408
|
||
|
140509591268800 [label=NativeLayerNormBackward0]
|
||
|
140509588313088 -> 140509591268800
|
||
|
140509588313088 [label=AddBackward0]
|
||
|
140509588314864 -> 140509588313088
|
||
|
140509588314864 [label=NativeDropoutBackward0]
|
||
|
140509588312224 -> 140509588314864
|
||
|
140509588312224 [label=ViewBackward0]
|
||
|
140509588314000 -> 140509588312224
|
||
|
140509588314000 [label=AddmmBackward0]
|
||
|
140509588315008 -> 140509588314000
|
||
|
140509588315008 [label=ToCopyBackward0]
|
||
|
140509588315920 -> 140509588315008
|
||
|
140509591259952 [label="encoder.layer.11.experts.experts.1.output_query.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591259952 -> 140509588315920
|
||
|
140509588315920 [label=AccumulateGrad]
|
||
|
140509588315152 -> 140509588314000
|
||
|
140509588315152 [label=ViewBackward0]
|
||
|
140509588315488 -> 140509588315152
|
||
|
140509588315488 [label=GeluBackward0]
|
||
|
140509588345232 -> 140509588315488
|
||
|
140509588345232 [label=ViewBackward0]
|
||
|
140509588346384 -> 140509588345232
|
||
|
140509588346384 [label=AddmmBackward0]
|
||
|
140509588347056 -> 140509588346384
|
||
|
140509588347056 [label=ToCopyBackward0]
|
||
|
140509588345904 -> 140509588347056
|
||
|
140509591260192 [label="encoder.layer.11.experts.experts.1.intermediate_query.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591260192 -> 140509588345904
|
||
|
140509588345904 [label=AccumulateGrad]
|
||
|
140509588346624 -> 140509588346384
|
||
|
140509588346624 [label=ViewBackward0]
|
||
|
140517615323840 -> 140509588346624
|
||
|
140517615323840 [label=ToCopyBackward0]
|
||
|
140509588312608 -> 140517615323840
|
||
|
140509588346096 -> 140509588346384
|
||
|
140509588346096 [label=TBackward0]
|
||
|
140517615322640 -> 140509588346096
|
||
|
140517615322640 [label=ToCopyBackward0]
|
||
|
140517615323936 -> 140517615322640
|
||
|
140509591260112 [label="encoder.layer.11.experts.experts.1.intermediate_query.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591260112 -> 140517615323936
|
||
|
140517615323936 [label=AccumulateGrad]
|
||
|
140509588312464 -> 140509588314000
|
||
|
140509588312464 [label=TBackward0]
|
||
|
140509588344944 -> 140509588312464
|
||
|
140509588344944 [label=ToCopyBackward0]
|
||
|
140509588347728 -> 140509588344944
|
||
|
140509591259872 [label="encoder.layer.11.experts.experts.1.output_query.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591259872 -> 140509588347728
|
||
|
140509588347728 [label=AccumulateGrad]
|
||
|
140509588312608 -> 140509588313088
|
||
|
140509588313136 -> 140509591268800
|
||
|
140509591259632 [label="encoder.layer.11.experts.experts.1.output_query.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591259632 -> 140509588313136
|
||
|
140509588313136 [label=AccumulateGrad]
|
||
|
140509588312752 -> 140509591268800
|
||
|
140509591260432 [label="encoder.layer.11.experts.experts.1.output_query.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591260432 -> 140509588312752
|
||
|
140509588312752 [label=AccumulateGrad]
|
||
|
140509588282672 -> 140509588283152
|
||
|
140509588282672 [label=UnsqueezeBackward0]
|
||
|
140509591318432 -> 140509588282672
|
||
|
140509591318432 [label=UnsqueezeBackward0]
|
||
|
140509588314384 -> 140509591318432
|
||
|
140509588314384 [label=MulBackward0]
|
||
|
140509588315440 -> 140509588314384
|
||
|
140509588315440 [label=SoftmaxBackward0]
|
||
|
140509588345520 -> 140509588315440
|
||
|
140509588345520 [label=MmBackward0]
|
||
|
140509588312656 -> 140509588345520
|
||
|
140509588312656 [label=ToCopyBackward0]
|
||
|
140517615324128 -> 140509588312656
|
||
|
140517615324128 [label=DivBackward0]
|
||
|
140517615324320 -> 140517615324128
|
||
|
140517615324320 [label=SumBackward1]
|
||
|
140517615324416 -> 140517615324320
|
||
|
140517615324416 [label=MulBackward0]
|
||
|
140509588312608 -> 140517615324416
|
||
|
140517615323792 -> 140509588345520
|
||
|
140517615323792 [label=TBackward0]
|
||
|
140517615324368 -> 140517615323792
|
||
|
140517615324368 [label=ToCopyBackward0]
|
||
|
140517615324464 -> 140517615324368
|
||
|
140509591282928 [label="encoder.layer.11.experts.gate.weight
|
||
|
(2, 768)" fillcolor=lightblue]
|
||
|
140509591282928 -> 140517615324464
|
||
|
140517615324464 [label=AccumulateGrad]
|
||
|
140509588282432 -> 140509588281712
|
||
|
140509588282432 [label=IndexBackward0]
|
||
|
140509588283248 -> 140509588282432
|
||
|
140509588283248 [label=IndexBackward0]
|
||
|
140509591317952 -> 140509588283248
|
||
|
140509591317952 [label=NativeLayerNormBackward0]
|
||
|
140509588345040 -> 140509591317952
|
||
|
140509588345040 [label=AddBackward0]
|
||
|
140517615324560 -> 140509588345040
|
||
|
140517615324560 [label=NativeDropoutBackward0]
|
||
|
140517615324608 -> 140517615324560
|
||
|
140517615324608 [label=ViewBackward0]
|
||
|
140517615324704 -> 140517615324608
|
||
|
140517615324704 [label=AddmmBackward0]
|
||
|
140517615324800 -> 140517615324704
|
||
|
140517615324800 [label=ToCopyBackward0]
|
||
|
140517615324992 -> 140517615324800
|
||
|
140509591290400 [label="encoder.layer.11.output.dense.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591290400 -> 140517615324992
|
||
|
140517615324992 [label=AccumulateGrad]
|
||
|
140517615324752 -> 140517615324704
|
||
|
140517615324752 [label=ViewBackward0]
|
||
|
140517615325040 -> 140517615324752
|
||
|
140517615325040 [label=GeluBackward0]
|
||
|
140517615325136 -> 140517615325040
|
||
|
140517615325136 [label=ViewBackward0]
|
||
|
140517615324944 -> 140517615325136
|
||
|
140517615324944 [label=AddmmBackward0]
|
||
|
140517615382736 -> 140517615324944
|
||
|
140517615382736 [label=ToCopyBackward0]
|
||
|
140517615382928 -> 140517615382736
|
||
|
140509591290480 [label="encoder.layer.11.intermediate.dense.bias
|
||
|
(3072)" fillcolor=lightblue]
|
||
|
140509591290480 -> 140517615382928
|
||
|
140517615382928 [label=AccumulateGrad]
|
||
|
140517615382688 -> 140517615324944
|
||
|
140517615382688 [label=ViewBackward0]
|
||
|
140517615382976 -> 140517615382688
|
||
|
140517615382976 [label=ToCopyBackward0]
|
||
|
140517615324512 -> 140517615382976
|
||
|
140517615324512 [label=SliceBackward0]
|
||
|
140517615383120 -> 140517615324512
|
||
|
140517615383120 [label=SliceBackward0]
|
||
|
140517615383216 -> 140517615383120
|
||
|
140517615383216 [label=SliceBackward0]
|
||
|
140509588314768 -> 140517615383216
|
||
|
140517615382592 -> 140517615324944
|
||
|
140517615382592 [label=TBackward0]
|
||
|
140517615382880 -> 140517615382592
|
||
|
140517615382880 [label=ToCopyBackward0]
|
||
|
140517615383312 -> 140517615382880
|
||
|
140509591290640 [label="encoder.layer.11.intermediate.dense.weight
|
||
|
(3072, 768)" fillcolor=lightblue]
|
||
|
140509591290640 -> 140517615383312
|
||
|
140517615383312 [label=AccumulateGrad]
|
||
|
140517615324080 -> 140517615324704
|
||
|
140517615324080 [label=TBackward0]
|
||
|
140517615324896 -> 140517615324080
|
||
|
140517615324896 [label=ToCopyBackward0]
|
||
|
140517615383072 -> 140517615324896
|
||
|
140509591290720 [label="encoder.layer.11.output.dense.weight
|
||
|
(768, 3072)" fillcolor=lightblue]
|
||
|
140509591290720 -> 140517615383072
|
||
|
140517615383072 [label=AccumulateGrad]
|
||
|
140517615324512 -> 140509588345040
|
||
|
140509588314672 -> 140509591317952
|
||
|
140509591290160 [label="encoder.layer.11.output.LayerNorm.weight
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591290160 -> 140509588314672
|
||
|
140509588314672 [label=AccumulateGrad]
|
||
|
140509588313712 -> 140509591317952
|
||
|
140509591290000 [label="encoder.layer.11.output.LayerNorm.bias
|
||
|
(768)" fillcolor=lightblue]
|
||
|
140509591290000 -> 140509588313712
|
||
|
140509588313712 [label=AccumulateGrad]
|
||
|
140509588281712 -> 140509988778688
|
||
|
}
|