thug/Code/Gfx/NGPS/NX/vu0code.dsm

614 lines
20 KiB
Plaintext
Raw Permalink Normal View History

2016-02-13 21:39:12 +00:00
; Exported symbols: the MPGStart0/MPGEnd0 markers that bracket the microprogram,
; plus one entry label per routine defined below.
.global MPGStart0
.global MPGEnd0
.global TestFunc
.global InitialiseOccluders
.global TestSphereAgainstOccluders
.global RayTriangleCollision
.global BatchRayTriangleCollision
.global ViewCullTest
.global OuterCullTest
.global BothCullTests
; align to a 2^4=16 byte boundary, so it can be the target of a dma::ref
.align 4
MPGStart0:
; NOTE(review): presumably this emits a VIF MPG code that uploads everything up to
; .EndMPG to micro memory address 0, with "*" letting the assembler fill in the
; size - confirm against the assembler documentation.
MPG 0, *
;-----------------------------------------------------------------------------------------------------------------------------
TestFunc:
; just a little test func to make sure I understand how to communicate with a vu0 microprogram
; Each line is an upper-pipe / lower-pipe instruction pair.
; In:  VI01, VF01, and the quadword at VU data address 0
; Out: VI02 = VI01 + VI01
;      VF02 = VF01 + VF00.w on every lane
;      VF03 = (quadword at address 0) + VF00.w on every lane
; integer register test
NOP IADD VI02,VI01,VI01
; float register test
ADDw VF02,VF01,VF00w NOP
; memory test
NOP LQ VF03,0(VI00)
ADDw VF03,VF03,VF00w NOP
; quit
; [E] ends the microprogram; the pair on the following line is still issued.
NOP[E] NOP
NOP NOP
InitialiseOccluders:
; Appends a sentinel occluder and then rewrites the occluder planes in VU data memory,
; in place, from one-plane-per-quadword into a transposed layout where each quadword
; holds one coefficient (w, x, y or z) of the same plane slot for 4 consecutive occluders.
;
; In:  VI01 = number of occluders; planes at addresses 0 .. 5*VI01-1 (see below)
; Out: VI05 = address one past the sentinel occluder, i.e. 5*(count+1)
;      memory from address 0 rewritten in groups of 20 quadwords (4 occluders each)
; Clobbers: VI01 (reused as the loop test), VI02, VF01-VF23, VF31
;
; this function will be optimised later... not a priority now since only called once per frame (per camera)
; assumes an initial state where the 5 planes of occluder 0 are at addresses 0,1,2,3,4
; occluder 1 at 5,6,7,8,9, etc, and the number of occluders is in VI01
; get address of 1st spare occluder
; VI05 = 2n, then 4n, then 4n + n = 5n
NOP IADD VI05,VI01,VI01
NOP IADD VI05,VI05,VI05
NOP IADD VI05,VI05,VI01
; set up a sentinel plane that's colossally far away
; VF01 = VF00 * I, so only the w component carries the -1.0e+35 magnitude
NOP LOI -1.0e+35
MULi VF01,VF00,I NOP
; add 1 sentinel occluder (5 sentinel planes)
; each SQI post-increments VI05, leaving it one past the sentinel
NOP SQI VF01,(VI05++)
NOP SQI VF01,(VI05++)
NOP SQI VF01,(VI05++)
NOP SQI VF01,(VI05++)
NOP SQI VF01,(VI05++)
; rearrange memory
; VI02 = read/write cursor; VF31 = all-zero bits (0.0 on every lane), used as the
; zero addend that turns the ADDbc instructions below into component copies
NOP IADDIU VI02,VI00,0
NOP MFIR.xyzw VF31,VI00
MemLoop:
; load 4 occluders, 5 planes each
; offsets 0/5/10/15 are plane 0 of occluders A/B/C/D, 1/6/11/16 are plane 1, and so on
NOP LQ VF01,0(VI02)
NOP LQ VF02,5(VI02)
NOP LQ VF03,10(VI02)
NOP LQ VF04,15(VI02)
NOP LQ VF05,1(VI02)
NOP LQ VF06,6(VI02)
NOP LQ VF07,11(VI02)
NOP LQ VF08,16(VI02)
NOP LQ VF09,2(VI02)
NOP LQ VF10,7(VI02)
NOP LQ VF11,12(VI02)
NOP LQ VF12,17(VI02)
NOP LQ VF13,3(VI02)
NOP LQ VF14,8(VI02)
NOP LQ VF15,13(VI02)
NOP LQ VF16,18(VI02)
NOP LQ VF17,4(VI02)
NOP LQ VF18,9(VI02)
NOP LQ VF19,14(VI02)
NOP LQ VF20,19(VI02)
; transpose VF01,VF02,VF03,VF04
; In-place 4x4 transpose: afterwards VF01 = (A.x,B.x,C.x,D.x), VF02 = the y's,
; VF03 = the z's, VF04 = the w's. VF21-VF23 keep the original rows 0-2 because
; those rows are partially overwritten before all their components have been read.
NOP MOVE VF21,VF01
NOP MOVE VF22,VF02
NOP MOVE VF23,VF03
ADDx.y VF01,VF31,VF02x NOP
ADDx.z VF01,VF31,VF03x NOP
ADDx.w VF01,VF31,VF04x NOP
ADDy.x VF02,VF31,VF21y NOP
ADDy.z VF02,VF31,VF03y NOP
ADDy.w VF02,VF31,VF04y NOP
ADDz.x VF03,VF31,VF21z NOP
ADDz.y VF03,VF31,VF22z NOP
ADDz.w VF03,VF31,VF04z NOP
ADDw.x VF04,VF31,VF21w NOP
ADDw.y VF04,VF31,VF22w NOP
ADDw.z VF04,VF31,VF23w NOP
; transpose VF05,VF06,VF07,VF08
; (same pattern as above, for plane 1 of the four occluders)
NOP MOVE VF21,VF05
NOP MOVE VF22,VF06
NOP MOVE VF23,VF07
ADDx.y VF05,VF31,VF06x NOP
ADDx.z VF05,VF31,VF07x NOP
ADDx.w VF05,VF31,VF08x NOP
ADDy.x VF06,VF31,VF21y NOP
ADDy.z VF06,VF31,VF07y NOP
ADDy.w VF06,VF31,VF08y NOP
ADDz.x VF07,VF31,VF21z NOP
ADDz.y VF07,VF31,VF22z NOP
ADDz.w VF07,VF31,VF08z NOP
ADDw.x VF08,VF31,VF21w NOP
ADDw.y VF08,VF31,VF22w NOP
ADDw.z VF08,VF31,VF23w NOP
; transpose VF09,VF10,VF11,VF12
; (plane 2)
NOP MOVE VF21,VF09
NOP MOVE VF22,VF10
NOP MOVE VF23,VF11
ADDx.y VF09,VF31,VF10x NOP
ADDx.z VF09,VF31,VF11x NOP
ADDx.w VF09,VF31,VF12x NOP
ADDy.x VF10,VF31,VF21y NOP
ADDy.z VF10,VF31,VF11y NOP
ADDy.w VF10,VF31,VF12y NOP
ADDz.x VF11,VF31,VF21z NOP
ADDz.y VF11,VF31,VF22z NOP
ADDz.w VF11,VF31,VF12z NOP
ADDw.x VF12,VF31,VF21w NOP
ADDw.y VF12,VF31,VF22w NOP
ADDw.z VF12,VF31,VF23w NOP
; transpose VF13,VF14,VF15,VF16
; (plane 3)
NOP MOVE VF21,VF13
NOP MOVE VF22,VF14
NOP MOVE VF23,VF15
ADDx.y VF13,VF31,VF14x NOP
ADDx.z VF13,VF31,VF15x NOP
ADDx.w VF13,VF31,VF16x NOP
ADDy.x VF14,VF31,VF21y NOP
ADDy.z VF14,VF31,VF15y NOP
ADDy.w VF14,VF31,VF16y NOP
ADDz.x VF15,VF31,VF21z NOP
ADDz.y VF15,VF31,VF22z NOP
ADDz.w VF15,VF31,VF16z NOP
ADDw.x VF16,VF31,VF21w NOP
ADDw.y VF16,VF31,VF22w NOP
ADDw.z VF16,VF31,VF23w NOP
; transpose VF17,VF18,VF19,VF20
; (plane 4)
NOP MOVE VF21,VF17
NOP MOVE VF22,VF18
NOP MOVE VF23,VF19
ADDx.y VF17,VF31,VF18x NOP
ADDx.z VF17,VF31,VF19x NOP
ADDx.w VF17,VF31,VF20x NOP
ADDy.x VF18,VF31,VF21y NOP
ADDy.z VF18,VF31,VF19y NOP
ADDy.w VF18,VF31,VF20y NOP
ADDz.x VF19,VF31,VF21z NOP
ADDz.y VF19,VF31,VF22z NOP
ADDz.w VF19,VF31,VF20z NOP
ADDw.x VF20,VF31,VF21w NOP
ADDw.y VF20,VF31,VF22w NOP
ADDw.z VF20,VF31,VF23w NOP
; store the rearranged occluders
; Written back over the same 20 quadwords just loaded. Per plane the order is
; w, x, y, z - the order in which TestSphereAgainstOccluders reads them
; (VF04, VF01, VF02, VF03).
NOP SQI VF04,(VI02++)
NOP SQI VF01,(VI02++)
NOP SQI VF02,(VI02++)
NOP SQI VF03,(VI02++)
NOP SQI VF08,(VI02++)
NOP SQI VF05,(VI02++)
NOP SQI VF06,(VI02++)
NOP SQI VF07,(VI02++)
NOP SQI VF12,(VI02++)
NOP SQI VF09,(VI02++)
NOP SQI VF10,(VI02++)
NOP SQI VF11,(VI02++)
NOP SQI VF16,(VI02++)
NOP SQI VF13,(VI02++)
NOP SQI VF14,(VI02++)
NOP SQI VF15,(VI02++)
NOP SQI VF20,(VI02++)
NOP SQI VF17,(VI02++)
NOP SQI VF18,(VI02++)
NOP SQI VF19,(VI02++)
; loop if we haven't just processed the sentinel planes
; VI01 = VI02 - VI05; keep going while the cursor is still below the end of the sentinel.
; This overwrites the occluder count that was passed in VI01.
; NOTE(review): the cursor advances 20 quadwords (4 occluders) per pass, so when
; (count + 1) is not a multiple of 4 the last pass also transposes whatever lies
; beyond the sentinel - presumably harmless, but confirm the buffer is large enough.
NOP ISUB VI01,VI02,VI05
; presumably spacing so the branch sees the updated VI01 - TODO confirm
NOP NOP
NOP IBLTZ VI01,MemLoop
; branch delay slot
NOP NOP
; quit
NOP[E] NOP
NOP NOP
;-----------------------------------------------------------------------------------------------------------------------------
TestSphereAgainstOccluders:
; Tests one sphere against the transposed occluder planes, 4 occluders per lane group.
; sphere is in VF07 as (x,y,z,R)
; for each plane p, we must compute px*x + py*y + pz*z + pw + R
; uses a sentinel occluder to terminate
;
; In:  VF07 = sphere; VU data memory from address 0 in the layout written by
;      InitialiseOccluders; VI05 = address one past the sentinel (this routine
;      reads VI05 but never sets it - presumably still live from
;      InitialiseOccluders; TODO confirm against the caller)
; Out: VI01 = VI03 - VI05 : zero when the slot that ended the loop was the sentinel,
;      non-zero when a real occluder matched
;      VI06 = 4*(passes-1) + slot : index of that occluder in the original order
; Clobbers: VI02, VI03, VI10, VF01-VF06, ACC, MAC flags
;
; Per plane quadword group the upper pipe evaluates
;   ACC = pw + R ; ACC += px*x ; ACC += py*y ; (ACC + pz*z) -> VF00
; The final result is thrown away (VF00 is the destination); only the per-lane sign
; flags it leaves in the MAC flag register are used. The lower pipe preloads the
; next plane's quadwords while this happens.
NOP IADDIU VI02,VI00,0 ; initialise pointer
; loop prologue
; VF05.y is a float counter: zeroed here, +1 (VF00.w) per occluder slot tested below
ADD.y VF05,VF00,VF00 LQI VF04,(VI02++)
NOP LQI VF01,(VI02++)
NOP LQI VF02,(VI02++)
NOP LQI VF03,(VI02++)
ADDAw ACC,VF04,VF07w LQI VF04,(VI02++)
MADDAx ACC,VF01,VF07x LQI VF01,(VI02++)
MADDAy ACC,VF02,VF07y LQI VF02,(VI02++)
MADDz VF00,VF03,VF07z IADDIU VI10,VI00,0xF0 ; VI10 = mask for Sxyzw FMAC flags
; main loop, tests 4 occluders (20 planes) per pass
; The first FMAND seeds VI01 = MAC flags & 0xF0; each later FMAND VI01,VI01 ANDs in
; the sign flags of the next plane, so a lane's bit survives only if all 5 plane
; results of that lane's occluder were negative.
; NOTE(review): which plane result each FMAND observes depends on FMAC latency;
; the pairing is not annotated in the original and has not been re-derived here.
Loop: ADDw.y VF05,VF05,VF00w LQI VF03,(VI02++)
ADDAw ACC,VF04,VF07w LQI VF04,(VI02++)
MADDAx ACC,VF01,VF07x LQI VF01,(VI02++)
MADDAy ACC,VF02,VF07y FMAND VI01,VI10
MADDz VF00,VF03,VF07z LQI VF02,(VI02++)
ADDw.y VF05,VF05,VF00w LQI VF03,(VI02++)
ADDAw ACC,VF04,VF07w LQI VF04,(VI02++)
MADDAx ACC,VF01,VF07x LQI VF01,(VI02++)
MADDAy ACC,VF02,VF07y FMAND VI01,VI01
MADDz VF00,VF03,VF07z LQI VF02,(VI02++)
ADDw.y VF05,VF05,VF00w LQI VF03,(VI02++)
ADDAw ACC,VF04,VF07w LQI VF04,(VI02++)
MADDAx ACC,VF01,VF07x LQI VF01,(VI02++)
MADDAy ACC,VF02,VF07y FMAND VI01,VI01
MADDz VF00,VF03,VF07z LQI VF02,(VI02++)
ADDw.y VF05,VF05,VF00w LQI VF03,(VI02++)
ADDAw ACC,VF04,VF07w LQI VF04,(VI02++)
MADDAx ACC,VF01,VF07x LQI VF01,(VI02++)
MADDAy ACC,VF02,VF07y FMAND VI01,VI01
MADDz VF00,VF03,VF07z LQI VF02,(VI02++)
; VF06.y = integer form of the slot counter (4 * number of passes so far)
FTOI0.y VF06,VF05 LQI VF03,(VI02++)
ADDAw ACC,VF04,VF07w LQI VF04,(VI02++)
MADDAx ACC,VF01,VF07x LQI VF01,(VI02++)
MADDAy ACC,VF02,VF07y FMAND VI01,VI01
; no lane kept its sign bit through all 5 planes -> next group of 4 occluders
NOP IBEQ VI01,VI00,Loop
; branch delay slot: part of the next group's pipeline, executed either way
MADDz VF00,VF03,VF07z LQI VF02,(VI02++)
; Slot decode. VI02 has run ahead by the prefetch (7 loads in the prologue plus 20
; per pass), so VI02 - 22/17/12/7 gives 5*(occluder index + 1) for slots 1..4: the
; address one past that occluder's planes in the pre-transpose layout. That value
; equals VI05 only for the sentinel occluder.
; (macflags & 0x80) => possible occluder in 1st slot of group of 4
NOP ISUBIU VI10,VI01,0x80
NOP MTIR VI06,VF06y
NOP IBGEZ VI10,TestForSentinel
; branch delay slot
NOP ISUBIU VI03,VI02,22
; (macflags & 0x40) => possible occluder in 2nd slot of group of 4
NOP ISUBIU VI10,VI01,0x40
NOP IADDIU VI06,VI06,1
NOP IBGEZ VI10,TestForSentinel
; branch delay slot
NOP ISUBIU VI03,VI02,17
; (macflags & 0x20) => possible occluder in 3rd slot of group of 4
NOP ISUBIU VI10,VI01,0x20
NOP IADDIU VI06,VI06,1
NOP IBGEZ VI10,TestForSentinel
; branch delay slot
NOP ISUBIU VI03,VI02,12
; (macflags & 0x10) => possible occluder in 4th slot of group of 4
NOP IADDIU VI06,VI06,1
NOP ISUBIU VI03,VI02,7
TestForSentinel:
; sentinel if VI03==VI05, else occluded
NOP[E] ISUB VI01,VI03,VI05
; executed after the [E] line: the counter was already 4 ahead when sampled, so
; subtracting 4 turns it into the index of the first occluder of the current group
NOP ISUBIU VI06,VI06,4
;-----------------------------------------------------------------------------------------------------------------------------
BatchRayTriangleCollision:
; Runs the ray/triangle intersection test over an array of records in VU data memory.
;
; In (VU data memory):
;   qword 0          : x = starting index, y = total number of collision tests
;   qword 1 onwards  : 5 quadwords per test - rayStart, rayDir, v0, v1, v2
; Out:
;   VI01             : number of collisions found
;   qword 1 onwards  : one quadword per hit, written over the input, with
;                      x = t (distance along the ray) and w = test index
; Clobbers: VI03-VI06, VI13-VI15, VF01-VF17, VF31.x, ACC, Q, I, MAC/status flags
;
; The next record is loaded in the lower pipe while the current one is being
; evaluated, so by the time a hit is stored the data it overwrites has been read.
; Rejection tests read the MAC sign flags with FMAND several cycles after the
; instruction that produced them, so the "check ..." comments sit on the FMAND
; line, not on the line that computed the value.
;
; NOTE(review): VI04 (tests remaining) is decremented before it is compared with
; zero, so if total == starting index on entry it goes to -1 and the loop does not
; stop at zero - confirm callers never submit an empty batch.
; NOTE(review): VF31.x is overwritten by the (u+v) test; the cull tests in this
; file expect VF31 = 1, so it presumably has to be reloaded before they run.
NOP ILW.y VI04, 0(VI00) ; total number of collision tests
NOP ILW.x VI05, 0(VI00) ; starting index
NOP IADDIU VI01, VI00, 0 ; init number of found collisions
NOP IADDIU VI03, VI00, 1 ; init address
NOP IADD VI06, VI00, VI03 ; init output address (writes over input)
NOP ISUB VI04, VI04, VI05 ; decrement the start index from the array size
NOP LQ VF06, 2(VI03) ; v0
NOP LQ VF08, 4(VI03) ; v2
NOP LQ VF07, 3(VI03) ; v1
NOP LQ VF04, 0(VI03) ; rayStart
NOP LQ VF05, 1(VI03) ; rayDir
NextCollision:
SUB.xyz VF10, VF08, VF06 NOP ; edge2 = v2 - v0
SUB.xyz VF09, VF07, VF06 MFIR.w VF17w, VI05 ; edge1 = v1 - v0 ; store index in VF17w (output)
SUB.xyz VF13, VF04, VF06 FSSET 0 ; tvec = rayStart - v0
ADDW.x VF02, VF00, VF00w LOI 1.00001 ; Load 1.0f to VF02x ; load 1 + EPSILON into I
OPMULA.xyz ACC, VF05, VF10 NOP ; Cross product of ray normal and edge2 (pvec)
OPMSUB.xyz VF12,VF10, VF05 NOP ; Second part of cross product
OPMULA.xyz ACC, VF13, VF09 IADDIU VI14, VI00, 0x80 ; qvec = crossProd(tvec, edge1) ; Set the mask X sign flag
OPMSUB.xyz VF14,VF09, VF13 IADDIU VI13, VI00, 0x10 ; Set the mask W sign flag
SUBi.x VF03,VF00, I NOP ; put -(1 + EPSILON) in VF03x
; The dot products below are built in lane x only: MUL.xyz forms the three products,
; then MULAx/MADDAy/MADDz sum them, using VF02.x = 1.0 as the multiplier.
MUL.xyz VF11,VF09, VF12 NOP ; det = edge1 * pvec [start]
MUL.xyz VF15,VF13, VF12 NOP ; u = tvec * pvec [start]
MULAx.x ACC, VF09, VF12x IADDI VI04, VI04, -1 ; decrement number of collision tests
MADDAy.x ACC, VF02, VF11y IADDIU VI03, VI03, 5 ; increment address
MADDz.x VF11,VF02, VF11z NOP ; det = edge1 * pvec [ready]
MULAx.x ACC, VF02, VF15x LOI 0.00001 ; load EPSILON into I
MADDAy.x ACC, VF02, VF15y LQ VF04, 0(VI03) ; next rayStart
MUL.xyz VF17,VF10, VF14 NOP ; t = edge2 * qvec [start]
SUBi.x VF01,VF11, I DIV Q, VF00w,VF11x ; If det < EPSILON ; Q = 1.0f / det
MADDz.x VF15,VF02, VF15z LQ VF06, 2(VI03) ; u = tvec * pvec [ready] ; next v0
MULAx.x ACC, VF10, VF14x LQ VF08, 4(VI03) ; next v2
MADDAy.x ACC, VF02, VF17 LQ VF07, 3(VI03) ; next v1
MUL.xyz VF16,VF05, VF14 FMAND VI15,VI14 ; v = rayDir * qvec [start] ; check if det result (MAC register)
MADDz.x VF17,VF02, VF17z IBNE VI15,VI00,BatchRayTriDone ; t = edge2 * qvec [ready]
MULAx.x ACC, VF05, VF14x LQ VF05, 1(VI03) ; next rayDir
MULQ.x VF15,VF15, Q NOP ; u = (tvec * pvec) / det
MADDAy.x ACC, VF02, VF16 NOP
MADDz.x VF16,VF02, VF16z NOP ; v = rayDir * qvec [ready]
NOP NOP
ADDx.x VF00,VF15, VF03x FMAND VI15,VI14 ; if u > 1 + EPSILON ; check if u result (MAC register)
NOP IBNE VI15,VI00,BatchRayTriDone
MULQ.x VF16,VF16, Q NOP ; v = (rayDir * qvec) / det;
MULQ.x VF17,VF17, Q NOP ; t = (edge2 * qvec) / det
NOP FMAND VI15,VI14 ; check if u > 1 + EPSILON result (MAC register)
; u - (1 + EPSILON) not negative -> reject (hence IBEQ rather than IBNE)
NOP IBEQ VI15,VI00,BatchRayTriDone
ADDA.x ACC ,VF15, VF16 FMAND VI15,VI14 ; check if v < 0 result (MAC register)
MADDw.x VF31,VF03, VF00w IBNE VI15,VI00,BatchRayTriDone ; if (u+v) > 1 + EPSILON [start]
NOP NOP
; t - 0 : sets the X sign flag when t < 0
SUB.x VF00,VF17, VF00 NOP
NOP NOP
; VF00.w - t = 1 - t in lane w : sets the W sign flag when t > 1
SUBx.w VF00,VF00, VF17 FMAND VI15,VI14 ; if t > 1 [start] ; check if u+v > 1 + EPSILON result (MAC register)
NOP IBEQ VI15,VI00,BatchRayTriDone
NOP FMAND VI15,VI14 ; check if t < 0 result (MAC register)
NOP IBNE VI15,VI00,BatchRayTriDone
NOP FMAND VI15,VI13 ; check if t > 1 result (MAC register)
NOP IBNE VI15,VI00,BatchRayTriDone
NOP NOP
NOP SQI.xw VF17, (VI06++) ; store distance (x) and index (w)
NOP IADDIU VI01, VI01, 1 ; inc number of found collisions
BatchRayTriDone:
NOP IBNE VI04, VI00,NextCollision
; branch delay slot: executed whether or not the branch is taken
NOP IADDIU VI05, VI05, 1 ; increment index for output
; NOTE(review): this routine terminates with the [T] bit where every other routine
; in this file uses [E] - confirm that is intentional.
NOP[T] NOP
NOP NOP
RayTriangleCollision:
; Single ray/triangle intersection test; same arithmetic as the body of
; BatchRayTriangleCollision but with the operands passed in registers.
;
; In:  VF04 = rayStart, VF05 = rayDir, VF06 = v0, VF07 = v1, VF08 = v2
; Out: VI02 = 1 if the ray hits the triangle with 0 <= t <= 1, else 0
;      VF17.x = t (only meaningful when VI02 = 1; see the commented-out qmfc2 below)
; Clobbers: VI13-VI15, VF01-VF03, VF09-VF17, VF31.x, ACC, Q, I, MAC/status flags
;
; Rejection tests read the MAC sign flags with FMAND several cycles after the
; instruction that produced them, so the "check ..." comments sit on the FMAND
; line, not on the line that computed the value.
; NOTE(review): VF31.x is overwritten by the (u+v) test; the cull tests in this
; file expect VF31 = 1, so it presumably has to be reloaded before they run.
SUB.xyz VF10, VF08, VF06 NOP ; edge2 = v2 - v0
SUB.xyz VF09, VF07, VF06 FSSET 0 ; edge1 = v1 - v0
SUB.xyz VF13, VF04, VF06 NOP ; tvec = rayStart - v0
ADDW.x VF02, VF00, VF00w LOI 1.00001 ; Load 1.0f to VF02x ; load 1 + EPSILON into I
OPMULA.xyz ACC, VF05, VF10 NOP ; Cross product of ray normal and edge2 (pvec)
OPMSUB.xyz VF12,VF10, VF05 NOP ; Second part of cross product
OPMULA.xyz ACC, VF13, VF09 IADDIU VI14, VI00, 0x80 ; qvec = crossProd(tvec, edge1) ; Set the mask X sign flag
OPMSUB.xyz VF14,VF09, VF13 IADDIU VI13, VI00, 0x10 ; Set the mask W sign flag
SUBi.x VF03,VF00, I IADDIU VI02, VI00, 0 ; put -(1 + EPSILON) in VF03x ; store 0 for return value
; The dot products below are built in lane x only: MUL.xyz forms the three products,
; then MULAx/MADDAy/MADDz sum them, using VF02.x = 1.0 as the multiplier.
MUL.xyz VF11,VF09, VF12 NOP ; det = edge1 * pvec [start]
MUL.xyz VF15,VF13, VF12 NOP ; u = tvec * pvec [start]
MULAx.x ACC, VF09, VF12x NOP
MADDAy.x ACC, VF02, VF11y NOP
MADDz.x VF11,VF02, VF11z NOP ; det = edge1 * pvec [ready]
MULAx.x ACC, VF02, VF15x LOI 0.00001 ; load EPSILON into I
MADDAy.x ACC, VF02, VF15y NOP
MUL.xyz VF17,VF10, VF14 NOP ; t = edge2 * qvec [start]
SUBi.x VF01,VF11, I DIV Q, VF00w,VF11x ; If det < EPSILON ; Q = 1.0f / det
MADDz.x VF15,VF02, VF15z NOP ; u = tvec * pvec [ready]
MULAx.x ACC, VF10, VF14x NOP
MADDAy.x ACC, VF02, VF17 NOP
MUL.xyz VF16,VF05, VF14 FMAND VI15,VI14 ; v = rayDir * qvec [start] ; check if det result (MAC register)
MADDz.x VF17,VF02, VF17z IBNE VI15,VI00,vu0RayTriDone ; t = edge2 * qvec [ready]
MULAx.x ACC, VF05, VF14x NOP
MULQ.x VF15,VF15, Q NOP ; u = (tvec * pvec) / det
MADDAy.x ACC, VF02, VF16 NOP
MADDz.x VF16,VF02, VF16z NOP ; v = rayDir * qvec [ready]
NOP NOP
ADDx.x VF00,VF15, VF03x FMAND VI15,VI14 ; if u > 1 + EPSILON ; check if u result (MAC register)
NOP IBNE VI15,VI00,vu0RayTriDone
MULQ.x VF16,VF16, Q NOP ; v = (rayDir * qvec) / det;
MULQ.x VF17,VF17, Q NOP ; t = (edge2 * qvec) / det
NOP FMAND VI15,VI14 ; check if u > 1 + EPSILON result (MAC register)
; u - (1 + EPSILON) not negative -> reject (hence IBEQ rather than IBNE)
NOP IBEQ VI15,VI00,vu0RayTriDone
ADDA.x ACC ,VF15, VF16 FMAND VI15,VI14 ; check if v < 0 result (MAC register)
MADDw.x VF31,VF03, VF00w IBNE VI15,VI00,vu0RayTriDone ; if (u+v) > 1 + EPSILON [start]
NOP NOP
; t - 0 : sets the X sign flag when t < 0
SUB.x VF00,VF17, VF00 NOP
NOP NOP
; VF00.w - t = 1 - t in lane w : sets the W sign flag when t > 1
SUBx.w VF00,VF00, VF17 FMAND VI15,VI14 ; if t > 1 [start] ; check if u+v > 1 + EPSILON result (MAC register)
NOP IBEQ VI15,VI00,vu0RayTriDone
NOP FMAND VI15,VI14 ; check if t < 0 result (MAC register)
NOP IBNE VI15,VI00,vu0RayTriDone
NOP FMAND VI15,VI13 ; check if t > 1 result (MAC register)
NOP IBNE VI15,VI00,vu0RayTriDone
NOP NOP
; (leftover EE-side macro-mode code, kept for reference: reads t back from VF17)
;qmfc2 $9, $vf17 # move t to $9
;sw $9, 0(%1) # and write out
NOP IADDIU VI02, VI00, 1 ; store 1 for return value
vu0RayTriDone:
NOP[E] NOP
NOP NOP
;-----------------------------------------------------------------------------------------------------------------------------
; new bounding volume / frustum intersection tests
; VF08 = (x0,y0,z0,r)
; VF09 = (bx,by,bz,?)
; VF10 = (nx)
; VF11 = (ny)
; VF12 = (nz)
; VF13 = (|nx|)
; VF14 = (|ny|)
; VF15 = (|nz|)
; VF16 = (nw)
; VF17 = (nx)
; VF18 = (ny)
; VF19 = (nz)
; VF20 = (|nx|)
; VF21 = (|ny|)
; VF22 = (|nz|)
; VF23 = (nw)
; VF24 = (nx)
; VF25 = (ny)
; VF26 = (nz)
; VF27 = (|nx|)
; VF28 = (|ny|)
; VF29 = (|nz|)
; VF30 = (nw)
; VF31 = (1)
ViewCullTest:
; Evaluates a bounding sphere and a box extent against planes held one per lane
; (VF10-VF16: four planes; VF17-VF23 lanes x,y: two more), using the register
; layout listed in the table above.
;
; For each plane group the upper pipe computes
;   ACC = nw + nx*x0 + ny*y0 + nz*z0                    (ADDAx .. MADDAz)
;   ACC + VF31*r                     -> VF00            (MADDw; ACC is not updated)
;   ACC + |nx|*bx + |ny|*by + |nz|*bz -> VF00           (MADDAx, MADDAy, MADDz)
; Both results are written to VF00, i.e. discarded; only the MAC flags they set
; are used.
;
; Out: VI01 = bitwise OR of the MAC flag register as sampled by the four FMORs.
; NOTE(review): which result each FMOR observes depends on FMAC latency and is not
; annotated in the original; presumably the four flag-only results above - confirm.
; The meaning of the individual bits is decided by the caller, not visible here.
ADDAx ACC,VF16,VF00x NOP
MADDAx ACC,VF10,VF08x NOP
MADDAy ACC,VF11,VF08y NOP
MADDAz ACC,VF12,VF08z NOP
MADDw VF00,VF31,VF08w NOP
MADDAx ACC,VF13,VF09x NOP
MADDAy ACC,VF14,VF09y NOP
MADDz VF00,VF15,VF09z NOP
; FMOR VI01,VI00 starts the accumulation (flags | 0); later FMORs OR into VI01
ADDAx.xy ACC,VF23,VF00x FMOR VI01,VI00
MADDAx.xy ACC,VF17,VF08x NOP
MADDAy.xy ACC,VF18,VF08y NOP
MADDAz.xy ACC,VF19,VF08z FMOR VI01,VI01
MADDw.xy VF00,VF31,VF08w NOP
MADDAx.xy ACC,VF20,VF09x NOP
MADDAy.xy ACC,VF21,VF09y NOP
MADDz.xy VF00,VF22,VF09z NOP
NOP FMOR VI01,VI01
NOP NOP
NOP NOP
NOP[E] FMOR VI01,VI01
NOP NOP
OuterCullTest:
; Same structure as ViewCullTest, but against the planes in lanes z,w of VF17-VF23
; and all four lanes of VF24-VF30, and with the radius / box-extent terms
; subtracted (MSUB*) instead of added:
;   ACC = nw + nx*x0 + ny*y0 + nz*z0
;   ACC - VF31*r                      -> VF00   (flags only)
;   ACC - |nx|*bx - |ny|*by - |nz|*bz -> VF00   (flags only)
;
; Out: VI01 = bitwise OR of the MAC flag register as sampled by the four FMORs.
; NOTE(review): the pairing of each FMOR with a particular result depends on FMAC
; latency and is not annotated in the original - confirm before rescheduling.
ADDAx.zw ACC,VF23,VF00x NOP
MADDAx.zw ACC,VF17,VF08x NOP
MADDAy.zw ACC,VF18,VF08y NOP
MADDAz.zw ACC,VF19,VF08z NOP
MSUBw.zw VF00,VF31,VF08w NOP
MSUBAx.zw ACC,VF20,VF09x NOP
MSUBAy.zw ACC,VF21,VF09y NOP
MSUBz.zw VF00,VF22,VF09z NOP
; FMOR VI01,VI00 starts the accumulation (flags | 0); later FMORs OR into VI01
ADDAx ACC,VF30,VF00x FMOR VI01,VI00
MADDAx ACC,VF24,VF08x NOP
MADDAy ACC,VF25,VF08y NOP
MADDAz ACC,VF26,VF08z FMOR VI01,VI01
MSUBw VF00,VF31,VF08w NOP
MSUBAx ACC,VF27,VF09x NOP
MSUBAy ACC,VF28,VF09y NOP
MSUBz VF00,VF29,VF09z NOP
NOP FMOR VI01,VI01
NOP NOP
NOP NOP
NOP[E] FMOR VI01,VI01
NOP NOP
BothCullTests:
; Combined routine that accumulates MAC flags into two registers, VI01 and VI11,
; in a single pass over the three plane groups.
;
; Out: VI01 = OR of the MAC flags sampled by the first four FMORs
;      VI11 = OR of the MAC flags sampled by the remaining FMORs
;      VF05.x = -(ACC.z captured below), VF05.z = that captured ACC.z
; Clobbers: VF01.zw, VF05.xz, ACC, MAC flags
; NOTE(review): presumably VI01 carries the ViewCullTest result and VI11 the
; OuterCullTest result; this is inferred from the routine name and the FMOR
; grouping only - confirm against the caller. The pairing of each FMOR with a
; particular result depends on FMAC latency and is not annotated in the original.
;
; VF01.zw = |nx|*bx + |ny|*by + |nz|*bz for the planes in lanes z,w of VF20-VF22
MULAx.zw ACC,VF20,VF09x NOP
MADDAy.zw ACC,VF21,VF09y NOP
MADDz.zw VF01,VF22,VF09z NOP
; planes VF10-VF16, all lanes: same sequence as the first half of ViewCullTest
ADDAx ACC,VF16,VF00x NOP
MADDAx ACC,VF10,VF08x NOP
MADDAy ACC,VF11,VF08y NOP
MADDAz ACC,VF12,VF08z NOP
MADDw VF00,VF31,VF08w NOP
MADDAx ACC,VF13,VF09x NOP
MADDAy ACC,VF14,VF09y NOP
MADDz VF00,VF15,VF09z NOP
; planes VF17-VF23, lanes z,w: ACC = nw + n . centre
ADDAx.zw ACC,VF23,VF00x FMOR VI01,VI00
MADDAx.zw ACC,VF17,VF08x NOP
MADDAy.zw ACC,VF18,VF08y NOP
MADDAz.zw ACC,VF19,VF08z FMOR VI01,VI01
; VF05.z = ACC.z + VF00.z*VF00.x, i.e. a copy of ACC.z (VF00.z and VF00.x are zero)
MADDx.z VF05,VF00,VF00x NOP
; The same accumulator is then tested four ways, all results discarded to VF00:
;   ACC + r, ACC + VF01 (box term), ACC - r, ACC - VF01
MADDw.zw VF00,VF31,VF08w NOP
MADDw.zw VF00,VF01,VF00w NOP
MSUBw.zw VF00,VF31,VF08w NOP
MSUBw.zw VF00,VF01,VF00w NOP
; planes VF24-VF30, all lanes: same sequence as the second half of OuterCullTest
ADDAx ACC,VF30,VF00x FMOR VI01,VI01
MADDAx ACC,VF24,VF08x FMOR VI01,VI01
; from here on the flags are accumulated into VI11 instead of VI01
MADDAy ACC,VF25,VF08y FMOR VI11,VI00
MADDAz ACC,VF26,VF08z FMOR VI11,VI11
MSUBw VF00,VF31,VF08w NOP
MSUBAx ACC,VF27,VF09x NOP
MSUBAy ACC,VF28,VF09y NOP
MSUBz VF00,VF29,VF09z NOP
; VF05.x = VF00.x - VF05.z = -(ACC.z saved above)
; NOTE(review): presumably a distance value returned to the caller - confirm
SUBz.x VF05,VF00,VF05z FMOR VI11,VI11
NOP NOP
NOP NOP
NOP FMOR VI11,VI11
NOP[E] NOP
NOP NOP
;-----------------------------------------------------------------------------------------------------------------------------
; Closes the MPG block opened after MPGStart0; MPGEnd0 (exported at the top of the
; file) labels the first address after the microprogram.
.EndMPG
MPGEnd0: