thug/Code/Gfx/NGPS/NX/dma.cpp

1268 lines
30 KiB
C++
Raw Permalink Normal View History

2016-02-13 21:39:12 +00:00
#include <core/defines.h>
#include "dma.h"
#include "vif.h"
#include "vu1.h"
#include "gs.h"
#include "group.h"
#include "render.h"
#include "vu1code.h"
#include "switches.h"
#include "geomnode.h"
/*
Mick Notes:
The calls to the DMA functions suggest that you are immediately executing something.
You are not. You are simply building a list of DMA commands that get executed the next frame
The DMA list is the primary mechanism for initiating rendering on the PS2.
To render something (like a line), you "simply" need to generate the appropriate DMA
packets that contain GIF packets, containing GS primitives, and then link this into the
main DMA list.
Things such as world sectors have pre-built DMA lists that contain commands
to upload lists of vertices to VU1 micro-memory, and then upload lists of
triangles, and then trigger the appropriate VU1 microcode to transform and render them.
These pre-built lists are linked in each frame with the appropriate transformation
matrices that are needed based on the current camera position and the current
object position (if it is a moving object)
Thus, each object is mostly pre-built, and so rendering it requires very little CPU time.
Each frame, we build a series of dynamic DMA lists.
There are two lists for each group (see group.cpp).
The first list is the texture upload. This is pretty simple, just uploading a few textures.
Texture uploading happens asynchronously with rendering, so we can be executing a DMA list
of rendering stuff at the same time as we are uploading the textures for the next group.
A lot of the DMA code is in DMA.H, as it is small inline functions.
The Runtime buffer is dynamically allocated in InitializeEngine(). Last time I looked it
was 512K in size (note though, this is increased to 16MB for the debugging wireframe mode)
The Runtime DMA lists are double buffered. You are executing one whilst building the next one.
The pRunTimeBuffer is split in two, and pointers to the base of each half are stored in
pList[0] and pList[1]
The integer variable "Field" is 0 or 1, and indicates which field (odd or even) we are rendering to.
The DMA lists are built using a global variable char * pLoc.
At the start of a frame, pLoc is initialized like:
dma::pLoc = dma::pList[Field];
So it points to the start of one of our 256K DMA buffers (remember the other is being executed)
DMA packets are now built using calls to the functions here and in DMA.H, for example:
dma::Tag(dma::ref, ((uint8 *)&MPGEnd-(uint8 *)&MPGStart+15)/16, (uint)&MPGStart);
vif::NOP();
vif::NOP();
(the above simply transfers the microcode from RAM to VU1 micromem)
Now, it's important to note that DMA execution does NOT simply start at pList[Field] and
then run all the way through a frame's worth of data. There are actually several DMA lists,
which are fired off individually. Some of these (the groups) are logically connected so that
the end of one group's DMA (and the GS/VU activity it triggers) causes an interrupt,
which the CPU handles by starting the DMA for another group (see interrupt.cpp)
After the groups, there is the "immediate mode" DMA list, triggered by pEpilogue->pRender[Field]
this is simply a raw DMA list that you can put anything you want into. It starts at the value
of pLoc after RenderWorld has been called (a terrible hack, as noted in the code :)
All of the "immediate mode" rendering is considered to be part of pEpilogue. It is terminated
by the RenderEpilogue() function, which just links in a final interrupt trigger, which will
get picked up by GsHandler(), and sGroup::pRenderGroup will be set to NULL, which is what
WaitForRendering() uses to determine if rendering has finished.
*/
namespace NxPs2
{
// Begin recording a dma subroutine.
// ID is the tag type the subroutine will later be invoked with; it is stored
// in dma::ID for EndSub() to pack into the gosub tag.
void dma::BeginSub(eTag ID)
{
    // align the build pointer before recording the start address
    // (Align(0,16) is presumably a 16-byte alignment of pLoc -- confirm in dma.h)
    Align(0,16);

    // start with a clean vu1 allocation state for this subroutine
    vu1::Buffer = 0;
    vu1::Loc = 0;

    // remember the tag type and where the subroutine starts
    dma::ID = ID;
    pSub = pLoc;
}
// Finish the subroutine started by BeginSub() and return the 64-bit tag
// used to invoke it. The tag packs:
//   bits 32-63 : start address of the subroutine (pSub)
//   bits 28-31 : tag ID recorded by BeginSub()
//   bits 16-25 : low 10 bits of vu1::Loc at the end of the subroutine
//   low bits   : for dma::ref only, the subroutine length in 16-byte units
uint64 dma::EndSub(void)
{
    Align(0,16);

    Dbg_MsgAssert(ID!=refe && ID!=refs, ("refe and refs not supported in dma tag!"));

    const uint64 address_bits = (uint64)pSub<<32;
    const uint64 id_bits      = (uint64)ID<<28;

    uint64 GosubTag = address_bits | id_bits | (vu1::Loc & 0x3FF) << 16;

    // a ref tag carries its own transfer size
    if (ID==ref)
    {
        GosubTag |= (pLoc - pSub) >> 4;
    }

    return GosubTag;
}
// Call a previously recorded subroutine (using a dma::call or a dma::ref).
// Num selects the prebuilt tag in Gosubs[]; Path selects what is emitted in
// the two vifcode slots that follow the 64-bit tag:
//   1 : BASE/OFFSET vifcodes, and vu1::Loc advances by the size stored in the tag
//   2 : two vif NOPs
//   3 : nothing is written, the build pointer just skips the 8 bytes
void dma::Gosub(uint Num, uint Path)
{
    const uint64 Tag = Gosubs[Num];
    Store64(Tag);

    if (Path == 1)
    {
        vif::BASE(vu1::Loc);
        vif::OFFSET(0);
        vu1::Loc += (Tag>>16) & 0x3FF;	// VUMem size
    }
    else if (Path == 2)
    {
        vif::NOP();
        vif::NOP();
    }
    else if (Path == 3)
    {
        pLoc += 8;						// no need to write anything since TTE=0
    }
#ifdef __PLAT_NGPS__
    else
    {
        printf("error: dma::Gosub() called with unrecognised path number\n");
        exit(1);
    }
#endif
}
// Begin recording a 3D subroutine: align the build pointer, reset the vu1
// allocation state and remember where the subroutine starts.
void dma::BeginSub3D(void)
{
    Align(0,16);

    vu1::Buffer = 0;
    vu1::Loc = 0;

    pSub = pLoc;
}
// Finish a 3D subroutine and return its start address.
// The low 10 bits of vu1::Loc are OR'd into the second halfword of the
// subroutine's first tag so that Gosub3D() can read the size back later.
uint8 *dma::EndSub3D(void)
{
    uint16 *p_first_tag = (uint16 *)pSub;
    p_first_tag[1] |= vu1::Loc&0x3FF;
    return pSub;
}
// Call a 3D subroutine (always using a dma::call).
// pSub        - address returned by EndSub3D()
// RenderFlags - value passed on through the ITOP vifcode
void dma::Gosub3D(uint8 *pSub, uint RenderFlags)
{
    // emit the call tag together with the vif setup for this invocation
    BeginTag(call,(uint)pSub);
    vif::BASE(vu1::Loc);
    vif::OFFSET(0);
    vif::ITOP(RenderFlags);
    EndTag();

    // advance the vu1 location by the halfword stored by EndSub3D()
    // NOTE(review): unlike Gosub() this value is not masked with 0x3FF, so any
    // other bits in that halfword are added as well -- confirm this is intended
    const uint16 *p_first_tag = (uint16 *)pSub;
    vu1::Loc += p_first_tag[1];
}
// Dma list traversal: given the address of a tag, return the address of the
// tag that follows it in the chain, or NULL for an 'end' (or unknown) tag.
// stepInto - for a 'call' tag, true follows the call (pushing the return
//            address on dma::Stack), false steps over it.
// Uses dma::Stack / dma::sp as a two-entry call stack; callers reset sp.
uint8 *dma::NextTag(uint8 *pTag, bool stepInto)
{
    const uint32 tag_word = *(uint32 *)pTag;

    Dbg_MsgAssert((tag_word&0x80000000)==0, ("IRQ bit set in dma tag"));
    Dbg_MsgAssert(tag_word!=refe<<28 && tag_word!=refs<<28, ("refe and refs not supported in dma tag!"));

    // tag that physically follows this tag's inline data (QWC quadwords)
    uint8 *const p_after_data = pTag + 16 + ((uint)((uint16 *)pTag)[0] << 4);
    // address held in the tag's second word
    uint8 *const p_addr = ((uint8 **)pTag)[1];

    uint8 *p_result = NULL;

    switch (tag_word>>28)
    {
    case cnt:
        p_result = p_after_data;
        break;

    case next:
        p_result = p_addr;
        break;

    case ref:
        // referenced data lives elsewhere, the next tag is right behind this one
        p_result = pTag + 16;
        break;

    case call:
        Dbg_MsgAssert(sp<2, ("dma call stack overflow"));
        if (stepInto)
        {
            Stack[sp++] = p_after_data;
            p_result = p_addr;
        }
        else
        {
            p_result = p_after_data;
        }
        break;

    case ret:
        Dbg_MsgAssert(sp>0, ("dma call stack underflow"));
        p_result = Stack[--sp];
        break;

    case end:
    default:
        break;
    }

    return p_result;
}
#ifdef __PLAT_NGPS__
// auxilliary comparison function for dma sort
int dma::Cmp(const void *p1, const void *p2)
{
return ((SSortElement *)p1)->z < ((SSortElement *)p2)->z ? -1 :
((SSortElement *)p1)->z > ((SSortElement *)p2)->z ? +1 :
0 ;
}
#if 0
// original version
// NOTE: this definition sits in the '#if 0' branch and is not compiled; the
// version in the '#else' branch (sorting in viewport segments) is the live one.
// sort a dma list of mesh packets on z
// the z is stored in the unused ADDR word of the first cnt tag of the mesh packet
// pList   - head tag of the list, which must be terminated by an 'end' tag
// returns - address of the first tag of the reordered list
uint8 *dma::SortGroup(uint8 *pList)
{
int num_elements;
SSortElement *p_element;
uint8 *p_tag, *p_end_tag, *p_prev_tag=NULL;
eTag ID;
// set array base at start of scratchpad
SSortElement *p_array = (SSortElement *)0x70000000;
// copy the addresses and z-values into array
p_element = p_array;
p_tag = pList;
sp = 0; // starting at top level of dma list
while ((ID = (eTag)(*(uint32 *)p_tag>>28)) != end)
{
// every cnt tag marks the start of one sortable mesh packet
if (ID == cnt)
{
p_element->address = p_tag;
p_element->z = ((float *)p_tag)[1];
p_element++;
}
p_tag = NextTag(p_tag, false);
}
// check array fits within scratchpad
// NOTE(review): this check runs after the array has already been written
num_elements = p_element-p_array;
Dbg_MsgAssert(num_elements*sizeof(SSortElement)<=16384, ("Can't fit array in scratchpad"));
// record address of end tag
p_end_tag = p_tag;
// sort the array
qsort(p_array, num_elements, sizeof(SSortElement), Cmp);
// reorder the dma list according to the sorted array
for (p_element=p_array; p_element<p_array+num_elements; p_element++)
{
// walk to the last tag of this mesh packet (the one before the next cnt/end)
p_tag = p_element->address;
do
{
p_prev_tag = p_tag;
p_tag = NextTag(p_tag, false);
ID = (eTag)(*(uint32 *)p_tag >> 28);
} while (ID!=cnt && ID!=end);
// link it to the next packet in sorted order
// NOTE(review): on the last iteration this reads one element past the end
// of the array; the value is overwritten by the patch-up below
((uint8 **)p_prev_tag)[1] = (p_element+1)->address;
}
// patch up the final dma::next tag to point to the dma::end tag
// NOTE(review): p_prev_tag is still NULL here if the list had no cnt tags
((uint8 **)p_prev_tag)[1] = p_end_tag;
// chain through the whole list to adjust vu1 base pointers
vu1::Loc = 0;
for (p_tag = p_array->address; *(uint32 *)p_tag>>28 != end; p_tag = NextTag(p_tag, false))
{
//if (*(uint32 *)p_tag & 0x03FF0000)
// byte 11 is the command byte of the first vifcode following the tag
if (((uint8 *)p_tag)[11] == 0x03)
{
// replace the low 10 bits of that vifcode's immediate with the running
// vu1 location, then advance by the size held in the tag's second halfword
((uint16 *)p_tag)[4] &= ~0x3FF;
((uint16 *)p_tag)[4] |= vu1::Loc;
vu1::Loc += ((uint16 *)p_tag)[1];
}
}
// return the address of the head tag
return p_array->address;
}
#else
// new version:
// the list is sorted in segments corresponding to viewports
// sort a dma list of mesh packets on z
// the z is stored in the unused ADDR word of the first cnt tag of the mesh packet
//
// pList   - head tag of the dma list; the list must be terminated by an 'end' tag
// returns - address of the head tag of the reordered list (pList itself if the
//           list contains no cnt tags and therefore nothing to sort)
//
// Segment boundaries are the tags whose addresses appear in
// render::sSortedListMarker[0..sMarkerIndex-1]; elements before the first
// marker are left in their original order, and each segment is sorted on its own.
// If no marker matches, nothing is sorted and the list is relinked in its
// original order.
uint8 *dma::SortGroup(uint8 *pList)
{
    const int max_segments = 4;

    int num_elements, i, num_segments;
    SSortElement *p_element, *p_segment[max_segments];
    uint8 *p_tag, *p_end_tag, *p_prev_tag=NULL;
    uint8 *p_head = pList;
    eTag ID;

    // set array base at start of scratchpad
    SSortElement *p_array = (SSortElement *)0x70000000;

    // copy the addresses and z-values into array
    p_element = p_array;
    p_tag = pList;
    sp = 0;							// starting at top level of dma list
    num_segments = 0;
    while ((ID = (eTag)(*(uint32 *)p_tag>>28)) != end)
    {
        for (i=0; i<render::sMarkerIndex; i++)
        {
            if ((int)p_tag == render::sSortedListMarker[i])
            {
                //printf("matched marker %d\n", i);
                // never write past the end of p_segment[]
                Dbg_MsgAssert(num_segments<max_segments, ("Too many sort segments in dma list"));
                if (num_segments < max_segments)
                {
                    p_segment[num_segments++] = p_element;
                }
            }
        }

        if (ID == cnt)
        {
            // check array fits within scratchpad (before writing the element)
            Dbg_MsgAssert((p_element+1-p_array)*sizeof(SSortElement)<=16384, ("Can't fit array in scratchpad"));
            p_element->address = p_tag;
            p_element->z = ((float *)p_tag)[1];
            p_element++;
        }
        p_tag = NextTag(p_tag, false);
    }

    num_elements = p_element-p_array;

    // record address of end tag
    p_end_tag = p_tag;

    // a list without any cnt tags has nothing to sort or relink
    if (num_elements > 0)
    {
        // sort the array in segments
        if (num_segments)
        {
            for (i=0; i<num_segments-1; i++)
            {
                //printf("sorting from %08X to %08X\n", p_segment[i], p_segment[i+1]);
                qsort(p_segment[i], p_segment[i+1]-p_segment[i], sizeof(SSortElement), Cmp);
            }
            // the last segment runs to the end of the array
            //printf("sorting from %08X to %08X\n", p_segment[i], p_element);
            qsort(p_segment[i], p_element-p_segment[i], sizeof(SSortElement), Cmp);
        }

        // reorder the dma list according to the sorted array
        SSortElement *p_last = p_array + num_elements - 1;
        for (p_element=p_array; p_element<=p_last; p_element++)
        {
            // find the last tag of this mesh packet (the one before the next cnt/end)
            p_tag = p_element->address;
            do
            {
                p_prev_tag = p_tag;
                p_tag = NextTag(p_tag, false);
                ID = (eTag)(*(uint32 *)p_tag >> 28);
            } while (ID!=cnt && ID!=end);

            // link it to the next packet in sorted order; the final packet
            // points to the dma::end tag
            ((uint8 **)p_prev_tag)[1] = (p_element < p_last) ? (p_element+1)->address : p_end_tag;
        }

        p_head = p_array->address;
    }

    // chain through the whole list to adjust vu1 base pointers
    vu1::Loc = 0;
    for (p_tag = p_head; *(uint32 *)p_tag>>28 != end; p_tag = NextTag(p_tag, false))
    {
        // only tags that carry a non-zero vu-mem size (bits 16-25) are rebased
        if (*(uint32 *)p_tag & 0x3FF0000)
        {
            // replace the low 10 bits of the immediate of the first vifcode
            // with the running vu1 location
            // NOTE(review): vu1::Loc is accumulated and OR'd in without a 0x3FF
            // mask -- confirm the upper bits are harmless for this vifcode
            ((uint16 *)p_tag)[4] &= ~0x3FF;
            ((uint16 *)p_tag)[4] |= vu1::Loc;
            vu1::Loc += ((uint16 *)p_tag)[1];
        }
    }

    // return the address of the head tag
    return p_head;
}
#endif
#endif
// Start the dma list for a render group (pGroup is an sGroup *).
// Records the current build position as the group's render list for this
// field, emits the VIF1/VU1 setup packet followed by a dangling 'next' tag,
// and initialises the group's saved build state.
// Does nothing unless __PLAT_NGPS__ is defined.
void dma::BeginList(void *pGroup)
{
#ifdef __PLAT_NGPS__
    sGroup *p_group = (sGroup *)pGroup;

    // assume group isn't used
    p_group->Used[render::Field] = false;

    // set the dma list pointer
    p_group->pRender[render::Field] = pLoc;

    // VIF1 and VU1 setup
    BeginTag(cnt, 0xFF000000);		// bit of a cheat, so it will stay at the start of any sorted list
    vif::FLUSH();
    vif::STMASK(0);
    vif::STMOD(0);
    vif::STCYCL(1,1);
    vif::BASE(0);
    vif::OFFSET(0);
    vif::MSCAL(VU1_ADDR(Setup));
    EndTag();

    // dangling 'next' tag; its address is patched when the group is next selected
    dma::Tag(dma::next, 0, 0);
    vif::NOP();
    vif::NOP();

    // initial saved context for ReallySetList()
    p_group->p_tag = pTag;
    p_group->vu1_loc = 0;
#endif
}
void dma::EndList(void *pGroup)
{
SetList(pGroup);
// end dma list for this group
BeginTag(end, 0);
#if USE_INTERRUPTS
//vif::BASE(((sGroup *)pGroup)->vu1_loc);
vif::BASE(vu1::Loc);
vif::OFFSET(0);
vu1::Loc = 0; // must do this as a relative prim for a sortable list...
gs::BeginPrim(REL,0,0);
gs::Reg1(gs::SIGNAL, PackSIGNAL(1,1)); // signal the end of rendering this group
gs::EndPrim(1);
vif::MSCAL(VU1_ADDR(Parser));
#endif
EndTag();
((uint16 *)pTag)[1] |= vu1::Loc & 0x3FF; // must write some code for doing this automatically
}
void dma::ReallySetList(void *pGroup)
{
// finish with the previous dma context
if (sp_group)
{
// ensure the last tag was a 'next'...
// get the tag ID
uint ID = *(uint32 *)pTag>>28;
// take care of 'refe' and 'refs'
Dbg_MsgAssert(ID!=refe && ID!=refs, ("refe and refs not supported in dma tag!"));
// take care of 'call' and 'ref'
if (ID==call || ID==ref)
{
Tag(next, 0, 0);
vif::NOP();
vif::NOP();
}
// take care of 'cnt'
else if (ID==cnt)
{
pTag[3] = next<<4;
}
// 'end' and 'ret' won't have anything after them in the same context
// and 'next' is fine as it is
// save the vu1 location and dma tag location
((sGroup *)sp_group)->vu1_loc = vu1::Loc;
((sGroup *)sp_group)->p_tag = pTag;
}
// change bucket
sp_group = pGroup;
// set up the new bucket
if (pGroup)
{
// restore the vu1 location and dma tag location
vu1::Loc = ((sGroup *)pGroup)->vu1_loc;
pTag = ((sGroup *)pGroup)->p_tag;
// patch the pointer of the dangling 'next' tag
((uint32 *)pTag)[1] = (uint32)pLoc;
}
}
// Return the size in bytes of the dma packet starting at pTag: the tag's own
// quadword plus the QWC quadwords counted by its first halfword.
int dma::GetDmaSize(uint8 *pTag)
{
    const uint16 qwc = *(uint16 *)pTag;
    return (qwc + 1) << 4;			// (QWC+1)*16 bytes
}
// Count the vertices in the dma packet starting at pTag.
// Vertex data is recognised as the UNPACK (V4_16 or V4_32) that directly
// follows an STMOD(1) vifcode; the NUM fields of all such unpacks are summed.
// NUM is decoded as ((n-1)&0xFF)+1, i.e. a stored 0 means 256, matching
// ExtractXYZs()/ReplaceXYZs() so that buffers sized from this count are large
// enough for what those functions transfer.
int dma::GetNumVertices(uint8 *pTag)
{
    // get start and end of dma packet (the vifcodes begin in the upper half of the tag)
    uint8 *p_start = pTag + 8;
    uint8 *p_end   = pTag + 16 + (*(uint16 *)pTag << 4);

    // parse vifcodes
    uint8 *p_code = p_start;
    int num_verts = 0;
    do
    {
        if (((p_code[3] & 0x7F) == 0x05) && (p_code[0] == 1))	// look for STMOD(1)
        {
            p_code = vif::NextCode(p_code);
            Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code));
            num_verts += (int)((((uint32)p_code[2]-1)&0xFF)+1);
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);

    return num_verts;
}
// Count the triangles in the dma packet starting at pTag.
// For every vertex UNPACK (V4_16 or V4_32 directly after an STMOD(1)) each
// vertex whose adc bit (0x8000 in the w component) is clear is counted.
// NUM is decoded as ((n-1)&0xFF)+1, i.e. a stored 0 means 256, matching
// ExtractXYZs()/ReplaceXYZs().
int dma::GetNumTris(uint8 *pTag)
{
    // get start and end of dma packet (the vifcodes begin in the upper half of the tag)
    uint8 *p_start = pTag + 8;
    uint8 *p_end   = pTag + 16 + (*(uint16 *)pTag << 4);

    // parse vifcodes
    uint8 *p_code = p_start;
    int num_tris = 0;
    int num_verts;
    do
    {
        if (((p_code[3] & 0x7F) == 0x05) && (p_code[0] == 1))	// look for STMOD(1)
        {
            p_code = vif::NextCode(p_code);
            Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code));
            num_verts = (int)((((uint32)p_code[2]-1)&0xFF)+1);

            // loop over verts, counting the adc bits which are zero
            if (p_code[3] & 0x01)	// V4_16
            {
                // w is the 4th halfword of each vertex; data starts 4 bytes after the vifcode
                uint16 *p_adc = ((uint16 *)p_code)+5;
                for (int i=0; i<num_verts; i++,p_adc+=4)
                {
                    if ((*p_adc & 0x8000) == 0)
                    {
                        num_tris++;
                    }
                }
            }
            else					// V4_32
            {
                // w is the 4th word of each vertex; data starts 4 bytes after the vifcode
                uint32 *p_adc = ((uint32 *)p_code)+4;
                for (int i=0; i<num_verts; i++,p_adc+=4)
                {
                    if ((*p_adc & 0x00008000) == 0)
                    {
                        num_tris++;
                    }
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);

    return num_tris;
}
// Copy the whole dma packet at pTag (tag quadword plus its QWC quadwords of
// data) to pDest.
void dma::Copy(uint8 *pTag, uint8 *pDest)
{
    const int num_bytes = GetDmaSize(pTag);		// (QWC+1)*16 bytes
    memcpy(pDest, pTag, num_bytes);
}
// Copy values between a flat array and the data of every vifcode in the dma
// packet at pTag that satisfies (vifcode & vifcodeMask) == vifcodePattern.
//   pArray - flat array; it is walked continuously across all matching vifcodes
//   size   - size in bytes of one element, so each match moves (NUM*size)/4 words
//   dir    - 0: packet -> array (extract), non-zero: array -> packet (replace)
// NUM is decoded as ((n-1)&0xFF)+1, i.e. a stored 0 means 256, matching the
// XYZ and ST transfer functions.
void dma::TransferValues(uint8 *pTag, uint8 *pArray, int size, int dir, uint32 vifcodeMask, uint32 vifcodePattern)
{
    // get start and end of dma packet (the vifcodes begin in the upper half of the tag)
    uint8 *p_start = pTag + 8;
    uint8 *p_end   = pTag + 16 + (*(uint16 *)pTag << 4);

    // parse vifcodes
    uint8 *p_code = p_start;
    uint32 *p_array_words = (uint32 *)pArray;
    int i, num_words;

    do
    {
        if ((*(uint32 *)p_code & vifcodeMask) == vifcodePattern)
        {
            // the data follows the 4-byte vifcode
            uint32 *p_packet_words = (uint32 *)(p_code+4);
            num_words = ((int)((((uint32)p_code[2]-1)&0xFF)+1) * size) >> 2;

            if (dir)
            {
                for (i=0; i<num_words; i++)
                {
                    *p_packet_words++ = *p_array_words++;
                }
            }
            else
            {
                for (i=0; i<num_words; i++)
                {
                    *p_array_words++ = *p_packet_words++;
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Copy colours between a flat array of 4-byte entries and the colour unpacks
// of the dma packet at pTag.
//   dir - 0: packet -> array (extract), non-zero: array -> packet (replace)
// Vifcodes with command 0x6A or 0x6E are treated as colour unpacks:
//   0x6E : 4 bytes per vertex; the whole packet is handed to TransferValues()
//   0x6A : 3 bytes per vertex in the packet; 3 bytes are copied and the 4th
//          byte of each array entry is skipped
// NUM is decoded as ((n-1)&0xFF)+1, i.e. a stored 0 means 256, matching the
// XYZ and ST transfer functions.
void dma::TransferColours(uint8 *pTag, uint8 *pArray, int dir)
{
    // get start and end of dma packet (the vifcodes begin in the upper half of the tag)
    uint8 *p_start = pTag + 8;
    uint8 *p_end   = pTag + 16 + (*(uint16 *)pTag << 4);

    // parse vifcodes
    uint8 *p_code = p_start;
    uint8 *p_array_bytes = (uint8 *)pArray;
    int i, j, num_colours;

    do
    {
        if ((*(uint32 *)p_code & 0x7B000000) == 0x6A000000)
        {
            if ((*(uint32 *)p_code & 0x7F000000) == 0x6E000000)
            {
                // 4-byte colours are a plain word copy, restarted from the top of the packet
                TransferValues(pTag, pArray, 4, dir, 0x7F000000, 0x6E000000);
                return;
            }

            // the data follows the 4-byte vifcode
            uint8 *p_packet_bytes = p_code+4;
            num_colours = (int)((((uint32)p_code[2]-1)&0xFF)+1);

            for (i=0; i<num_colours; i++)
            {
                for (j=0; j<3; j++)
                {
                    if (dir)
                    {
                        *p_packet_bytes++ = *p_array_bytes++;
                    }
                    else
                    {
                        *p_array_bytes++ = *p_packet_bytes++;
                    }
                }
                // the array side has a 4th byte per colour that the packet lacks
                p_array_bytes++;
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Return the bit length (32 or 16) of the position components in the dma
// packet at pTag, taken from the first vertex UNPACK (the vifcode directly
// after an STMOD(1)). Returns 0 if the packet holds no such unpack.
int dma::GetBitLengthXYZ(uint8 *pTag)
{
    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    do
    {
        const bool is_stmod1 = ((p_code[3] & 0x7F) == 0x05) && (p_code[0] == 1);
        if (is_stmod1)
        {
            p_code = vif::NextCode(p_code);
            Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code));

            // the low two bits of the command select the component width: 0 -> 32, 1 -> 16
            return 32 >> (p_code[3] & 0x03);
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);

    return 0;
}
// Copy every vertex position in the dma packet at pTag into pArray as
// 4 x sint32 per vertex. 16-bit source components are sign-extended.
// Vertex data is the UNPACK directly following an STMOD vifcode whose
// immediate has bit 0 set; a NUM of 0 counts as 256.
void dma::ExtractXYZs(uint8 *pTag, uint8 *pArray)
{
    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    sint32 *p_out = (sint32 *)pArray;

    do
    {
        if ((*(uint32 *)p_code & 0x7F000001) == 0x05000001)
        {
            p_code = vif::NextCode(p_code);
            Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code));

            // four components per vertex
            int remaining = (int)((((uint32)p_code[2]-1)&0xFF)+1) << 2;

            const bool is_32bit = ((p_code[3] & 0x7F) == 0x6C);
            if (is_32bit)
            {
                sint32 *p_in = (sint32 *)(p_code+4);
                while (remaining-- > 0)
                {
                    *p_out++ = *p_in++;
                }
            }
            else
            {
                sint16 *p_in = (sint16 *)(p_code+4);
                while (remaining-- > 0)
                {
                    *p_out++ = (sint32)*p_in++;
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Overwrite the vertex positions in the dma packet at pTag with the
// 4 x sint32 per vertex held in pArray (the inverse of ExtractXYZs).
// Values are truncated to 16 bits when the packet stores 16-bit components.
// skipW - when true, the W component of every vertex in the packet is left
//         untouched (the array is still advanced past it).
void dma::ReplaceXYZs(uint8 *pTag, uint8 *pArray, bool skipW)
{
    //printf("Replacing XYZs...\n");

    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    sint32 *p_in = (sint32 *)pArray;
    int component;

    do
    {
        if ((*(uint32 *)p_code & 0x7F000001) == 0x05000001)
        {
            p_code = vif::NextCode(p_code);
            Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code));

            // four components per vertex; a NUM of 0 counts as 256
            const int num_components = (int)((((uint32)p_code[2]-1)&0xFF)+1) << 2;

            if ((p_code[3] & 0x7F) == 0x6C)
            {
                // 32 bit
                sint32 *p_out = (sint32 *)(p_code+4);
                for (component=0; component<num_components; component++, p_out++, p_in++)
                {
                    if (skipW && (component & 3) == W)
                    {
                        continue;
                    }
                    *p_out = *p_in;
                }
            }
            else
            {
                // 16 bit
                sint16 *p_out = (sint16 *)(p_code+4);
                for (component=0; component<num_components; component++, p_out++, p_in++)
                {
                    if (skipW && (component & 3) == W)
                    {
                        continue;
                    }
                    *p_out = (sint32)*p_in;
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Copy the vertex colours of the dma packet at pTag into pArray.
void dma::ExtractRGBAs(uint8 *pTag, uint8 *pArray)
{
    // direction 0 = packet -> array
    const int from_packet_to_array = 0;

    //TransferValues(pTag, pArray, 4, 0, 0x7F000000, 0x6E000000);
    TransferColours(pTag, pArray, from_packet_to_array);
}
// Overwrite the vertex colours of the dma packet at pTag with those in pArray.
void dma::ReplaceRGBAs(uint8 *pTag, uint8 *pArray)
{
    // direction 1 = array -> packet
    const int from_array_to_packet = 1;

    //TransferValues(pTag, pArray, 4, 1, 0x7F000000, 0x6E000000);
    TransferColours(pTag, pArray, from_array_to_packet);
}
// Copy every texture coordinate pair in the dma packet at pTag into pArray
// as 2 x sint32 per vertex. Unpacks with command 0x64 are copied as 32-bit
// words; those with 0x65 hold 16-bit values, which are widened to sint32.
void dma::ExtractSTs(uint8 *pTag, uint8 *pArray)
{
    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    sint32 *p_out = (sint32 *)pArray;

    do
    {
        if ((*(uint32 *)p_code & 0x7E000000) == 0x64000000)
        {
            // two components per vertex; a NUM of 0 counts as 256
            int remaining = (int)((((uint32)p_code[2]-1)&0xFF)+1) << 1;

            if ((p_code[3] & 0x7F) == 0x64)
            {
                // 32-bit
                sint32 *p_in = (sint32 *)(p_code+4);
                while (remaining-- > 0)
                {
                    *p_out++ = *p_in++;
                }
            }
            else
            {
                // 16-bit
                sint16 *p_in = (sint16 *)(p_code+4);
                while (remaining-- > 0)
                {
                    *p_out++ = *p_in++;
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Overwrite the texture coordinates in the dma packet at pTag with the
// 2 x sint32 per vertex held in pArray (the inverse of ExtractSTs).
// Values are truncated to 16 bits when the packet stores 16-bit coordinates.
void dma::ReplaceSTs(uint8 *pTag, uint8 *pArray)
{
    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    sint32 *p_in = (sint32 *)pArray;

    do
    {
        if ((*(uint32 *)p_code & 0x7E000000) == 0x64000000)
        {
            // two components per vertex; a NUM of 0 counts as 256
            int remaining = (int)((((uint32)p_code[2]-1)&0xFF)+1) << 1;

            if ((p_code[3] & 0x7F) == 0x64)
            {
                // 32-bit
                sint32 *p_out = (sint32 *)(p_code+4);
                while (remaining-- > 0)
                {
                    *p_out++ = *p_in++;
                }
            }
            else
            {
                // 16-bit
                sint16 *p_out = (sint16 *)(p_code+4);
                while (remaining-- > 0)
                {
                    *p_out++ = *p_in++;
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Transform every texture coordinate pair in the dma packet at pTag by mat,
// in place. Each pair is expanded to a 4-vector (s, t, 0, w), multiplied by
// the matrix, and the first two components are written back.
//   32-bit float st's : w = 1
//   16-bit fixed uv's : values are biased by 0x2000 in the packet, w = 1024
void dma::TransformSTs(uint8 *pTag, const Mth::Matrix &mat)
{
    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    Mth::Vector texcoords;
    int vert;

    do
    {
        if ((p_code[3] & 0x6E) == 0x64)
        {
            // a NUM of 0 counts as 256
            const int num_verts = (int)((((uint32)p_code[2]-1)&0xFF)+1);

            if ((p_code[3] & 0x6F) == 0x64)
            {
                // 32-bit float st's
                float *p_coords = (float *)(p_code+4);
                for (vert=0; vert<num_verts; vert++, p_coords+=2)
                {
                    texcoords[0] = p_coords[0];
                    texcoords[1] = p_coords[1];
                    texcoords[2] = 0.0f;
                    texcoords[3] = 1.0f;

                    texcoords *= mat;

                    p_coords[0] = texcoords[0];
                    p_coords[1] = texcoords[1];
                    //printf("(%g, %g)\n", texcoords[0], texcoords[1]);
                }
            }
            else
            {
                // 16-bit fixed uv's
                sint16 *p_coords = (sint16 *)(p_code+4);
                for (vert=0; vert<num_verts; vert++, p_coords+=2)
                {
                    texcoords[0] = (float)(p_coords[0]-0x2000);
                    texcoords[1] = (float)(p_coords[1]-0x2000);
                    texcoords[2] = 0.0f;
                    texcoords[3] = 1024.0f;		// this should really be "texture width * 16",
                                                // but we don't have access to texture width here

                    texcoords *= mat;

                    p_coords[0] = (sint16)texcoords[0] + 0x2000;
                    p_coords[1] = (sint16)texcoords[1] + 0x2000;
                    //printf("(%g, %g)\n", texcoords[0], texcoords[1]);
                }
            }
        }

        p_code = vif::NextCode(p_code);
    }
    while (p_code < p_end);
}
// Remove redundant vertices from the vertex packets of the dma packet at pTag,
// compacting the data in place.
// A vertex packet is recognised as a giftag unpack followed by 5 element
// unpacks (tex coords, weights, normals, colours, coords), seen while
// vif::WL==1 && vif::CL>1 and after an MSCAL (or at the start of the packet).
// A vertex is dropped when bit 31 of the second word of its coords is set for
// it and for the next two vertices (fewer near the end of the packet).
// After compaction the NUM field of the 5 unpacks, NLOOP and the last word of
// the giftag, the ADDR of subsequent unpacks and the vu-mem size in the dma
// tag are all reduced to match. The byte size of the dma packet is unchanged:
// the freed space is filled with zeros.
void dma::SqueezeADC(uint8 *pTag)
{
uint8 *p_code, *p_end, *p_unpack[5];
uint16 addr, vumem_size;
uint32 nloop=0, nreg=0, texcrds_size, *p_giftag=NULL, i, num_squeezed, addr_diff;
uint32 *p_texcrds_source, *p_texcrds_dest;
uint8 *p_weights_source, *p_weights_dest;
uint16 *p_normal_source, *p_normal_dest;
uint32 *p_colour_source, *p_colour_dest;
uint32 *p_coords_source, *p_coords_dest;
int unpack_num;
bool seenMSCAL;
// get start and end of vifcode packet
p_code = pTag + 8;
p_end = pTag + 16 + (*(uint16 *)pTag << 4);
// initialise state
// addr_diff  - vu-mem quadwords saved so far, subtracted from later unpack ADDRs
// unpack_num - -1 while waiting for the giftag unpack, then 0..4 for the elements
addr_diff = 0;
unpack_num = -1;
seenMSCAL = true;
// parse vifcodes
while (p_code < p_end)
{
if ((p_code[3] & 0x60) == 0x60) // if it's an unpack
{
// adjust ADDR field
// (only the low 10 bits are rewritten, the upper 6 bits of the halfword are preserved)
addr = *(uint16 *)p_code;
addr -= addr_diff;
addr &= 0x3FF;
*(uint16 *)p_code = *(uint16 *)p_code & 0xFC00 | addr;
// if we have a VU1_ADR(Jump) in a giftag, reset offset
if (((uint16 *)p_code)[1]==0x6C01 && ((uint32 *)p_code)[1]==0x00008000 && ((uint32 *)p_code)[2]==0x00000052)
addr_diff = 0;
// look for vertex packets
// (vif::WL / vif::CL are state owned by the vif module; presumably tracked
// by vif::NextCode() as it steps over STCYCL codes -- confirm in vif.cpp)
if (vif::WL==1 && vif::CL>1 && seenMSCAL)
{
// look for giftag
if (unpack_num==-1)
{
// the giftag data starts right after the 4-byte unpack vifcode
p_giftag = (uint32 *)p_code + 1;
nloop = p_giftag[0] & 0x7FFF;
nreg = p_giftag[1] >> 28;
}
// look for vertex elements
else
{
p_unpack[unpack_num] = p_code;
}
// next element
unpack_num++;
}
}
else
{
if ((p_code[3] & 0x7F) == 0x14) // mscal
{
seenMSCAL = true;
}
}
// step to next vifcode
p_code = vif::NextCode(p_code);
// have we found all 5 unpacks of a vertex packet?
if (unpack_num==5)
{
// perform compression...
// set element pointers
// (source and dest start together; dest falls behind as vertices are dropped)
p_texcrds_source = p_texcrds_dest = (uint32 *)(p_unpack[0]+4);
p_weights_source = p_weights_dest = (uint8 *)(p_unpack[1]+4);
p_normal_source = p_normal_dest = (uint16 *)(p_unpack[2]+4);
p_colour_source = p_colour_dest = (uint32 *)(p_unpack[3]+4);
p_coords_source = p_coords_dest = (uint32 *)(p_unpack[4]+4);
// set datasize for tex coords
// (in words per vertex: 2 when the low 3 bits of the unpack command are 4, otherwise 1)
texcrds_size = ((p_unpack[0][3] & 0x07) == 0x04) ? 2 : 1;
// loop over source vertices
num_squeezed = 0;
for (i=0; i<nloop; i++)
{
// skip if vertex is redundant
// (coords are 2 words per vertex, so [1], [3], [5] are the second words of
// this vertex and the following two)
// NOTE(review): nloop is unsigned, so 'nloop-3' wraps when nloop<3 and the
// first clause then also inspects words past the last vertex -- confirm
if ( (i<=nloop-3) && (p_coords_source[1] & 0x80000000)
&& (p_coords_source[3] & 0x80000000)
&& (p_coords_source[5] & 0x80000000)
|| (i==nloop-2) && (p_coords_source[1] & 0x80000000)
&& (p_coords_source[3] & 0x80000000)
|| (i==nloop-1) && (p_coords_source[1] & 0x80000000))
{
p_texcrds_source += texcrds_size;
p_weights_source += 3;
p_normal_source += 3;
p_colour_source += 1;
p_coords_source += 2;
num_squeezed++;
}
else if (p_coords_source != p_coords_dest)
// copy vertex
{
*p_texcrds_dest++ = *p_texcrds_source++;
if (texcrds_size==2)
*p_texcrds_dest++ = *p_texcrds_source++;
*p_weights_dest++ = *p_weights_source++;
*p_weights_dest++ = *p_weights_source++;
*p_weights_dest++ = *p_weights_source++;
*p_normal_dest++ = *p_normal_source++;
*p_normal_dest++ = *p_normal_source++;
*p_normal_dest++ = *p_normal_source++;
*p_colour_dest++ = *p_colour_source++;
*p_coords_dest++ = *p_coords_source++;
*p_coords_dest++ = *p_coords_source++;
}
else
// just inc pointers
// (nothing has been dropped yet, so the vertex is already in place)
{
p_texcrds_source += texcrds_size;
p_weights_source += 3;
p_normal_source += 3;
p_colour_source += 1;
p_coords_source += 2;
p_texcrds_dest += texcrds_size;
p_weights_dest += 3;
p_normal_dest += 3;
p_colour_dest += 1;
p_coords_dest += 2;
}
}
// reduce nloop
nloop -= num_squeezed;
// make sure there are at least 2 vertices left
// (the dest pointers are advanced over existing data, so whatever is stored
// there is kept as the extra vertices)
if (nloop < 2)
{
num_squeezed -= 2-nloop;
p_texcrds_dest += (2-nloop) * texcrds_size;
p_weights_dest += (2-nloop) * 3;
p_normal_dest += (2-nloop) * 3;
p_colour_dest += (2-nloop) * 1;
p_coords_dest += (2-nloop) * 2;
nloop = 2;
}
// adjust SIZE fields in unpacks
// (byte 2 of each unpack vifcode holds its element count)
for (i=0; i<5; i++)
{
p_unpack[i][2] = nloop;
}
// pad the dead space after each unpack with vif NOPs
for (i=0; i<num_squeezed; i++)
{
*p_texcrds_dest++ = 0;
if (texcrds_size==2)
*p_texcrds_dest++ = 0;
*p_weights_dest++ = 0;
*p_weights_dest++ = 0;
*p_weights_dest++ = 0;
*p_normal_dest++ = 0;
*p_normal_dest++ = 0;
*p_normal_dest++ = 0;
*p_colour_dest++ = 0;
*p_coords_dest++ = 0;
*p_coords_dest++ = 0;
}
// adjust NLOOP and SIZE in giftag
p_giftag[0] = p_giftag[0] & 0xFFFF8000 | nloop;
p_giftag[3] = nloop*nreg;
// accumulate savings
// (each dropped vertex frees one vu-mem quadword in each of the 5 element arrays)
addr_diff += num_squeezed * 5;
// reset for next vertex packet
// (no further vertex packet is accepted until another MSCAL has been seen)
unpack_num = -1;
seenMSCAL = false;
}
}
// adjust vu-mem size in dma tag
// (low 10 bits of the tag's second halfword; the upper 6 bits are preserved)
vumem_size = ((uint16 *)pTag)[1];
vumem_size -= addr_diff;
vumem_size &= 0x3FF;
((uint16 *)pTag)[1] = ((uint16 *)pTag)[1] & 0xFC00 | vumem_size;
}
// Strip the vif NOPs out of the dma packet at pTag.
// All other vifcodes (with their data) are slid down over the gaps, the tail
// is padded with NOPs up to a quadword boundary and the QWC in the dma tag is
// reduced to the new size.
void dma::SqueezeNOP(uint8 *pTag)
{
    // the vifcodes begin in the upper half of the tag and run to the end of the packet
    uint8 *p_code      = pTag + 8;
    uint8 *const p_end = pTag + 16 + (*(uint16 *)pTag << 4);

    // read and write cursors for the in-place compaction
    uint32 *p_read  = (uint32 *)p_code;
    uint32 *p_write = p_read;

    // parse vifcodes
    while (p_code < p_end)
    {
        const bool is_nop = (p_code[3] == 0x00);

        p_code = vif::NextCode(p_code);

        if (is_nop)
        {
            // drop the NOP word
            p_read++;
            continue;
        }

        // keep this vifcode: copy everything up to the start of the next one
        for ( ; p_read < (uint32 *)p_code; p_read++, p_write++)
        {
            *p_write = *p_read;
        }
    }

    // pad to qword boundary with nops
    for ( ; (uint)p_write & 0xF; p_write++)
    {
        *p_write = 0;
    }

    // adjust dma size in dma tag
    ((uint16 *)pTag)[0] = ((uint8 *)p_write - pTag - 16) >> 4;
}
//---------------------------------
// S T A T I C D A T A
//---------------------------------
// Definitions of the static members of dma declared in dma.h.
uint8 * dma::pBase; // base of dynamic DMA buffer for this frame
uint8 * dma::pLoc; // current position in it that we are building DMA packets
// dma tag of the current build context; saved and restored per group by ReallySetList()
uint8 * dma::pTag;
uint8 * dma::pPrebuiltBuffer;
uint8 * dma::pDummyBuffer; // (Mick) used to simulate memory usage
uint8 * dma::pRuntimeBuffer;
// the header notes describe pList[0] and pList[1] as the two halves of the runtime buffer
uint8 * dma::pList[2];
// table of 64-bit subroutine tags, indexed by the Num argument of Gosub()
uint64 * dma::Gosubs;
// start address of the subroutine being recorded (set by BeginSub/BeginSub3D)
uint8 * dma::pSub;
// tag ID of the subroutine being recorded (set by BeginSub, read by EndSub)
dma::eTag dma::ID;
// call stack index and return-address stack used by NextTag() for call/ret tags
int dma::sp;
uint8 * dma::Stack[2];
// group whose dma list is currently selected (see ReallySetList), or NULL
void * dma::sp_group;
int dma::size;
} // namespace NxPs2