#include #include "dma.h" #include "vif.h" #include "vu1.h" #include "gs.h" #include "group.h" #include "render.h" #include "vu1code.h" #include "switches.h" #include "geomnode.h" /* Mick Notes: The calls to the DMA functions suggest that you are immediately executing something. You are not. You are simply building a list of DMA commands that get executed the next frame. The DMA list is the primary mechanism for initiating rendering on the PS2. To render something (like a line), you "simply" need to generate the appropriate DMA packets that contain GIF packets, containing GS primitives, and then link this into the main DMA list. Things such as world sectors have pre-built DMA lists that contain commands to upload lists of vertices to VU1 micro-memory, and then upload lists of triangles, and then trigger the appropriate VU1 microcode to transform and render them. These pre-built lists are linked in each frame with the appropriate transformation matrices that are needed based on the current camera position and the current object position (if it is a moving object). Thus, each object is mostly pre-built, and so rendering it requires very little CPU time. Each frame, we build a series of dynamic DMA lists. There are two lists for each group (see group.cpp). The first list is the texture upload. This is pretty simple, just uploading a few textures. Texture uploading happens asynchronously with rendering, so we can be executing a DMA list of rendering stuff at the same time as we are uploading the textures for the next group. A lot of the DMA code is in DMA.H, as it is small inline functions. The Runtime buffer is dynamically allocated in InitializeEngine(). Last time I looked it was 512K in size (note though, this is increased to 16MB for the debugging wireframe mode). The Runtime DMA lists are double buffered. You are executing one whilst building the next one. 
The pRuntimeBuffer is split in two, and pointers to the base of each half are stored in pList[0] and pList[1]. The integer variable "Field" is 0 or 1, and indicates which field (odd or even) we are rendering to. The DMA lists are built using a global variable uint8 * pLoc. At the start of a frame, pLoc is initialized like: dma::pLoc = dma::pList[Field]; So it points to the start of one of our 256K DMA buffers (remember the other is being executed). DMA packets are now built using calls to the functions here and in DMA.H, for example: dma::Tag(dma::ref, ((uint8 *)&MPGEnd-(uint8 *)&MPGStart+15)/16, (uint)&MPGStart); vif::NOP(); vif::NOP(); (the above simply transfers the microcode from RAM to VU1 micromem). Now, it's important to note that DMA execution does NOT simply start at pList[Field] and then run all the way through a frame's worth of data. There are actually several DMA lists, which are fired off individually. Some of these (the groups) are logically connected so that the end of one group's DMA (and the GS/VU activity it triggers) causes an interrupt, which the CPU handles by starting the DMA for another group (see interrupt.cpp). After the groups, there is the "immediate mode" DMA list, triggered by pEpilogue->pRender[Field]. This is simply a raw DMA list that you can put anything you want into. It starts at the value of pLoc after RenderWorld has been called (a terrible hack, as noted in the code :) All of the "immediate mode" rendering is considered to be part of pEpilogue. It is terminated by the RenderEpilogue() function, which just links in a final interrupt trigger, which will get picked up by GsHandler(), and sGroup::pRenderGroup will be set to NULL, which is what WaitForRendering() uses to determine if rendering has finished. 
*/ namespace NxPs2 { // begin a subroutine /* BeginSub(ID): calls Align(0,16), records the current build position pLoc in pSub as the start of the subroutine, stores ID in dma::ID for EndSub, and zeroes vu1::Loc and vu1::Buffer. */ void dma::BeginSub(eTag ID) { Align(0,16); pSub = pLoc; dma::ID = ID; vu1::Loc = vu1::Buffer = 0; } // end a subroutine and return its address /* EndSub(): calls Align(0,16), then returns a 64-bit value holding pSub in bits 32-63, ID at bit 28 and the low 10 bits of vu1::Loc at bits 16-25. When ID is ref, (pLoc - pSub) >> 4 (the byte length of the subroutine divided by 16) is OR'd into the low bits. Asserts that ID is neither refe nor refs. Presumably the caller stores the result in Gosubs[] for Gosub() - not visible here, confirm. */ uint64 dma::EndSub(void) { uint64 GosubTag; Align(0,16); GosubTag = ((uint64)pSub<<32) | ((uint64)ID<<28) | (vu1::Loc & 0x3FF) << 16; Dbg_MsgAssert(ID!=refe && ID!=refs, ("refe and refs not supported in dma tag!")); if (ID==ref) { GosubTag |= (pLoc - pSub) >> 4; } return GosubTag; } // call a subroutine (using a dma::call or a dma::ref) /* Gosub(Num, Path): writes Gosubs[Num] with Store64, then handles what follows it according to Path: 1 - emits vif::BASE(vu1::Loc) and vif::OFFSET(0) and advances vu1::Loc by bits 16-25 of the tag; 2 - emits two vif::NOP; 3 - advances pLoc by 8 without writing anything. For any other Path the __PLAT_NGPS__ build prints an error and calls exit(1); other builds emit nothing more. */ void dma::Gosub(uint Num, uint Path) { register uint64 Tag = Gosubs[Num]; Store64(Tag); switch (Path) { case 1: vif::BASE(vu1::Loc); vif::OFFSET(0); vu1::Loc += (Tag>>16) & 0x3FF; // VUMem size break; case 2: vif::NOP(); vif::NOP(); break; case 3: pLoc += 8; // no need to write anything since TTE=0 break; #ifdef __PLAT_NGPS__ default: printf("error: dma::Gosub() called with unrecognised path number\n"); exit(1); #endif } } // begin a 3D subroutine /* BeginSub3D(): like BeginSub but without recording a tag ID: calls Align(0,16), sets pSub = pLoc and zeroes vu1::Loc and vu1::Buffer. */ void dma::BeginSub3D(void) { Align(0,16); pSub = pLoc; vu1::Loc = vu1::Buffer = 0; } // end a 3D subroutine and return its address /* EndSub3D(): ORs the low 10 bits of vu1::Loc into the second 16-bit word at pSub and returns pSub. */ uint8 *dma::EndSub3D(void) { ((uint16 *)pSub)[1] |= vu1::Loc&0x3FF; return pSub; } // call a 3D subroutine (always using a dma::call) /* Gosub3D(pSub, RenderFlags): builds a call tag addressed at pSub that carries vif::BASE(vu1::Loc), vif::OFFSET(0) and vif::ITOP(RenderFlags), then advances vu1::Loc by the 16-bit value found at ((uint16 *)pSub)[1] (the field EndSub3D writes). The parameter pSub hides the static dma::pSub inside this function. */ void dma::Gosub3D(uint8 *pSub, uint RenderFlags) { BeginTag(call,(uint)pSub); vif::BASE(vu1::Loc); vif::OFFSET(0); vif::ITOP(RenderFlags); EndTag(); vu1::Loc += ((uint16 *)pSub)[1]; } // dma list traversal function /* NextTag(pTag, stepInto): returns the address of the tag that follows pTag, or NULL for an end tag or an unrecognised ID. cnt: the tag after the inline data, i.e. pTag + 16 + 16 * (first 16-bit word); next: the pointer stored at ((uint8 **)pTag)[1]; ref: pTag + 16; call: when stepInto is true the address after the inline data is pushed on Stack and the pointer stored at ((uint8 **)pTag)[1] is returned, otherwise the call is stepped over like a cnt; ret: pops Stack. Asserts that bit 31 (IRQ) of the first word is clear. NOTE(review): the refe/refs assert compares the whole first word with ID<<28, so it can only fire when the low 28 bits are all zero; and the sp<2 assert is evaluated for every call tag, even when stepInto is false - confirm both are intended. */ uint8 *dma::NextTag(uint8 *pTag, bool stepInto) { Dbg_MsgAssert((*(uint32 *)pTag&0x80000000)==0, ("IRQ bit set in dma tag")); Dbg_MsgAssert(*(uint32 *)pTag!=refe<<28 && *(uint32 *)pTag!=refs<<28, ("refe and refs not supported in dma tag!")); switch (*(uint32 *)pTag>>28) { case cnt: return pTag + 16 + ((uint)((uint16 *)pTag)[0] << 4); case next: return ((uint8 **)pTag)[1]; case ref: return pTag + 16; case call: Dbg_MsgAssert(sp<2, ("dma call stack overflow")); if (stepInto) { Stack[sp++] = pTag + 16 + ((uint)((uint16 *)pTag)[0] << 4); 
return ((uint8 **)pTag)[1]; } else { return pTag + 16 + ((uint)((uint16 *)pTag)[0] << 4); } case ret: Dbg_MsgAssert(sp>0, ("dma call stack underflow")); return Stack[--sp]; case end: default: return NULL; } } #ifdef __PLAT_NGPS__ // auxiliary comparison function for dma sort /* Cmp(p1, p2): qsort comparator over SSortElement; returns -1, +1 or 0 so that elements are ordered by ascending z. */ int dma::Cmp(const void *p1, const void *p2) { return ((SSortElement *)p1)->z < ((SSortElement *)p2)->z ? -1 : ((SSortElement *)p1)->z > ((SSortElement *)p2)->z ? +1 : 0 ; } #if 0 // original version /* NOTE(review): this disabled (#if 0) version looks damaged in this copy of the file - the header of the reordering for-loop below is incomplete ("p_elementaddress"). Compare against the original source before re-enabling it. */ // sort a dma list of mesh packets on z // the z is stored in the unused ADDR word of the first cnt tag of the mesh packet uint8 *dma::SortGroup(uint8 *pList) { int num_elements; SSortElement *p_element; uint8 *p_tag, *p_end_tag, *p_prev_tag=NULL; eTag ID; // set array base at start of scratchpad SSortElement *p_array = (SSortElement *)0x70000000; // copy the addresses and z-values into array p_element = p_array; p_tag = pList; sp = 0; // starting at top level of dma list while ((ID = (eTag)(*(uint32 *)p_tag>>28)) != end) { if (ID == cnt) { p_element->address = p_tag; p_element->z = ((float *)p_tag)[1]; p_element++; } p_tag = NextTag(p_tag, false); } // check array fits within scratchpad num_elements = p_element-p_array; Dbg_MsgAssert(num_elements*sizeof(SSortElement)<=16384, ("Can't fit array in scratchpad")); // record address of end tag p_end_tag = p_tag; // sort the array qsort(p_array, num_elements, sizeof(SSortElement), Cmp); // reorder the dma list according to the sorted array for (p_element=p_array; p_elementaddress; do { p_prev_tag = p_tag; p_tag = NextTag(p_tag, false); ID = (eTag)(*(uint32 *)p_tag >> 28); } while (ID!=cnt && ID!=end); ((uint8 **)p_prev_tag)[1] = (p_element+1)->address; } // patch up the final dma::next tag to point to the dma::end tag ((uint8 **)p_prev_tag)[1] = p_end_tag; // chain through the whole list to adjust vu1 base pointers vu1::Loc = 0; for (p_tag = p_array->address; *(uint32 *)p_tag>>28 != end; p_tag = NextTag(p_tag, false)) { //if (*(uint32 *)p_tag & 
0x03FF0000) if (((uint8 *)p_tag)[11] == 0x03) { ((uint16 *)p_tag)[4] &= ~0x3FF; ((uint16 *)p_tag)[4] |= vu1::Loc; vu1::Loc += ((uint16 *)p_tag)[1]; } } // return the address of the head tag return p_array->address; } #else // new version: // the list is sorted in segments corresponding to viewports // sort a dma list of mesh packets on z // the z is stored in the unused ADDR word of the first cnt tag of the mesh packet /* SortGroup(pList): walks the list from pList with NextTag (never stepping into calls) up to the end tag, filling an SSortElement array placed at address 0x70000000 with tag addresses and the float read from ((float *)p_tag)[1]; asserts the array is at most 16384 bytes; relinks tags by writing the next element's address into ((uint8 **)p_prev_tag)[1] and points the last link at the end tag; then rewalks the list and, for every tag whose bits 16-25 are non-zero, replaces the low 10 bits of ((uint16 *)p_tag)[4] with a running vu1::Loc and advances vu1::Loc by ((uint16 *)p_tag)[1]. Returns p_array->address, the head tag. NOTE(review): this copy of the function looks damaged - the text after "for (i=0;" is incomplete in two places, so the segment detection, the per-segment sort and the header of the relinking loop cannot be read from here. Restore them from the original source. */ uint8 *dma::SortGroup(uint8 *pList) { int num_elements, i, num_segments; SSortElement *p_element, *p_segment[4]; uint8 *p_tag, *p_end_tag, *p_prev_tag=NULL; eTag ID; // set array base at start of scratchpad SSortElement *p_array = (SSortElement *)0x70000000; // copy the addresses and z-values into array p_element = p_array; p_tag = pList; sp = 0; // starting at top level of dma list num_segments = 0; while ((ID = (eTag)(*(uint32 *)p_tag>>28)) != end) { for (i=0; iaddress = p_tag; p_element->z = ((float *)p_tag)[1]; p_element++; } p_tag = NextTag(p_tag, false); } // check array fits within scratchpad num_elements = p_element-p_array; Dbg_MsgAssert(num_elements*sizeof(SSortElement)<=16384, ("Can't fit array in scratchpad")); // record address of end tag p_end_tag = p_tag; // sort the array in segments if (num_segments) { for (i=0; iaddress; do { p_prev_tag = p_tag; p_tag = NextTag(p_tag, false); ID = (eTag)(*(uint32 *)p_tag >> 28); } while (ID!=cnt && ID!=end); ((uint8 **)p_prev_tag)[1] = (p_element+1)->address; } // patch up the final dma::next tag to point to the dma::end tag ((uint8 **)p_prev_tag)[1] = p_end_tag; // chain through the whole list to adjust vu1 base pointers vu1::Loc = 0; for (p_tag = p_array->address; *(uint32 *)p_tag>>28 != end; p_tag = NextTag(p_tag, false)) { if (*(uint32 *)p_tag & 0x3FF0000) { ((uint16 *)p_tag)[4] &= ~0x3FF; ((uint16 *)p_tag)[4] |= vu1::Loc; vu1::Loc += ((uint16 *)p_tag)[1]; } } // return the address of the head tag return p_array->address; } #endif #endif /* BeginList(pGroup): pGroup is used as an sGroup pointer. In the __PLAT_NGPS__ build: clears Used[render::Field], records pLoc in pRender[render::Field], emits a cnt packet of VIF setup codes ending in MSCAL(VU1_ADDR(Setup)), then a next tag with address 0 followed by two NOPs, and finally stores vu1_loc = 0 and p_tag = pTag in the group (presumably pTag then refers to that next tag, which ReallySetList later patches - confirm in dma.h). In other builds the body is empty. */ void dma::BeginList(void *pGroup) { #ifdef __PLAT_NGPS__ 
// assume group isn't used ((sGroup *)pGroup)->Used[render::Field] = false; // set the dma list pointer ((sGroup *)pGroup)->pRender[render::Field] = pLoc; // VIF1 and VU1 setup BeginTag(cnt, 0xFF000000); // bit of a cheat, so it will stay at the start of any sorted list /* presumably 0xFF000000 lands in the word SortGroup reads as the float z key, where it is a large negative value and so sorts first - confirm against BeginTag */ vif::FLUSH(); vif::STMASK(0); vif::STMOD(0); vif::STCYCL(1,1); vif::BASE(0); vif::OFFSET(0); vif::MSCAL(VU1_ADDR(Setup)); EndTag(); dma::Tag(dma::next, 0, 0); vif::NOP(); vif::NOP(); ((sGroup *)pGroup)->vu1_loc = 0; ((sGroup *)pGroup)->p_tag = pTag; #endif } /* EndList(pGroup): calls SetList(pGroup), then builds the closing end tag. With USE_INTERRUPTS the packet carries BASE(vu1::Loc) and OFFSET(0), resets vu1::Loc to 0, adds a GS SIGNAL register write as a REL prim and an MSCAL(VU1_ADDR(Parser)). Afterwards the low 10 bits of vu1::Loc are OR'd into ((uint16 *)pTag)[1]. */ void dma::EndList(void *pGroup) { SetList(pGroup); // end dma list for this group BeginTag(end, 0); #if USE_INTERRUPTS //vif::BASE(((sGroup *)pGroup)->vu1_loc); vif::BASE(vu1::Loc); vif::OFFSET(0); vu1::Loc = 0; // must do this as a relative prim for a sortable list... gs::BeginPrim(REL,0,0); gs::Reg1(gs::SIGNAL, PackSIGNAL(1,1)); // signal the end of rendering this group gs::EndPrim(1); vif::MSCAL(VU1_ADDR(Parser)); #endif EndTag(); ((uint16 *)pTag)[1] |= vu1::Loc & 0x3FF; // must write some code for doing this automatically } /* ReallySetList(pGroup): switches the dma build context to pGroup (used as an sGroup pointer; may be NULL). If a group is current (sp_group non-NULL), its last tag is made to end in a 'next': after a call or ref a new next tag with address 0 plus two NOPs is appended; for a cnt the top byte of the tag is overwritten with next<<4; other IDs are left alone. vu1::Loc and pTag are then saved in that group. sp_group becomes pGroup, and if it is non-NULL its vu1_loc and p_tag are restored and the second 32-bit word of that tag is set to pLoc. */ void dma::ReallySetList(void *pGroup) { // finish with the previous dma context if (sp_group) { // ensure the last tag was a 'next'... 
// get the tag ID uint ID = *(uint32 *)pTag>>28; // take care of 'refe' and 'refs' Dbg_MsgAssert(ID!=refe && ID!=refs, ("refe and refs not supported in dma tag!")); // take care of 'call' and 'ref' if (ID==call || ID==ref) { Tag(next, 0, 0); vif::NOP(); vif::NOP(); } // take care of 'cnt' else if (ID==cnt) { pTag[3] = next<<4; } // 'end' and 'ret' won't have anything after them in the same context // and 'next' is fine as it is // save the vu1 location and dma tag location ((sGroup *)sp_group)->vu1_loc = vu1::Loc; ((sGroup *)sp_group)->p_tag = pTag; } // change bucket sp_group = pGroup; // set up the new bucket if (pGroup) { // restore the vu1 location and dma tag location vu1::Loc = ((sGroup *)pGroup)->vu1_loc; pTag = ((sGroup *)pGroup)->p_tag; // patch the pointer of the dangling 'next' tag ((uint32 *)pTag)[1] = (uint32)pLoc; } } /* GetDmaSize(pTag): returns (first 16-bit word of the tag + 1) * 16. */ int dma::GetDmaSize(uint8 *pTag) { return (*(uint16 *)pTag + 1) << 4; // (QWC+1)*16 bytes } /* GetNumVertices(pTag): scans the vifcodes from pTag + 8 up to pTag + 16 + 16 * (first 16-bit word) using vif::NextCode. For each code whose byte [3] masked with 0x7F is 0x05 and whose byte [0] is 1, the next code is asserted to be an UNPACK V4_16 or V4_32 and its byte [2] is added to the returned total. NOTE(review): ExtractXYZs maps that byte through ((n-1)&0xFF)+1, so 0 counts as 256 there, while here 0 adds nothing - confirm whether that case can occur. */ int dma::GetNumVertices(uint8 *pTag) { // get start and end of dma packet uint8 *p_start = pTag + 8; uint8 *p_end = pTag + 16 + (*(uint16 *)pTag << 4); // parse vifcodes uint8 *p_code = p_start; int num_verts = 0; do { if (((p_code[3] & 0x7F) == 0x05) && (p_code[0] == 1)) // look for STMOD(1) { p_code = vif::NextCode(p_code); Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code)); num_verts += p_code[2]; } p_code=vif::NextCode(p_code); } while (p_code < p_end); return num_verts; } /* GetNumTris(pTag): performs the same vifcode scan as GetNumVertices and, per the comment in the body, counts adc bits that are zero over the vertices of each UNPACK. NOTE(review): the body looks damaged in this copy - text is missing after "for (int i=0;", and the tail that follows (ending in "return bit_length;") appears to belong to a different function whose header is lost. Restore from the original source. */ int dma::GetNumTris(uint8 *pTag) { // get start and end of dma packet uint8 *p_start = pTag + 8; uint8 *p_end = pTag + 16 + (*(uint16 *)pTag << 4); // parse vifcodes uint8 *p_code = p_start; int num_tris = 0; int num_verts; do { if (((p_code[3] & 0x7F) == 0x05) && (p_code[0] == 1)) // look for STMOD(1) { p_code = vif::NextCode(p_code); Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code)); num_verts = p_code[2]; // loop over verts, counting the adc bits which are zero if 
(p_code[3] & 0x01) // V4_16 { uint16 *p_adc = ((uint16 *)p_code)+5; for (int i=0; i> 2; for (i=0; i> (p_code[3] & 0x03); break; } p_code=vif::NextCode(p_code); } while (p_code < p_end); return bit_length; } /* ExtractXYZs(pTag, pArray): scans the packet's vifcodes from pTag + 8 to the end of the packet; after each code matching (word & 0x7F000001) == 0x05000001 the following code is asserted to be an UNPACK V4_16 or V4_32, and num_words is set to 4 times its byte [2], with a byte value of 0 counted as 256. pArray is used as the sint32 destination. NOTE(review): damaged in this copy - the copy loops and the end of the function are missing after "for (i=0;", and the code that follows (giftag and unpack bookkeeping, "perform compression") appears to belong to a later function whose header is lost. Restore from the original source. */ void dma::ExtractXYZs(uint8 *pTag, uint8 *pArray) { // get start and end of dma packet uint8 *p_start = pTag + 8; uint8 *p_end = pTag + 16 + (*(uint16 *)pTag << 4); // parse vifcodes uint8 *p_code = p_start; int i, num_words; sint32 *p_dest = (sint32 *)pArray; do { if ((*(uint32 *)p_code & 0x7F000001) == 0x05000001) { p_code = vif::NextCode(p_code); Dbg_MsgAssert((p_code[3] & 0x7E)==0x6C, ("0x%08X: expected UNPACK V4_16 or V4_32", *(uint32 *)p_code)); num_words = (int)((((uint32)p_code[2]-1)&0xFF)+1) << 2; if ((p_code[3] & 0x7F) == 0x6C) { // 32 bit sint32 *p_source = (sint32 *)(p_code+4); for (i=0; i1 && seenMSCAL) { // look for giftag if (unpack_num==-1) { p_giftag = (uint32 *)p_code + 1; nloop = p_giftag[0] & 0x7FFF; nreg = p_giftag[1] >> 28; } // look for vertex elements else { p_unpack[unpack_num] = p_code; } // next element unpack_num++; } } else { if ((p_code[3] & 0x7F) == 0x14) // mscal { seenMSCAL = true; } } // step to next vifcode p_code = vif::NextCode(p_code); // have we found all 5 unpacks of a vertex packet? if (unpack_num==5) { // perform compression... // set element pointers p_texcrds_source = p_texcrds_dest = (uint32 *)(p_unpack[0]+4); p_weights_source = p_weights_dest = (uint8 *)(p_unpack[1]+4); p_normal_source = p_normal_dest = (uint16 *)(p_unpack[2]+4); p_colour_source = p_colour_dest = (uint32 *)(p_unpack[3]+4); p_coords_source = p_coords_dest = (uint32 *)(p_unpack[4]+4); // set datasize for tex coords texcrds_size = ((p_unpack[0][3] & 0x07) == 0x04) ? 
2 : 1; // loop over source vertices num_squeezed = 0; for (i=0; i> 4; } /* NOTE(review): text is missing after "for (i=0;" above - the remainder of the compression routine is lost in this copy of the file. */ //--------------------------------- // S T A T I C D A T A //--------------------------------- uint8 * dma::pBase; // base of dynamic DMA buffer for this frame uint8 * dma::pLoc; // current position in it that we are building DMA packets uint8 * dma::pTag; /* tag that BeginList, EndList and ReallySetList save, restore and patch in place */ uint8 * dma::pPrebuiltBuffer; uint8 * dma::pDummyBuffer; // (Mick) used to simulate memory usage uint8 * dma::pRuntimeBuffer; uint8 * dma::pList[2]; uint64 * dma::Gosubs; /* table of subroutine tags indexed by Gosub(Num, ...) */ uint8 * dma::pSub; /* start of the subroutine being built, set by BeginSub and BeginSub3D */ dma::eTag dma::ID; /* tag ID given to BeginSub, read by EndSub */ int dma::sp; /* depth of Stack, used by NextTag */ uint8 * dma::Stack[2]; /* return addresses pushed by NextTag when stepping into call tags */ void * dma::sp_group; /* group whose dma context is current, maintained by ReallySetList */ int dma::size; } // namespace NxPs2