* Major endianness fixes on sky code today. The milestone sample and existing
PKE tests run identically on SPARC/Solaris and x86/Linux.

	* sky-pke.c (pke_io_{read,write}_buffer): Endianness fixes (a.k.a.
 	"E-fixes") in register and FIFO reads/writes.
	(pke_code_{pkemscalf,pkemscal}): E-fixes in VU CIA setting.
	(pke_code_{mpg,unpack}): E-fixes in VU memory & tracking updates.
	(pke_code_direct): E-fixes in GPUIF FIFO stuffing.

	* sky-pke.h (PKE_MEM_WRITE): E-fixes in trace file writing.

	* sky-vu0.c (vu0_attach): Allocate micro/data memory with zalloc
 	to guarantee sufficient (16-byte) alignment.

	* sky-vu1.c (vu1_attach): Ditto.
	(vu1_io_read_register_window): *PARTIAL* E-fixes in register accesses.

	* sky-libvpe.c (gif_write): E-fixes in GPUIF FIFO stuffing.

	* sky-gpuif.c (gif_io_{read,write}_buffer): E-fixes in
 	register and FIFO reads/writes.

	* sky-dma.c (do_dma_transfer_tag): E-fixes in tag reading.
This commit is contained in:
Frank Ch. Eigler 1998-02-27 21:52:40 +00:00
parent 01e5c2f36c
commit f0bb94cd67
4 changed files with 64 additions and 47 deletions

View file

@ -253,7 +253,7 @@ pke_io_read_buffer(device *me_,
case PKE_REG_C1: case PKE_REG_C1:
case PKE_REG_C2: case PKE_REG_C2:
case PKE_REG_C3: case PKE_REG_C3:
result[0] = me->regs[reg_num][0]; result[0] = H2T_4(me->regs[reg_num][0]);
break; break;
/* handle common case of write-only registers */ /* handle common case of write-only registers */
@ -337,6 +337,10 @@ pke_io_write_buffer(device *me_,
/* write user-given bytes into input */ /* write user-given bytes into input */
memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);
/* make words host-endian */
input[0] = T2H_4(input[0]);
/* we may ignore other words */
/* handle writes to individual registers; clear `writeable' on error */ /* handle writes to individual registers; clear `writeable' on error */
switch(reg_num) switch(reg_num)
{ {
@ -443,7 +447,7 @@ pke_io_write_buffer(device *me_,
unsigned_4 dma_tag_present = 0; unsigned_4 dma_tag_present = 0;
int i; int i;
/* collect potentially-partial quadword in write buffer */ /* collect potentially-partial quadword in write buffer; LE byte order */
memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes); memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
/* mark bytes written */ /* mark bytes written */
for(i = fifo_byte; i < fifo_byte + nr_bytes; i++) for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
@ -475,16 +479,18 @@ pke_io_write_buffer(device *me_,
me->fifo_buffer_size = new_fifo_buffer_size; me->fifo_buffer_size = new_fifo_buffer_size;
} }
/* add new quadword at end of FIFO */ /* add new quadword at end of FIFO; store data in host-endian */
fqw = & me->fifo[me->fifo_num_elements]; fqw = & me->fifo[me->fifo_num_elements];
fqw->word_class[0] = fqw->word_class[1] = fqw->word_class[0] = fqw->word_class[1] =
fqw->word_class[2] = fqw->word_class[3] = wc_unknown; fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
memcpy((void*) fqw->data, me->fifo_qw_in_progress, sizeof(quadword)); fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]);
fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]);
fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]);
fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]);
ASSERT(sizeof(unsigned_4) == 4); ASSERT(sizeof(unsigned_4) == 4);
PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR), PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
& fqw->source_address, /* target endian */ & fqw->source_address, /* converted to host-endian */
4); 4);
fqw->source_address = T2H_4(fqw->source_address);
PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG), PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
& dma_tag_present, & dma_tag_present,
4); 4);
@ -1250,8 +1256,9 @@ pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
if(me->pke_number == 1) if(me->pke_number == 1)
pke_flip_dbf(me); pke_flip_dbf(me);
/* compute new PC for VU */ /* compute new PC for VU (host byte-order) */
vu_pc = BIT_MASK_GET(imm, 0, 15); vu_pc = BIT_MASK_GET(imm, 0, 15);
vu_pc = T2H_4(vu_pc);
/* write new PC; callback function gets VU running */ /* write new PC; callback function gets VU running */
ASSERT(sizeof(unsigned_4) == 4); ASSERT(sizeof(unsigned_4) == 4);
@ -1370,8 +1377,9 @@ pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
if(me->pke_number == 1) if(me->pke_number == 1)
pke_flip_dbf(me); pke_flip_dbf(me);
/* compute new PC for VU */ /* compute new PC for VU (host byte-order) */
vu_pc = BIT_MASK_GET(imm, 0, 15); vu_pc = BIT_MASK_GET(imm, 0, 15);
vu_pc = T2H_4(vu_pc);
/* rewrite new PC; callback function gets VU running */ /* rewrite new PC; callback function gets VU running */
ASSERT(sizeof(unsigned_4) == 4); ASSERT(sizeof(unsigned_4) == 4);
@ -1573,7 +1581,6 @@ pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
address_word vu_addr_max_size; address_word vu_addr_max_size;
unsigned_4 vu_lower_opcode, vu_upper_opcode; unsigned_4 vu_lower_opcode, vu_upper_opcode;
unsigned_4* operand; unsigned_4* operand;
unsigned_4 source_addr;
struct fifo_quadword* fq; struct fifo_quadword* fq;
int next_num; int next_num;
@ -1605,21 +1612,20 @@ pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
vu_upper_opcode = *pke_pc_operand(me, i*2 + 2); vu_upper_opcode = *pke_pc_operand(me, i*2 + 2);
/* write data into VU memory */ /* write data into VU memory */
/* lower (scalar) opcode comes in first word */ /* lower (scalar) opcode comes in first word ; macro performs H2T! */
PKE_MEM_WRITE(me, vu_addr, PKE_MEM_WRITE(me, vu_addr,
& vu_lower_opcode, & vu_lower_opcode,
4); 4);
/* upper (vector) opcode comes in second word */ /* upper (vector) opcode comes in second word ; H2T */
ASSERT(sizeof(unsigned_4) == 4); ASSERT(sizeof(unsigned_4) == 4);
PKE_MEM_WRITE(me, vu_addr + 4, PKE_MEM_WRITE(me, vu_addr + 4,
& vu_upper_opcode, & vu_upper_opcode,
4); 4);
/* write tracking address in target byte-order */ /* write tracking address in target byte-order */
source_addr = H2T_4(fq->source_address);
ASSERT(sizeof(unsigned_4) == 4); ASSERT(sizeof(unsigned_4) == 4);
PKE_MEM_WRITE(me, vutrack_addr, PKE_MEM_WRITE(me, vutrack_addr,
& source_addr, & fq->source_address,
4); 4);
} /* VU xfer loop */ } /* VU xfer loop */
@ -1667,7 +1673,7 @@ pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
{ {
/* VU idle */ /* VU idle */
int i; int i;
quadword fifo_data; unsigned_16 fifo_data;
/* "transferring" operand */ /* "transferring" operand */
PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
@ -1678,14 +1684,14 @@ pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
unsigned_4* operand = pke_pc_operand(me, 1+i); unsigned_4* operand = pke_pc_operand(me, 1+i);
/* collect word into quadword */ /* collect word into quadword */
fifo_data[i % 4] = *operand; *A4_16(&fifo_data, 3 - (i % 4)) = *operand;
/* write to GPUIF FIFO only with full quadword */ /* write to GPUIF FIFO only with full quadword */
if(i % 4 == 3) if(i % 4 == 3)
{ {
ASSERT(sizeof(fifo_data) == 16); ASSERT(sizeof(fifo_data) == 16);
PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR, PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR,
fifo_data, & fifo_data,
16); 16);
} /* write collected quadword */ } /* write collected quadword */
@ -1816,16 +1822,18 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
/* compute address of tracking table entry */ /* compute address of tracking table entry */
vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4; vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;
/* read old VU data word at address */ /* read old VU data word at address; reverse words if needed */
ASSERT(sizeof(vu_old_data) == 16); {
PKE_MEM_READ(me, vu_addr, unsigned_16 vu_old_badwords;
vu_old_data, ASSERT(sizeof(vu_old_badwords) == 16);
16); PKE_MEM_READ(me, vu_addr,
&vu_old_badwords, 16);
vu_old_data[0] = * A4_16(& vu_old_badwords, 3);
vu_old_data[1] = * A4_16(& vu_old_badwords, 2);
vu_old_data[2] = * A4_16(& vu_old_badwords, 1);
vu_old_data[3] = * A4_16(& vu_old_badwords, 0);
}
/* yank memory out of little-endian order */
for(i=0; i<4; i++)
vu_old_data[i] = LE2H_4(vu_old_data[i]);
/* For cyclic unpack, next operand quadword may come from instruction stream /* For cyclic unpack, next operand quadword may come from instruction stream
or be zero. */ or be zero. */
if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */ if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */
@ -1960,18 +1968,19 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
; ;
} }
/* yank memory into little-endian order */ /* write new VU data word at address; reverse words if needed */
for(i=0; i<4; i++) {
vu_new_data[i] = H2LE_4(vu_new_data[i]); unsigned_16 vu_new_badwords;
* A4_16(& vu_new_badwords, 3) = vu_new_data[0];
/* write replacement word */ * A4_16(& vu_new_badwords, 2) = vu_new_data[1];
ASSERT(sizeof(vu_new_data) == 16); * A4_16(& vu_new_badwords, 1) = vu_new_data[2];
PKE_MEM_WRITE(me, vu_addr, * A4_16(& vu_new_badwords, 0) = vu_new_data[3];
vu_new_data, ASSERT(sizeof(vu_new_badwords) == 16);
16); PKE_MEM_WRITE(me, vu_addr,
&vu_new_badwords, 16);
}
/* write tracking address in target byte-order */ /* write tracking address */
source_addr = H2T_4(source_addr);
ASSERT(sizeof(unsigned_4) == 4); ASSERT(sizeof(unsigned_4) == 4);
PKE_MEM_WRITE(me, vutrack_addr, PKE_MEM_WRITE(me, vutrack_addr,
& source_addr, & source_addr,

View file

@ -422,10 +422,12 @@ struct pke_device
if((me)->fifo_trace_file != NULL) \ if((me)->fifo_trace_file != NULL) \
{ \ { \
int i; \ int i; \
unsigned_##size value_te; \
value_te = H2T_##size(value); \
fprintf((me)->fifo_trace_file, "# Write %2d bytes to ", size); \ fprintf((me)->fifo_trace_file, "# Write %2d bytes to ", size); \
fprintf((me)->fifo_trace_file, "0x%08lx: ", (unsigned long)(addr)); \ fprintf((me)->fifo_trace_file, "0x%08lx: ", (unsigned long)(addr)); \
for(i=0; i<size; i++) \ for(i=0; i<size; i++) \
fprintf((me)->fifo_trace_file, " %02x", ((unsigned_1*)(& value))[i]); \ fprintf((me)->fifo_trace_file, " %02x", ((unsigned_1*)(& value_te))[i]); \
fprintf((me)->fifo_trace_file, "\n"); \ fprintf((me)->fifo_trace_file, "\n"); \
} \ } \
} while(0) } while(0)

View file

@ -7,8 +7,9 @@
#include "sky-device.h" #include "sky-device.h"
#include "sky-vu0.h" #include "sky-vu0.h"
static char vu0_mem0_buffer[VU0_MEM0_SIZE]; static char* vu0_mem0_buffer = 0;
static char vu0_mem1_buffer[VU0_MEM1_SIZE]; static char* vu0_mem1_buffer = 0;
void void
vu0_issue(void) vu0_issue(void)
@ -62,6 +63,7 @@ vu0_attach(SIM_DESC sd)
&vu0_device, &vu0_device,
NULL /*buffer*/); NULL /*buffer*/);
vu0_mem0_buffer = zalloc(VU0_MEM0_SIZE);
sim_core_attach (sd, sim_core_attach (sd,
NULL, NULL,
0 /*level*/, 0 /*level*/,
@ -71,8 +73,9 @@ vu0_attach(SIM_DESC sd)
VU0_MEM0_SIZE /*nr_bytes*/, VU0_MEM0_SIZE /*nr_bytes*/,
0 /*modulo*/, 0 /*modulo*/,
0 /*device*/, 0 /*device*/,
&vu0_mem0_buffer /*buffer*/); vu0_mem0_buffer /*buffer*/);
vu0_mem1_buffer = zalloc(VU0_MEM1_SIZE);
sim_core_attach (sd, sim_core_attach (sd,
NULL, NULL,
0 /*level*/, 0 /*level*/,
@ -82,5 +85,5 @@ vu0_attach(SIM_DESC sd)
VU0_MEM1_SIZE /*nr_bytes*/, VU0_MEM1_SIZE /*nr_bytes*/,
0 /*modulo*/, 0 /*modulo*/,
0 /*device*/, 0 /*device*/,
&vu0_mem1_buffer /*buffer*/); vu0_mem1_buffer /*buffer*/);
} }

View file

@ -17,8 +17,8 @@ VectorUnitState vu1_state;
#define sim_warning printf #define sim_warning printf
static char vu1_umem_buffer[VU1_MEM0_SIZE] __attribute__ ((aligned(16))); static char* vu1_umem_buffer = 0;
static char vu1_mem_buffer[VU1_MEM1_SIZE] __attribute__ ((aligned(16))); static char* vu1_mem_buffer = 0;
void init_vu1(void); void init_vu1(void);
void init_vu(VectorUnitState *state, char* umem_buffer, char* mem_buffer); void init_vu(VectorUnitState *state, char* umem_buffer, char* mem_buffer);
@ -78,10 +78,11 @@ vu1_io_read_register_window(device *me,
if (vu1_state.runState == VU_RUN || vu1_state.runState == VU_BREAK) if (vu1_state.runState == VU_RUN || vu1_state.runState == VU_BREAK)
SET_BIT(stat, VPU_STAT_VBS1_BIT); SET_BIT(stat, VPU_STAT_VBS1_BIT);
*(u_long*)&source_buffer[VPE1_STAT - VU1_REGISTER_WINDOW_START] = stat; *(u_long*)&source_buffer[VPE1_STAT - VU1_REGISTER_WINDOW_START] = H2T_4(stat);
} }
*(u_long*)&source_buffer[VU1_CIA - VU1_REGISTER_WINDOW_START] = vu1_state.junk._vpepc; *(u_long*)&source_buffer[VU1_CIA - VU1_REGISTER_WINDOW_START] = H2T_4(vu1_state.junk._vpepc);
/* XXX: other H2T_N's needed around here. */
#if 0 #if 0
printf("%s: Read: %x, %d, dest: %x, space: %d, %x!\n", me->name, (int)addr, nr_bytes, (int)dest, space, *(int*)&(vu1_state.regs.VPE_STAT)); printf("%s: Read: %x, %d, dest: %x, space: %d, %x!\n", me->name, (int)addr, nr_bytes, (int)dest, space, *(int*)&(vu1_state.regs.VPE_STAT));
@ -167,6 +168,7 @@ vu1_init(SIM_DESC sd)
&vu1_device, &vu1_device,
NULL /*buffer*/); NULL /*buffer*/);
vu1_umem_buffer = zalloc(VU1_MEM0_SIZE);
sim_core_attach (sd, sim_core_attach (sd,
NULL, NULL,
0 /*level*/, 0 /*level*/,
@ -176,8 +178,9 @@ vu1_init(SIM_DESC sd)
VU1_MEM0_SIZE /*nr_bytes*/, VU1_MEM0_SIZE /*nr_bytes*/,
0 /*modulo*/, 0 /*modulo*/,
0 /*device*/, 0 /*device*/,
&vu1_umem_buffer /*buffer*/); vu1_umem_buffer /*buffer*/);
vu1_mem_buffer = zalloc(VU1_MEM1_SIZE);
sim_core_attach (sd, sim_core_attach (sd,
NULL, NULL,
0 /*level*/, 0 /*level*/,
@ -187,7 +190,7 @@ vu1_init(SIM_DESC sd)
VU1_MEM1_SIZE /*nr_bytes*/, VU1_MEM1_SIZE /*nr_bytes*/,
0 /*modulo*/, 0 /*modulo*/,
0 /*device*/, 0 /*device*/,
&vu1_mem_buffer /*buffer*/); vu1_mem_buffer /*buffer*/);
init_vu1(); init_vu1();
/*initvpe();*/ /*initvpe();*/