wiffle_screen_space_store_queue2: add alpha blending
This commit is contained in:
parent
306294cfff
commit
b156e2d24e
2
Makefile
2
Makefile
@ -4,7 +4,7 @@ include base.mk
|
||||
include common.mk
|
||||
include headers.mk
|
||||
|
||||
OPT = -O2
|
||||
OPT = -Og
|
||||
MAKEFILE_PATH := $(patsubst %/,%,$(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
|
||||
CFLAGS += -I$(MAKEFILE_PATH)
|
||||
LIB ?= $(MAKEFILE_PATH)
|
||||
|
@ -244,7 +244,8 @@ WIFFLE_SCREEN_SPACE_STORE_QUEUE2_OBJ = \
|
||||
holly/background.o \
|
||||
holly/ta_fifo_polygon_converter.o \
|
||||
sh7091/serial.o \
|
||||
sobel_fipr_store_queue2.o
|
||||
sobel_fipr_store_queue2.o \
|
||||
$(LIBGCC)
|
||||
|
||||
example/wiffle_screen_space_store_queue2.elf: LDSCRIPT = $(LIB)/main.lds
|
||||
example/wiffle_screen_space_store_queue2.elf: $(START_OBJ) $(WIFFLE_SCREEN_SPACE_STORE_QUEUE2_OBJ)
|
||||
|
@ -143,6 +143,85 @@ void transfer_scene(float theta)
|
||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||
}
|
||||
|
||||
// One corner of the full-screen quad: a screen-space position (x, y, z)
// together with the texture coordinate (u, v) sampled at that corner.
struct quad_vertex {
  float x;
  float y;
  float z;
  float u;
  float v;
};

// screen space coordinates
//
// The 640x480 image occupies the top-left corner of a 1024x512 texture, so
// the far edges of the quad map to these fractions of the texture instead of
// 1.0.
constexpr float x_uv = 640.f / 1024.f;
constexpr float y_uv = 480.f / 512.f;

// Corners of the quad, in order: top-left, top-right, bottom-right,
// bottom-left. constexpr (rather than plain const) guarantees the table is
// constant-initialized, like the x_uv / y_uv values it is built from.
constexpr quad_vertex quad_vertices[] = {
  {   0.f,   0.f, 0.1f, 0.0f, 0.0f },
  { 640.f,   0.f, 0.1f, x_uv, 0.0f },
  { 640.f, 480.f, 0.1f, x_uv, y_uv },
  {   0.f, 480.f, 0.1f, 0.0f, y_uv },
};

// transfer_translucent_quad indexes entries 0..3 directly.
static_assert(sizeof(quad_vertices) / sizeof(quad_vertices[0]) == 4,
              "quad_vertices must describe exactly four corners");
|
||||
|
||||
void transfer_translucent_quad(uint32_t texture_address, bool use_alpha)
|
||||
{
|
||||
const uint32_t parameter_control_word = para_control::para_type::sprite
|
||||
| para_control::list_type::translucent
|
||||
| obj_control::col_type::packed_color
|
||||
| obj_control::texture
|
||||
| obj_control::_16bit_uv;
|
||||
|
||||
const uint32_t isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::always
|
||||
| isp_tsp_instruction_word::culling_mode::no_culling;
|
||||
|
||||
const uint32_t alpha =
|
||||
tsp_instruction_word::src_alpha_instr::inverse_src_alpha |
|
||||
tsp_instruction_word::dst_alpha_instr::src_alpha;
|
||||
const uint32_t no_alpha =
|
||||
tsp_instruction_word::src_alpha_instr::one |
|
||||
tsp_instruction_word::dst_alpha_instr::zero;
|
||||
|
||||
const uint32_t tsp_instruction_word = (use_alpha ? alpha : no_alpha)
|
||||
| tsp_instruction_word::fog_control::no_fog
|
||||
| tsp_instruction_word::texture_u_size::from_int(1024)
|
||||
| tsp_instruction_word::texture_v_size::from_int(512)
|
||||
| (use_alpha ? tsp_instruction_word::use_alpha : 0);
|
||||
|
||||
const uint32_t texture_control_word = texture_control_word::pixel_format::_4444
|
||||
| texture_control_word::scan_order::non_twiddled
|
||||
| texture_control_word::texture_address(texture_address / 8)
|
||||
| texture_control_word::stride_select;
|
||||
|
||||
const uint32_t base_color = 0xffff00ff;
|
||||
*reinterpret_cast<ta_global_parameter::sprite *>(store_queue) =
|
||||
ta_global_parameter::sprite(parameter_control_word,
|
||||
isp_tsp_instruction_word,
|
||||
tsp_instruction_word,
|
||||
texture_control_word,
|
||||
base_color,
|
||||
0, // offset_color
|
||||
0, // data_size_for_sort_dma
|
||||
0); // next_address_for_sort_dma
|
||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||
|
||||
*reinterpret_cast<ta_vertex_parameter::sprite_type_1 *>(store_queue) =
|
||||
ta_vertex_parameter::sprite_type_1(para_control::para_type::vertex_parameter,
|
||||
quad_vertices[0].x,
|
||||
quad_vertices[0].y,
|
||||
quad_vertices[0].z,
|
||||
quad_vertices[1].x,
|
||||
quad_vertices[1].y,
|
||||
quad_vertices[1].z,
|
||||
quad_vertices[2].x,
|
||||
quad_vertices[2].y,
|
||||
quad_vertices[2].z,
|
||||
quad_vertices[3].x,
|
||||
quad_vertices[3].y,
|
||||
uv_16bit(quad_vertices[0].u, quad_vertices[0].v),
|
||||
uv_16bit(quad_vertices[1].u, quad_vertices[1].v),
|
||||
uv_16bit(quad_vertices[2].u, quad_vertices[2].v));
|
||||
sq_transfer_64byte(ta_fifo_polygon_converter);
|
||||
}
|
||||
|
||||
void dma_transfer(uint32_t source, uint32_t destination, uint32_t transfers)
|
||||
{
|
||||
using namespace dmac;
|
||||
@ -227,13 +306,14 @@ void main()
|
||||
dma_init();
|
||||
video_output::set_mode_vga();
|
||||
|
||||
const int render_passes = 1;
|
||||
|
||||
constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list
|
||||
| ta_alloc_ctrl::tm_opb::no_list
|
||||
| ta_alloc_ctrl::t_opb::_16x4byte
|
||||
| ta_alloc_ctrl::om_opb::no_list
|
||||
| ta_alloc_ctrl::o_opb::no_list;
|
||||
|
||||
const int render_passes = 1;
|
||||
const struct opb_size opb_size[render_passes] = {
|
||||
{
|
||||
.opaque = 0,
|
||||
@ -244,6 +324,22 @@ void main()
|
||||
}
|
||||
};
|
||||
|
||||
constexpr uint32_t ta_alloc2 = ta_alloc_ctrl::pt_opb::no_list
|
||||
| ta_alloc_ctrl::tm_opb::no_list
|
||||
| ta_alloc_ctrl::t_opb::_16x4byte
|
||||
| ta_alloc_ctrl::om_opb::no_list
|
||||
| ta_alloc_ctrl::o_opb::no_list;
|
||||
|
||||
const struct opb_size opb_size2[render_passes] = {
|
||||
{
|
||||
.opaque = 0,
|
||||
.opaque_modifier = 0,
|
||||
.translucent = 16 * 4,
|
||||
.translucent_modifier = 0,
|
||||
.punch_through = 0
|
||||
}
|
||||
};
|
||||
|
||||
holly.SOFTRESET = softreset::pipeline_soft_reset
|
||||
| softreset::ta_soft_reset;
|
||||
holly.SOFTRESET = 0;
|
||||
@ -265,66 +361,45 @@ void main()
|
||||
render_passes,
|
||||
texture_memory_alloc.region_array[0].start,
|
||||
texture_memory_alloc.object_list[0].start);
|
||||
|
||||
background_parameter2(texture_memory_alloc.background[0].start,
|
||||
0xffc0c0c0);
|
||||
|
||||
region_array_multipass(tile_width,
|
||||
tile_height,
|
||||
opb_size2,
|
||||
render_passes,
|
||||
texture_memory_alloc.region_array[1].start,
|
||||
texture_memory_alloc.object_list[1].start);
|
||||
background_parameter2(texture_memory_alloc.background[1].start,
|
||||
0xffc0c0c0);
|
||||
|
||||
holly.FB_R_SOF1 = texture_memory_alloc.framebuffer[0].start;
|
||||
|
||||
holly.FB_R_CTRL = fb_r_ctrl::vclk_div::pclk_vclk_1
|
||||
| fb_r_ctrl::fb_depth::_0888_rgb_32bit
|
||||
| fb_r_ctrl::fb_depth::_565_rgb_16bit
|
||||
| fb_r_ctrl::fb_enable;
|
||||
|
||||
holly.FB_R_SIZE = fb_r_size::fb_modulus(1)
|
||||
| fb_r_size::fb_y_size(480 - 3)
|
||||
| fb_r_size::fb_x_size((640 * 32) / 32 - 1);
|
||||
| fb_r_size::fb_x_size((640 * 16) / 32 - 1);
|
||||
|
||||
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_8888_argb_32bit;
|
||||
holly.TEXT_CONTROL = text_control::stride(20); // 640 pixels
|
||||
|
||||
system.LMMODE0 = 1;
|
||||
system.LMMODE1 = 1; // 32-bit
|
||||
|
||||
uint32_t * out = (uint32_t *)&texture_memory32[texture_memory_alloc.framebuffer[0].start / 4];
|
||||
for (int i = 0; i < 640 * 480; i++) {
|
||||
out[i] = 0xffff0000;
|
||||
}
|
||||
|
||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[0].end,
|
||||
texture_memory_alloc.object_list[0].start,
|
||||
texture_memory_alloc.object_list[0].end,
|
||||
opb_size[0].total(),
|
||||
ta_alloc,
|
||||
tile_width,
|
||||
tile_height);
|
||||
transfer_scene(theta);
|
||||
ta_wait_translucent_list();
|
||||
|
||||
const uint32_t bytes_per_pixel = 4;
|
||||
core_start_render3(texture_memory_alloc.region_array[0].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||
texture_memory_alloc.background[0].start,
|
||||
//texture_memory_alloc.framebuffer[0].start,
|
||||
0x100'0000 | texture_memory_alloc.texture.start, // 64-bit area
|
||||
framebuffer_width,
|
||||
bytes_per_pixel);
|
||||
|
||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[0].end,
|
||||
texture_memory_alloc.object_list[0].start,
|
||||
texture_memory_alloc.object_list[0].end,
|
||||
opb_size[0].total(),
|
||||
ta_alloc,
|
||||
tile_width,
|
||||
tile_height);
|
||||
transfer_scene(theta);
|
||||
//system.LMMODE0 = 1;
|
||||
//system.LMMODE1 = 1; // 32-bit
|
||||
system.LMMODE0 = 0;
|
||||
system.LMMODE1 = 0; // 64-bit
|
||||
|
||||
uint32_t * in = (uint32_t *)&texture_memory64[texture_memory_alloc.texture.start / 4];
|
||||
uint32_t * framebuffer = (uint32_t *)(0x11000000 + texture_memory_alloc.framebuffer[0].start);
|
||||
|
||||
/*
|
||||
for (int i = 0; i < 640 * 480; i++) {
|
||||
uint32_t * framebuffer = (uint32_t *)(0x11000000 + texture_memory_alloc.framebuffer[0].start);
|
||||
framebuffer[i] = 0xffff0000;
|
||||
}
|
||||
*/
|
||||
|
||||
while (1) {
|
||||
ta_wait_translucent_list();
|
||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[0].end,
|
||||
texture_memory_alloc.object_list[0].start,
|
||||
@ -333,32 +408,75 @@ void main()
|
||||
ta_alloc,
|
||||
tile_width,
|
||||
tile_height);
|
||||
|
||||
transfer_scene(theta);
|
||||
//serial::string("wait_tl1\n");
|
||||
ta_wait_translucent_list();
|
||||
//serial::string("wait_tl1 end\n");
|
||||
|
||||
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_4444_argb_16bit;
|
||||
|
||||
core_wait_end_of_render_video();
|
||||
core_start_render3(texture_memory_alloc.region_array[0].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||
texture_memory_alloc.background[0].start,
|
||||
//texture_memory_alloc.framebuffer[0].start,
|
||||
0x100'0000 | texture_memory_alloc.texture.start, // 64-bit area
|
||||
framebuffer_width,
|
||||
bytes_per_pixel);
|
||||
2); // bytes_per_pixel
|
||||
//serial::string("wait_eorv1\n");
|
||||
core_wait_end_of_render_video();
|
||||
//serial::string("wait_eorv1 end\n");
|
||||
|
||||
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32);
|
||||
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 2 / 32);
|
||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
|
||||
|
||||
sobel_fipr_store_queue2(inbuf, framebuffer, temp);
|
||||
//sobel_fipr_store_queue2(inbuf, out, temp);
|
||||
int frame = frame_ix & 1;
|
||||
uint32_t * framebuffer = (uint32_t *)(0x11000000 + texture_memory_alloc.framebuffer[0].start);
|
||||
uint32_t * out = (uint32_t *)(0x11000000 + texture_memory_alloc.texture.start + 640 * 480 * 2);
|
||||
//serial::string("sobel\n");
|
||||
//sobel_fipr_store_queue2(inbuf, framebuffer, temp);
|
||||
sobel_fipr_store_queue2(inbuf, out, temp);
|
||||
|
||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[1].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[1].end,
|
||||
texture_memory_alloc.object_list[1].start,
|
||||
texture_memory_alloc.object_list[1].end,
|
||||
opb_size2[0].total(),
|
||||
ta_alloc2,
|
||||
tile_width,
|
||||
tile_height);
|
||||
|
||||
const uint32_t texture_address0 = texture_memory_alloc.texture.start;
|
||||
transfer_translucent_quad(texture_address0, false);
|
||||
const uint32_t texture_address1 = texture_memory_alloc.texture.start + 640 * 480 * 2;
|
||||
transfer_translucent_quad(texture_address1, true);
|
||||
*reinterpret_cast<ta_global_parameter::end_of_list *>(store_queue) =
|
||||
ta_global_parameter::end_of_list(para_control::para_type::end_of_list);
|
||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||
|
||||
//serial::string("wait_tl2\n");
|
||||
ta_wait_translucent_list();
|
||||
//serial::string("wait_tl2 end\n");
|
||||
|
||||
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_565_rgb_16bit;
|
||||
|
||||
core_start_render3(texture_memory_alloc.region_array[1].start,
|
||||
texture_memory_alloc.isp_tsp_parameters[1].start,
|
||||
texture_memory_alloc.background[1].start,
|
||||
texture_memory_alloc.framebuffer[frame].start,
|
||||
framebuffer_width,
|
||||
2); // bytes_per_pixel
|
||||
//serial::string("wait_eorv2\n");
|
||||
core_wait_end_of_render_video();
|
||||
//serial::string("wait_eorv2 end\n");
|
||||
|
||||
while (!spg_status::vsync(holly.SPG_STATUS));
|
||||
holly.FB_R_SOF1 = texture_memory_alloc.framebuffer[frame].start;
|
||||
while (spg_status::vsync(holly.SPG_STATUS));
|
||||
|
||||
theta += half_degree;
|
||||
frame_ix += 1;
|
||||
if (frame_ix > 100)
|
||||
break;
|
||||
}
|
||||
|
||||
ta_wait_translucent_list();
|
||||
core_wait_end_of_render_video();
|
||||
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
serial::string("return\n");
|
||||
|
@ -140,18 +140,43 @@ void core_wait_end_of_render_video()
|
||||
"Furthermore, it is strongly recommended that the End of ISP and End of Video interrupts
|
||||
be cleared at the same time in order to make debugging easier when an error occurs."
|
||||
*/
|
||||
while ((system.ISTNRM & istnrm::end_of_render_tsp) == 0) {
|
||||
//serial::string("eorv\n");
|
||||
int64_t count = 0;
|
||||
while (1) {
|
||||
uint32_t istnrm = system.ISTNRM;
|
||||
if ((istnrm & istnrm::end_of_render_tsp) != 0)
|
||||
break;
|
||||
if (istnrm & 0xc0000000) {
|
||||
serial::string("istnrm ");
|
||||
serial::integer<uint32_t>(istnrm);
|
||||
serial::string("isterr ");
|
||||
serial::integer<uint32_t>(system.ISTERR);
|
||||
}
|
||||
|
||||
//serial::integer<uint32_t>(system.ISTERR);
|
||||
if (system.ISTERR) {
|
||||
//serial::string("core ");
|
||||
//serial::integer<uint32_t>(system.ISTERR);
|
||||
holly.SOFTRESET = softreset::pipeline_soft_reset;
|
||||
holly.SOFTRESET = 0;
|
||||
//break;
|
||||
}
|
||||
if (count > 10000000) {
|
||||
serial::string("core timeout:\n");
|
||||
serial::string("isterr ");
|
||||
serial::integer<uint32_t>(system.ISTERR);
|
||||
serial::string("istnrm ");
|
||||
serial::integer<uint32_t>(system.ISTNRM);
|
||||
break;
|
||||
}
|
||||
count += 1;
|
||||
};
|
||||
system.ISTNRM = istnrm::end_of_render_tsp
|
||||
| istnrm::end_of_render_isp
|
||||
| istnrm::end_of_render_video;
|
||||
|
||||
holly.SOFTRESET = softreset::pipeline_soft_reset;
|
||||
holly.SOFTRESET = 0;
|
||||
}
|
||||
|
||||
void core_flip(uint32_t frame_ix)
|
||||
|
@ -30,10 +30,10 @@ constexpr texture_memory_alloc texture_memory_alloc = {
|
||||
.background = {{0x07'ffe0, 0x08'0000}, {0x47'ffe0, 0x48'0000}},
|
||||
.object_list = {{0x08'0000, 0x0f'ffe0}, {0x48'0000, 0x4f'ffe0}}, // ~122880 object list pointers
|
||||
.region_array = {{0x10'0000, 0x11'0000}, {0x50'0000, 0x51'0000}}, // ~9 render passes
|
||||
//.framebuffer = {{0x11'0000, 0x1b'8c00}, {0x51'0000, 0x5b'8c00}}, // 720x480*2
|
||||
.framebuffer = {{0x11'0000, 0x23'c000}, {0x51'0000, 0x63'c000}}, // 640x480*4
|
||||
.framebuffer = {{0x11'0000, 0x1b'8c00}, {0x51'0000, 0x5b'8c00}}, // 720x480*2
|
||||
//.framebuffer = {{0x11'0000, 0x23'c000}, {0x51'0000, 0x63'c000}}, // 640x480*4
|
||||
|
||||
// 64-bit addresses
|
||||
//.texture = {0x37'1800, 0x80'0000}
|
||||
.texture = {0x57'1800, 0x80'0000}
|
||||
.texture = {0x37'1800, 0x80'0000}
|
||||
//.texture = {0x57'1800, 0x80'0000}
|
||||
};
|
||||
|
@ -1,3 +1,5 @@
|
||||
.macro inner_multiplication
|
||||
|
||||
/* y multiplication */
|
||||
mov #4,r1 /* r1 : temporary */
|
||||
fmov.s @r0,fr0 /* 0 */
|
||||
@ -49,11 +51,27 @@
|
||||
fsts FPUL,fr3
|
||||
fadd fr3,fr7
|
||||
|
||||
add #4,r0 /* next pixel */
|
||||
|
||||
fschg
|
||||
fmov xd0,dr0 /* load 100.f constant */
|
||||
fcmp/gt fr0,fr7
|
||||
fschg
|
||||
|
||||
.endm
|
||||
|
||||
/*
 * sobel_fipr_inner_2px: evaluate two adjacent pixels with
 * inner_multiplication and pack the two 16-bit results into r9
 * (first pixel in bits 0-15, second pixel in bits 16-31).
 *
 * inner_multiplication finishes with an fcmp/gt whose outcome is left in
 * the T bit; each T value is then converted to a 16-bit word:
 *   T = 1 -> 0x0000,  T = 0 -> 0xffff.
 *
 * In:  r0 = address of the first pixel. This macro itself advances r0 by 4
 *      once, between the two invocations.
 * Out: r9 = packed pair. r1 is used as scratch.
 */
.macro sobel_fipr_inner_2px
|
||||
/* NOTE(review): r9 is overwritten by movt below; this clear looks redundant
   unless inner_multiplication reads r9 - confirm. */
mov #0,r9
|
||||
|
||||
/* first pixel */
inner_multiplication
|
||||
/* r9 = T (0 or 1) */
movt r9
|
||||
/* 1 -> 0x00000000, 0 -> 0xffffffff */
add #-1,r9
|
||||
/* keep the low 16 bits: 0x0000 or 0xffff */
extu.w r9,r9
|
||||
|
||||
add #4,r0 /* next pixel */
|
||||
/* second pixel */
inner_multiplication
|
||||
/* same T -> 0x0000 / 0xffff conversion, in scratch register r1 */
movt r1
|
||||
add #-1,r1
|
||||
extu.w r1,r1
|
||||
/* move the second result into the upper half-word */
shll16 r1
|
||||
/* r9 = (second << 16) | first */
or r1,r9
|
||||
.endm
|
||||
|
@ -15,7 +15,7 @@ _sobel_fipr_store_queue2:
|
||||
/* r11: var (y loop counter) */
|
||||
/* r12: var (prefetch address: input address + 1280 * 4) */
|
||||
/* r13: var (input address) */
|
||||
/* r14: - */
|
||||
/* r14: (temporary) */
|
||||
|
||||
__setup:
|
||||
mov.l r8,@-r15
|
||||
@ -24,6 +24,7 @@ __setup:
|
||||
mov.l r11,@-r15
|
||||
mov.l r12,@-r15
|
||||
mov.l r13,@-r15
|
||||
mov.l r14,@-r15
|
||||
fmov.s fr12,@-r15
|
||||
fmov.s fr13,@-r15
|
||||
fmov.s fr14,@-r15
|
||||
@ -81,7 +82,7 @@ __setup:
|
||||
nop
|
||||
|
||||
.align 4
|
||||
_const_100f: .float 3900
|
||||
_const_100f: .float 50
|
||||
|
||||
_const_store_queue: .long 0xe0000000
|
||||
_const_store_queue_mask: .long 0x03ffffc0 /* (0xffffffff & (~0b111111)) & (~(0b111111 << 26)) */
|
||||
@ -93,54 +94,56 @@ _const_1280: .short (1280 * 4)
|
||||
_const_1281: .short (1281 * 4)
|
||||
_const_1282: .short (1282 * 4)
|
||||
|
||||
/* use r10 as temporary to load the first 1280 pixels; 8 pixels per loop iteration */
|
||||
/* use r10 as temporary to load the first 1280 pixels; 16 pixels per loop iteration */
|
||||
.include "unpack_pixel.s"
|
||||
.align 4
|
||||
_prime_pixels_loop_init:
|
||||
mov #80,r10 /* 1280 / 8 */
|
||||
shll r10
|
||||
mov r0,r12
|
||||
mov #80,r10 /* 1280 / 16 */
|
||||
shll r10
|
||||
|
||||
_prime_pixels_loop:
|
||||
.include "unpack_pixel.s"
|
||||
unpack_pixel_16
|
||||
dt r10
|
||||
bt _loop_init
|
||||
bra _prime_pixels_loop
|
||||
nop
|
||||
|
||||
.align 4
|
||||
_loop_init:
|
||||
/* skip first row */
|
||||
add r3,r0 /* r3: const (640 * 4) */
|
||||
add r3,r8
|
||||
/* skip first output row */
|
||||
mov r3,r1
|
||||
shlr r1
|
||||
add r1,r8 /* r3: 640 * 4 */
|
||||
|
||||
mov.w _const_height,r11 /* 478 */
|
||||
bra _loop
|
||||
mov #80,r10 /* 640 / 8 */
|
||||
mov #40,r10 /* 640 / 8 */
|
||||
|
||||
_const_height: .short 478
|
||||
_const_height: .short 476
|
||||
/*_const_height: .short 238*/
|
||||
|
||||
.include "sobel_fipr_inner2.s"
|
||||
_loop:
|
||||
_loop_width:
|
||||
/* prefetch at r8 + 1280 */
|
||||
unpack_pixel_16
|
||||
|
||||
/* process the next 8 pixels */
|
||||
.include "unpack_pixel.s"
|
||||
|
||||
.include "sobel_fipr_inner2.s"
|
||||
/* process the next 16 pixels */
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@r8 /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(4,r8) /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(8,r8) /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(12,r8) /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(16,r8) /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(20,r8) /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(24,r8) /* save result in the store queue */
|
||||
.include "sobel_fipr_inner2.s"
|
||||
sobel_fipr_inner_2px
|
||||
mov.l r9,@(28,r8) /* save result in the store queue */
|
||||
|
||||
/* send the store queue */
|
||||
@ -158,7 +161,7 @@ _row_decrement:
|
||||
dt r11
|
||||
bt _return
|
||||
bra _loop
|
||||
mov #80,r10 /* 640 / 8 */
|
||||
mov #40,r10 /* 640 / 8 */
|
||||
|
||||
/* restore registers */
|
||||
_return:
|
||||
@ -166,6 +169,7 @@ _return:
|
||||
fmov.s @r15+,fr14
|
||||
fmov.s @r15+,fr13
|
||||
fmov.s @r15+,fr12
|
||||
mov.l @r15+,r14
|
||||
mov.l @r15+,r13
|
||||
mov.l @r15+,r12
|
||||
mov.l @r15+,r11
|
||||
|
@ -1,25 +1,63 @@
|
||||
ocbi @r13
|
||||
pref @r13 /* 32 bytes, 8 pixels */
|
||||
|
||||
/* unpack the next 8 pixels */
|
||||
|
||||
fschg
|
||||
/*
|
||||
mov.l @r13,r9
|
||||
extu.b r9,r1
|
||||
shlr8 r9
|
||||
extu.b r9,r2
|
||||
add r1,r2
|
||||
shlr8 r9
|
||||
extu.b r9,r1
|
||||
add r1,r2
|
||||
shlr8 r9
|
||||
add r2,r9
|
||||
lds r9,fpul
|
||||
add #4,r13
|
||||
*/
|
||||
.macro unpack_pixel_inner_nibs
|
||||
|
||||
.include "unpack_pixel_inner.s"
|
||||
mov.w @r13+,r9
|
||||
|
||||
mov r9,r1 /* nib0 */
|
||||
shlr2 r9
|
||||
shlr2 r9
|
||||
and r14,r1
|
||||
|
||||
mov r9,r2 /* nib1 */
|
||||
shlr2 r9
|
||||
shlr2 r9
|
||||
and r14,r2
|
||||
add r2,r1
|
||||
|
||||
mov r9,r2 /* nib3 */
|
||||
shlr2 r9
|
||||
shlr2 r9
|
||||
and r14,r2
|
||||
add r2,r1
|
||||
|
||||
and r14,r9 /* nib4 */
|
||||
add r9,r1
|
||||
|
||||
lds r1,fpul
|
||||
|
||||
.endm
|
||||
|
||||
.macro unpack_pixel_8
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr0
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr1
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr2
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr3
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr4
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr5
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr6
|
||||
.include "unpack_pixel_inner.s"
|
||||
unpack_pixel_inner_nibs
|
||||
float fpul,fr7
|
||||
|
||||
fmov dr0,@r12
|
||||
@ -30,5 +68,17 @@
|
||||
add #8,r12
|
||||
fmov dr6,@r12
|
||||
add #8,r12
|
||||
.endm
|
||||
|
||||
/*
 * unpack_pixel_16: unpack the next 16 input pixels by invoking
 * unpack_pixel_8 twice.
 *
 * In:  r13 = input address (read with post-increment by the inner macros).
 *      r12 = output address for the unpacked floats (advanced by
 *            unpack_pixel_8).
 * Clobbers r14 (nibble mask) in addition to whatever unpack_pixel_8 uses.
 */
.macro unpack_pixel_16
|
||||
/* invalidate the operand cache block at r13, then prefetch it */
ocbi @r13
|
||||
pref @r13 /* 32 bytes, 16 pixels */
|
||||
/* r14 = 0xf: mask applied by unpack_pixel_inner_nibs to isolate each nibble */
mov #15,r14
|
||||
|
||||
/* toggle FPSCR.SZ so the fmov dr*,@r12 stores in unpack_pixel_8 move pairs */
fschg
|
||||
|
||||
unpack_pixel_8
|
||||
unpack_pixel_8
|
||||
|
||||
/* toggle FPSCR.SZ back */
fschg
|
||||
.endm
|
||||
|
@ -1,12 +1 @@
|
||||
mov.l @r13,r9
|
||||
extu.b r9,r1
|
||||
shlr8 r9
|
||||
extu.b r9,r2
|
||||
add r1,r2
|
||||
shlr8 r9
|
||||
extu.b r9,r1
|
||||
add r1,r2
|
||||
shlr8 r9
|
||||
add r2,r9
|
||||
lds r9,fpul
|
||||
add #4,r13
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user