wiffle_screen_space_store_queue2: add alpha blending
This commit is contained in:
parent
306294cfff
commit
b156e2d24e
2
Makefile
2
Makefile
@ -4,7 +4,7 @@ include base.mk
|
|||||||
include common.mk
|
include common.mk
|
||||||
include headers.mk
|
include headers.mk
|
||||||
|
|
||||||
OPT = -O2
|
OPT = -Og
|
||||||
MAKEFILE_PATH := $(patsubst %/,%,$(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
|
MAKEFILE_PATH := $(patsubst %/,%,$(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
|
||||||
CFLAGS += -I$(MAKEFILE_PATH)
|
CFLAGS += -I$(MAKEFILE_PATH)
|
||||||
LIB ?= $(MAKEFILE_PATH)
|
LIB ?= $(MAKEFILE_PATH)
|
||||||
|
@ -244,7 +244,8 @@ WIFFLE_SCREEN_SPACE_STORE_QUEUE2_OBJ = \
|
|||||||
holly/background.o \
|
holly/background.o \
|
||||||
holly/ta_fifo_polygon_converter.o \
|
holly/ta_fifo_polygon_converter.o \
|
||||||
sh7091/serial.o \
|
sh7091/serial.o \
|
||||||
sobel_fipr_store_queue2.o
|
sobel_fipr_store_queue2.o \
|
||||||
|
$(LIBGCC)
|
||||||
|
|
||||||
example/wiffle_screen_space_store_queue2.elf: LDSCRIPT = $(LIB)/main.lds
|
example/wiffle_screen_space_store_queue2.elf: LDSCRIPT = $(LIB)/main.lds
|
||||||
example/wiffle_screen_space_store_queue2.elf: $(START_OBJ) $(WIFFLE_SCREEN_SPACE_STORE_QUEUE2_OBJ)
|
example/wiffle_screen_space_store_queue2.elf: $(START_OBJ) $(WIFFLE_SCREEN_SPACE_STORE_QUEUE2_OBJ)
|
||||||
|
@ -143,6 +143,85 @@ void transfer_scene(float theta)
|
|||||||
sq_transfer_32byte(ta_fifo_polygon_converter);
|
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct quad_vertex {
|
||||||
|
float x;
|
||||||
|
float y;
|
||||||
|
float z;
|
||||||
|
float u;
|
||||||
|
float v;
|
||||||
|
};
|
||||||
|
|
||||||
|
// screen space coordinates
|
||||||
|
constexpr float x_uv = 640.f / 1024.f;
|
||||||
|
constexpr float y_uv = 480.f / 512.f;
|
||||||
|
|
||||||
|
const struct quad_vertex quad_vertices[] = {
|
||||||
|
{ 0.f, 0.f, 0.1f, 0.0f, 0.0f },
|
||||||
|
{ 640.f, 0.f, 0.1f, x_uv, 0.0f },
|
||||||
|
{ 640.f, 480.f, 0.1f, x_uv, y_uv },
|
||||||
|
{ 0.f, 480.f, 0.1f, 0.0f, y_uv },
|
||||||
|
};
|
||||||
|
|
||||||
|
void transfer_translucent_quad(uint32_t texture_address, bool use_alpha)
|
||||||
|
{
|
||||||
|
const uint32_t parameter_control_word = para_control::para_type::sprite
|
||||||
|
| para_control::list_type::translucent
|
||||||
|
| obj_control::col_type::packed_color
|
||||||
|
| obj_control::texture
|
||||||
|
| obj_control::_16bit_uv;
|
||||||
|
|
||||||
|
const uint32_t isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::always
|
||||||
|
| isp_tsp_instruction_word::culling_mode::no_culling;
|
||||||
|
|
||||||
|
const uint32_t alpha =
|
||||||
|
tsp_instruction_word::src_alpha_instr::inverse_src_alpha |
|
||||||
|
tsp_instruction_word::dst_alpha_instr::src_alpha;
|
||||||
|
const uint32_t no_alpha =
|
||||||
|
tsp_instruction_word::src_alpha_instr::one |
|
||||||
|
tsp_instruction_word::dst_alpha_instr::zero;
|
||||||
|
|
||||||
|
const uint32_t tsp_instruction_word = (use_alpha ? alpha : no_alpha)
|
||||||
|
| tsp_instruction_word::fog_control::no_fog
|
||||||
|
| tsp_instruction_word::texture_u_size::from_int(1024)
|
||||||
|
| tsp_instruction_word::texture_v_size::from_int(512)
|
||||||
|
| (use_alpha ? tsp_instruction_word::use_alpha : 0);
|
||||||
|
|
||||||
|
const uint32_t texture_control_word = texture_control_word::pixel_format::_4444
|
||||||
|
| texture_control_word::scan_order::non_twiddled
|
||||||
|
| texture_control_word::texture_address(texture_address / 8)
|
||||||
|
| texture_control_word::stride_select;
|
||||||
|
|
||||||
|
const uint32_t base_color = 0xffff00ff;
|
||||||
|
*reinterpret_cast<ta_global_parameter::sprite *>(store_queue) =
|
||||||
|
ta_global_parameter::sprite(parameter_control_word,
|
||||||
|
isp_tsp_instruction_word,
|
||||||
|
tsp_instruction_word,
|
||||||
|
texture_control_word,
|
||||||
|
base_color,
|
||||||
|
0, // offset_color
|
||||||
|
0, // data_size_for_sort_dma
|
||||||
|
0); // next_address_for_sort_dma
|
||||||
|
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||||
|
|
||||||
|
*reinterpret_cast<ta_vertex_parameter::sprite_type_1 *>(store_queue) =
|
||||||
|
ta_vertex_parameter::sprite_type_1(para_control::para_type::vertex_parameter,
|
||||||
|
quad_vertices[0].x,
|
||||||
|
quad_vertices[0].y,
|
||||||
|
quad_vertices[0].z,
|
||||||
|
quad_vertices[1].x,
|
||||||
|
quad_vertices[1].y,
|
||||||
|
quad_vertices[1].z,
|
||||||
|
quad_vertices[2].x,
|
||||||
|
quad_vertices[2].y,
|
||||||
|
quad_vertices[2].z,
|
||||||
|
quad_vertices[3].x,
|
||||||
|
quad_vertices[3].y,
|
||||||
|
uv_16bit(quad_vertices[0].u, quad_vertices[0].v),
|
||||||
|
uv_16bit(quad_vertices[1].u, quad_vertices[1].v),
|
||||||
|
uv_16bit(quad_vertices[2].u, quad_vertices[2].v));
|
||||||
|
sq_transfer_64byte(ta_fifo_polygon_converter);
|
||||||
|
}
|
||||||
|
|
||||||
void dma_transfer(uint32_t source, uint32_t destination, uint32_t transfers)
|
void dma_transfer(uint32_t source, uint32_t destination, uint32_t transfers)
|
||||||
{
|
{
|
||||||
using namespace dmac;
|
using namespace dmac;
|
||||||
@ -227,13 +306,14 @@ void main()
|
|||||||
dma_init();
|
dma_init();
|
||||||
video_output::set_mode_vga();
|
video_output::set_mode_vga();
|
||||||
|
|
||||||
|
const int render_passes = 1;
|
||||||
|
|
||||||
constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list
|
constexpr uint32_t ta_alloc = ta_alloc_ctrl::pt_opb::no_list
|
||||||
| ta_alloc_ctrl::tm_opb::no_list
|
| ta_alloc_ctrl::tm_opb::no_list
|
||||||
| ta_alloc_ctrl::t_opb::_16x4byte
|
| ta_alloc_ctrl::t_opb::_16x4byte
|
||||||
| ta_alloc_ctrl::om_opb::no_list
|
| ta_alloc_ctrl::om_opb::no_list
|
||||||
| ta_alloc_ctrl::o_opb::no_list;
|
| ta_alloc_ctrl::o_opb::no_list;
|
||||||
|
|
||||||
const int render_passes = 1;
|
|
||||||
const struct opb_size opb_size[render_passes] = {
|
const struct opb_size opb_size[render_passes] = {
|
||||||
{
|
{
|
||||||
.opaque = 0,
|
.opaque = 0,
|
||||||
@ -244,6 +324,22 @@ void main()
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
constexpr uint32_t ta_alloc2 = ta_alloc_ctrl::pt_opb::no_list
|
||||||
|
| ta_alloc_ctrl::tm_opb::no_list
|
||||||
|
| ta_alloc_ctrl::t_opb::_16x4byte
|
||||||
|
| ta_alloc_ctrl::om_opb::no_list
|
||||||
|
| ta_alloc_ctrl::o_opb::no_list;
|
||||||
|
|
||||||
|
const struct opb_size opb_size2[render_passes] = {
|
||||||
|
{
|
||||||
|
.opaque = 0,
|
||||||
|
.opaque_modifier = 0,
|
||||||
|
.translucent = 16 * 4,
|
||||||
|
.translucent_modifier = 0,
|
||||||
|
.punch_through = 0
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
holly.SOFTRESET = softreset::pipeline_soft_reset
|
holly.SOFTRESET = softreset::pipeline_soft_reset
|
||||||
| softreset::ta_soft_reset;
|
| softreset::ta_soft_reset;
|
||||||
holly.SOFTRESET = 0;
|
holly.SOFTRESET = 0;
|
||||||
@ -265,66 +361,45 @@ void main()
|
|||||||
render_passes,
|
render_passes,
|
||||||
texture_memory_alloc.region_array[0].start,
|
texture_memory_alloc.region_array[0].start,
|
||||||
texture_memory_alloc.object_list[0].start);
|
texture_memory_alloc.object_list[0].start);
|
||||||
|
|
||||||
background_parameter2(texture_memory_alloc.background[0].start,
|
background_parameter2(texture_memory_alloc.background[0].start,
|
||||||
0xffc0c0c0);
|
0xffc0c0c0);
|
||||||
|
|
||||||
|
region_array_multipass(tile_width,
|
||||||
|
tile_height,
|
||||||
|
opb_size2,
|
||||||
|
render_passes,
|
||||||
|
texture_memory_alloc.region_array[1].start,
|
||||||
|
texture_memory_alloc.object_list[1].start);
|
||||||
|
background_parameter2(texture_memory_alloc.background[1].start,
|
||||||
|
0xffc0c0c0);
|
||||||
|
|
||||||
holly.FB_R_SOF1 = texture_memory_alloc.framebuffer[0].start;
|
holly.FB_R_SOF1 = texture_memory_alloc.framebuffer[0].start;
|
||||||
|
|
||||||
holly.FB_R_CTRL = fb_r_ctrl::vclk_div::pclk_vclk_1
|
holly.FB_R_CTRL = fb_r_ctrl::vclk_div::pclk_vclk_1
|
||||||
| fb_r_ctrl::fb_depth::_0888_rgb_32bit
|
| fb_r_ctrl::fb_depth::_565_rgb_16bit
|
||||||
| fb_r_ctrl::fb_enable;
|
| fb_r_ctrl::fb_enable;
|
||||||
|
|
||||||
holly.FB_R_SIZE = fb_r_size::fb_modulus(1)
|
holly.FB_R_SIZE = fb_r_size::fb_modulus(1)
|
||||||
| fb_r_size::fb_y_size(480 - 3)
|
| fb_r_size::fb_y_size(480 - 3)
|
||||||
| fb_r_size::fb_x_size((640 * 32) / 32 - 1);
|
| fb_r_size::fb_x_size((640 * 16) / 32 - 1);
|
||||||
|
|
||||||
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_8888_argb_32bit;
|
holly.TEXT_CONTROL = text_control::stride(20); // 640 pixels
|
||||||
|
|
||||||
system.LMMODE0 = 1;
|
//system.LMMODE0 = 1;
|
||||||
system.LMMODE1 = 1; // 32-bit
|
//system.LMMODE1 = 1; // 32-bit
|
||||||
|
system.LMMODE0 = 0;
|
||||||
uint32_t * out = (uint32_t *)&texture_memory32[texture_memory_alloc.framebuffer[0].start / 4];
|
system.LMMODE1 = 0; // 64-bit
|
||||||
for (int i = 0; i < 640 * 480; i++) {
|
|
||||||
out[i] = 0xffff0000;
|
|
||||||
}
|
|
||||||
|
|
||||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
|
||||||
texture_memory_alloc.isp_tsp_parameters[0].end,
|
|
||||||
texture_memory_alloc.object_list[0].start,
|
|
||||||
texture_memory_alloc.object_list[0].end,
|
|
||||||
opb_size[0].total(),
|
|
||||||
ta_alloc,
|
|
||||||
tile_width,
|
|
||||||
tile_height);
|
|
||||||
transfer_scene(theta);
|
|
||||||
ta_wait_translucent_list();
|
|
||||||
|
|
||||||
const uint32_t bytes_per_pixel = 4;
|
|
||||||
core_start_render3(texture_memory_alloc.region_array[0].start,
|
|
||||||
texture_memory_alloc.isp_tsp_parameters[0].start,
|
|
||||||
texture_memory_alloc.background[0].start,
|
|
||||||
//texture_memory_alloc.framebuffer[0].start,
|
|
||||||
0x100'0000 | texture_memory_alloc.texture.start, // 64-bit area
|
|
||||||
framebuffer_width,
|
|
||||||
bytes_per_pixel);
|
|
||||||
|
|
||||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
|
||||||
texture_memory_alloc.isp_tsp_parameters[0].end,
|
|
||||||
texture_memory_alloc.object_list[0].start,
|
|
||||||
texture_memory_alloc.object_list[0].end,
|
|
||||||
opb_size[0].total(),
|
|
||||||
ta_alloc,
|
|
||||||
tile_width,
|
|
||||||
tile_height);
|
|
||||||
transfer_scene(theta);
|
|
||||||
|
|
||||||
uint32_t * in = (uint32_t *)&texture_memory64[texture_memory_alloc.texture.start / 4];
|
uint32_t * in = (uint32_t *)&texture_memory64[texture_memory_alloc.texture.start / 4];
|
||||||
uint32_t * framebuffer = (uint32_t *)(0x11000000 + texture_memory_alloc.framebuffer[0].start);
|
|
||||||
|
/*
|
||||||
|
for (int i = 0; i < 640 * 480; i++) {
|
||||||
|
uint32_t * framebuffer = (uint32_t *)(0x11000000 + texture_memory_alloc.framebuffer[0].start);
|
||||||
|
framebuffer[i] = 0xffff0000;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
ta_wait_translucent_list();
|
|
||||||
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||||
texture_memory_alloc.isp_tsp_parameters[0].end,
|
texture_memory_alloc.isp_tsp_parameters[0].end,
|
||||||
texture_memory_alloc.object_list[0].start,
|
texture_memory_alloc.object_list[0].start,
|
||||||
@ -333,32 +408,75 @@ void main()
|
|||||||
ta_alloc,
|
ta_alloc,
|
||||||
tile_width,
|
tile_width,
|
||||||
tile_height);
|
tile_height);
|
||||||
|
|
||||||
transfer_scene(theta);
|
transfer_scene(theta);
|
||||||
|
//serial::string("wait_tl1\n");
|
||||||
|
ta_wait_translucent_list();
|
||||||
|
//serial::string("wait_tl1 end\n");
|
||||||
|
|
||||||
|
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_4444_argb_16bit;
|
||||||
|
|
||||||
core_wait_end_of_render_video();
|
|
||||||
core_start_render3(texture_memory_alloc.region_array[0].start,
|
core_start_render3(texture_memory_alloc.region_array[0].start,
|
||||||
texture_memory_alloc.isp_tsp_parameters[0].start,
|
texture_memory_alloc.isp_tsp_parameters[0].start,
|
||||||
texture_memory_alloc.background[0].start,
|
texture_memory_alloc.background[0].start,
|
||||||
//texture_memory_alloc.framebuffer[0].start,
|
|
||||||
0x100'0000 | texture_memory_alloc.texture.start, // 64-bit area
|
0x100'0000 | texture_memory_alloc.texture.start, // 64-bit area
|
||||||
framebuffer_width,
|
framebuffer_width,
|
||||||
bytes_per_pixel);
|
2); // bytes_per_pixel
|
||||||
|
//serial::string("wait_eorv1\n");
|
||||||
|
core_wait_end_of_render_video();
|
||||||
|
//serial::string("wait_eorv1 end\n");
|
||||||
|
|
||||||
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 4 / 32);
|
dma_transfer((uint32_t)in, (uint32_t)inbuf, 640 * 480 * 2 / 32);
|
||||||
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
|
while ((sh7091.DMAC.CHCR1 & dmac::chcr::te::transfers_completed) == 0);
|
||||||
|
|
||||||
sobel_fipr_store_queue2(inbuf, framebuffer, temp);
|
//sobel_fipr_store_queue2(inbuf, out, temp);
|
||||||
|
int frame = frame_ix & 1;
|
||||||
|
uint32_t * framebuffer = (uint32_t *)(0x11000000 + texture_memory_alloc.framebuffer[0].start);
|
||||||
|
uint32_t * out = (uint32_t *)(0x11000000 + texture_memory_alloc.texture.start + 640 * 480 * 2);
|
||||||
|
//serial::string("sobel\n");
|
||||||
|
//sobel_fipr_store_queue2(inbuf, framebuffer, temp);
|
||||||
|
sobel_fipr_store_queue2(inbuf, out, temp);
|
||||||
|
|
||||||
|
ta_polygon_converter_init2(texture_memory_alloc.isp_tsp_parameters[1].start,
|
||||||
|
texture_memory_alloc.isp_tsp_parameters[1].end,
|
||||||
|
texture_memory_alloc.object_list[1].start,
|
||||||
|
texture_memory_alloc.object_list[1].end,
|
||||||
|
opb_size2[0].total(),
|
||||||
|
ta_alloc2,
|
||||||
|
tile_width,
|
||||||
|
tile_height);
|
||||||
|
|
||||||
|
const uint32_t texture_address0 = texture_memory_alloc.texture.start;
|
||||||
|
transfer_translucent_quad(texture_address0, false);
|
||||||
|
const uint32_t texture_address1 = texture_memory_alloc.texture.start + 640 * 480 * 2;
|
||||||
|
transfer_translucent_quad(texture_address1, true);
|
||||||
|
*reinterpret_cast<ta_global_parameter::end_of_list *>(store_queue) =
|
||||||
|
ta_global_parameter::end_of_list(para_control::para_type::end_of_list);
|
||||||
|
sq_transfer_32byte(ta_fifo_polygon_converter);
|
||||||
|
|
||||||
|
//serial::string("wait_tl2\n");
|
||||||
|
ta_wait_translucent_list();
|
||||||
|
//serial::string("wait_tl2 end\n");
|
||||||
|
|
||||||
|
holly.FB_W_CTRL = fb_w_ctrl::fb_packmode::_565_rgb_16bit;
|
||||||
|
|
||||||
|
core_start_render3(texture_memory_alloc.region_array[1].start,
|
||||||
|
texture_memory_alloc.isp_tsp_parameters[1].start,
|
||||||
|
texture_memory_alloc.background[1].start,
|
||||||
|
texture_memory_alloc.framebuffer[frame].start,
|
||||||
|
framebuffer_width,
|
||||||
|
2); // bytes_per_pixel
|
||||||
|
//serial::string("wait_eorv2\n");
|
||||||
|
core_wait_end_of_render_video();
|
||||||
|
//serial::string("wait_eorv2 end\n");
|
||||||
|
|
||||||
|
while (!spg_status::vsync(holly.SPG_STATUS));
|
||||||
|
holly.FB_R_SOF1 = texture_memory_alloc.framebuffer[frame].start;
|
||||||
|
while (spg_status::vsync(holly.SPG_STATUS));
|
||||||
|
|
||||||
theta += half_degree;
|
theta += half_degree;
|
||||||
frame_ix += 1;
|
frame_ix += 1;
|
||||||
if (frame_ix > 100)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ta_wait_translucent_list();
|
|
||||||
core_wait_end_of_render_video();
|
|
||||||
|
|
||||||
serial::string("return\n");
|
serial::string("return\n");
|
||||||
serial::string("return\n");
|
serial::string("return\n");
|
||||||
serial::string("return\n");
|
serial::string("return\n");
|
||||||
|
@ -140,18 +140,43 @@ void core_wait_end_of_render_video()
|
|||||||
"Furthermore, it is strongly recommended that the End of ISP and End of Video interrupts
|
"Furthermore, it is strongly recommended that the End of ISP and End of Video interrupts
|
||||||
be cleared at the same time in order to make debugging easier when an error occurs."
|
be cleared at the same time in order to make debugging easier when an error occurs."
|
||||||
*/
|
*/
|
||||||
while ((system.ISTNRM & istnrm::end_of_render_tsp) == 0) {
|
//serial::string("eorv\n");
|
||||||
|
int64_t count = 0;
|
||||||
|
while (1) {
|
||||||
|
uint32_t istnrm = system.ISTNRM;
|
||||||
|
if ((istnrm & istnrm::end_of_render_tsp) != 0)
|
||||||
|
break;
|
||||||
|
if (istnrm & 0xc0000000) {
|
||||||
|
serial::string("istnrm ");
|
||||||
|
serial::integer<uint32_t>(istnrm);
|
||||||
|
serial::string("isterr ");
|
||||||
|
serial::integer<uint32_t>(system.ISTERR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//serial::integer<uint32_t>(system.ISTERR);
|
||||||
if (system.ISTERR) {
|
if (system.ISTERR) {
|
||||||
//serial::string("core ");
|
//serial::string("core ");
|
||||||
//serial::integer<uint32_t>(system.ISTERR);
|
//serial::integer<uint32_t>(system.ISTERR);
|
||||||
holly.SOFTRESET = softreset::pipeline_soft_reset;
|
holly.SOFTRESET = softreset::pipeline_soft_reset;
|
||||||
holly.SOFTRESET = 0;
|
holly.SOFTRESET = 0;
|
||||||
|
//break;
|
||||||
|
}
|
||||||
|
if (count > 10000000) {
|
||||||
|
serial::string("core timeout:\n");
|
||||||
|
serial::string("isterr ");
|
||||||
|
serial::integer<uint32_t>(system.ISTERR);
|
||||||
|
serial::string("istnrm ");
|
||||||
|
serial::integer<uint32_t>(system.ISTNRM);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
count += 1;
|
||||||
};
|
};
|
||||||
system.ISTNRM = istnrm::end_of_render_tsp
|
system.ISTNRM = istnrm::end_of_render_tsp
|
||||||
| istnrm::end_of_render_isp
|
| istnrm::end_of_render_isp
|
||||||
| istnrm::end_of_render_video;
|
| istnrm::end_of_render_video;
|
||||||
|
|
||||||
|
holly.SOFTRESET = softreset::pipeline_soft_reset;
|
||||||
|
holly.SOFTRESET = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void core_flip(uint32_t frame_ix)
|
void core_flip(uint32_t frame_ix)
|
||||||
|
@ -30,10 +30,10 @@ constexpr texture_memory_alloc texture_memory_alloc = {
|
|||||||
.background = {{0x07'ffe0, 0x08'0000}, {0x47'ffe0, 0x48'0000}},
|
.background = {{0x07'ffe0, 0x08'0000}, {0x47'ffe0, 0x48'0000}},
|
||||||
.object_list = {{0x08'0000, 0x0f'ffe0}, {0x48'0000, 0x4f'ffe0}}, // ~122880 object list pointers
|
.object_list = {{0x08'0000, 0x0f'ffe0}, {0x48'0000, 0x4f'ffe0}}, // ~122880 object list pointers
|
||||||
.region_array = {{0x10'0000, 0x11'0000}, {0x50'0000, 0x51'0000}}, // ~9 render passes
|
.region_array = {{0x10'0000, 0x11'0000}, {0x50'0000, 0x51'0000}}, // ~9 render passes
|
||||||
//.framebuffer = {{0x11'0000, 0x1b'8c00}, {0x51'0000, 0x5b'8c00}}, // 720x480*2
|
.framebuffer = {{0x11'0000, 0x1b'8c00}, {0x51'0000, 0x5b'8c00}}, // 720x480*2
|
||||||
.framebuffer = {{0x11'0000, 0x23'c000}, {0x51'0000, 0x63'c000}}, // 640x480*4
|
//.framebuffer = {{0x11'0000, 0x23'c000}, {0x51'0000, 0x63'c000}}, // 640x480*4
|
||||||
|
|
||||||
// 64-bit addresses
|
// 64-bit addresses
|
||||||
//.texture = {0x37'1800, 0x80'0000}
|
.texture = {0x37'1800, 0x80'0000}
|
||||||
.texture = {0x57'1800, 0x80'0000}
|
//.texture = {0x57'1800, 0x80'0000}
|
||||||
};
|
};
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
.macro inner_multiplication
|
||||||
|
|
||||||
/* y multiplication */
|
/* y multiplication */
|
||||||
mov #4,r1 /* r1 : temporary */
|
mov #4,r1 /* r1 : temporary */
|
||||||
fmov.s @r0,fr0 /* 0 */
|
fmov.s @r0,fr0 /* 0 */
|
||||||
@ -49,11 +51,27 @@
|
|||||||
fsts FPUL,fr3
|
fsts FPUL,fr3
|
||||||
fadd fr3,fr7
|
fadd fr3,fr7
|
||||||
|
|
||||||
|
add #4,r0 /* next pixel */
|
||||||
|
|
||||||
fschg
|
fschg
|
||||||
fmov xd0,dr0 /* load 100.f constant */
|
fmov xd0,dr0 /* load 100.f constant */
|
||||||
fcmp/gt fr0,fr7
|
fcmp/gt fr0,fr7
|
||||||
fschg
|
fschg
|
||||||
|
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro sobel_fipr_inner_2px
|
||||||
|
mov #0,r9
|
||||||
|
|
||||||
|
inner_multiplication
|
||||||
movt r9
|
movt r9
|
||||||
add #-1,r9
|
add #-1,r9
|
||||||
|
extu.w r9,r9
|
||||||
|
|
||||||
add #4,r0 /* next pixel */
|
inner_multiplication
|
||||||
|
movt r1
|
||||||
|
add #-1,r1
|
||||||
|
extu.w r1,r1
|
||||||
|
shll16 r1
|
||||||
|
or r1,r9
|
||||||
|
.endm
|
||||||
|
@ -15,7 +15,7 @@ _sobel_fipr_store_queue2:
|
|||||||
/* r11: var (y loop counter) */
|
/* r11: var (y loop counter) */
|
||||||
/* r12: var (prefetch address: input address + 1280 4) */
|
/* r12: var (prefetch address: input address + 1280 4) */
|
||||||
/* r13: var (input address) */
|
/* r13: var (input address) */
|
||||||
/* r14: - */
|
/* r14: (temporary) */
|
||||||
|
|
||||||
__setup:
|
__setup:
|
||||||
mov.l r8,@-r15
|
mov.l r8,@-r15
|
||||||
@ -24,6 +24,7 @@ __setup:
|
|||||||
mov.l r11,@-r15
|
mov.l r11,@-r15
|
||||||
mov.l r12,@-r15
|
mov.l r12,@-r15
|
||||||
mov.l r13,@-r15
|
mov.l r13,@-r15
|
||||||
|
mov.l r14,@-r15
|
||||||
fmov.s fr12,@-r15
|
fmov.s fr12,@-r15
|
||||||
fmov.s fr13,@-r15
|
fmov.s fr13,@-r15
|
||||||
fmov.s fr14,@-r15
|
fmov.s fr14,@-r15
|
||||||
@ -81,7 +82,7 @@ __setup:
|
|||||||
nop
|
nop
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
_const_100f: .float 3900
|
_const_100f: .float 50
|
||||||
|
|
||||||
_const_store_queue: .long 0xe0000000
|
_const_store_queue: .long 0xe0000000
|
||||||
_const_store_queue_mask: .long 0x03ffffc0 /* (0xffffffff & (~0b111111)) & (~(0b111111 << 26)) */
|
_const_store_queue_mask: .long 0x03ffffc0 /* (0xffffffff & (~0b111111)) & (~(0b111111 << 26)) */
|
||||||
@ -93,54 +94,56 @@ _const_1280: .short (1280 * 4)
|
|||||||
_const_1281: .short (1281 * 4)
|
_const_1281: .short (1281 * 4)
|
||||||
_const_1282: .short (1282 * 4)
|
_const_1282: .short (1282 * 4)
|
||||||
|
|
||||||
/* use r10 as temporary to load the first 1280 pixels; 8 pixels per loop iteration */
|
/* use r10 as temporary to load the first 1280 pixels; 16 pixels per loop iteration */
|
||||||
|
.include "unpack_pixel.s"
|
||||||
.align 4
|
.align 4
|
||||||
_prime_pixels_loop_init:
|
_prime_pixels_loop_init:
|
||||||
mov #80,r10 /* 1280 / 8 */
|
|
||||||
shll r10
|
|
||||||
mov r0,r12
|
mov r0,r12
|
||||||
|
mov #80,r10 /* 1280 / 16 */
|
||||||
|
shll r10
|
||||||
|
|
||||||
_prime_pixels_loop:
|
_prime_pixels_loop:
|
||||||
.include "unpack_pixel.s"
|
unpack_pixel_16
|
||||||
dt r10
|
dt r10
|
||||||
bt _loop_init
|
bt _loop_init
|
||||||
bra _prime_pixels_loop
|
bra _prime_pixels_loop
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.align 4
|
|
||||||
_loop_init:
|
_loop_init:
|
||||||
/* skip first row */
|
/* skip first output row */
|
||||||
add r3,r0 /* r3: const (640 * 4) */
|
mov r3,r1
|
||||||
add r3,r8
|
shlr r1
|
||||||
|
add r1,r8 /* r3: 640 * 4 */
|
||||||
|
|
||||||
mov.w _const_height,r11 /* 478 */
|
mov.w _const_height,r11 /* 478 */
|
||||||
bra _loop
|
bra _loop
|
||||||
mov #80,r10 /* 640 / 8 */
|
mov #40,r10 /* 640 / 8 */
|
||||||
|
|
||||||
_const_height: .short 478
|
_const_height: .short 476
|
||||||
|
/*_const_height: .short 238*/
|
||||||
|
|
||||||
|
.include "sobel_fipr_inner2.s"
|
||||||
_loop:
|
_loop:
|
||||||
_loop_width:
|
_loop_width:
|
||||||
/* prefetch at r8 + 1280 */
|
/* prefetch at r8 + 1280 */
|
||||||
|
unpack_pixel_16
|
||||||
|
|
||||||
/* process the next 8 pixels */
|
/* process the next 16 pixels */
|
||||||
.include "unpack_pixel.s"
|
sobel_fipr_inner_2px
|
||||||
|
|
||||||
.include "sobel_fipr_inner2.s"
|
|
||||||
mov.l r9,@r8 /* save result in the store queue */
|
mov.l r9,@r8 /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(4,r8) /* save result in the store queue */
|
mov.l r9,@(4,r8) /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(8,r8) /* save result in the store queue */
|
mov.l r9,@(8,r8) /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(12,r8) /* save result in the store queue */
|
mov.l r9,@(12,r8) /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(16,r8) /* save result in the store queue */
|
mov.l r9,@(16,r8) /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(20,r8) /* save result in the store queue */
|
mov.l r9,@(20,r8) /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(24,r8) /* save result in the store queue */
|
mov.l r9,@(24,r8) /* save result in the store queue */
|
||||||
.include "sobel_fipr_inner2.s"
|
sobel_fipr_inner_2px
|
||||||
mov.l r9,@(28,r8) /* save result in the store queue */
|
mov.l r9,@(28,r8) /* save result in the store queue */
|
||||||
|
|
||||||
/* send the store queue */
|
/* send the store queue */
|
||||||
@ -158,7 +161,7 @@ _row_decrement:
|
|||||||
dt r11
|
dt r11
|
||||||
bt _return
|
bt _return
|
||||||
bra _loop
|
bra _loop
|
||||||
mov #80,r10 /* 640 / 8 */
|
mov #40,r10 /* 640 / 8 */
|
||||||
|
|
||||||
/* restore registers */
|
/* restore registers */
|
||||||
_return:
|
_return:
|
||||||
@ -166,6 +169,7 @@ _return:
|
|||||||
fmov.s @r15+,fr14
|
fmov.s @r15+,fr14
|
||||||
fmov.s @r15+,fr13
|
fmov.s @r15+,fr13
|
||||||
fmov.s @r15+,fr12
|
fmov.s @r15+,fr12
|
||||||
|
mov.l @r15+,r14
|
||||||
mov.l @r15+,r13
|
mov.l @r15+,r13
|
||||||
mov.l @r15+,r12
|
mov.l @r15+,r12
|
||||||
mov.l @r15+,r11
|
mov.l @r15+,r11
|
||||||
|
@ -1,25 +1,63 @@
|
|||||||
ocbi @r13
|
|
||||||
pref @r13 /* 32 bytes, 8 pixels */
|
|
||||||
|
|
||||||
/* unpack the next 8 pixels */
|
/* unpack the next 8 pixels */
|
||||||
|
|
||||||
fschg
|
/*
|
||||||
|
mov.l @r13,r9
|
||||||
|
extu.b r9,r1
|
||||||
|
shlr8 r9
|
||||||
|
extu.b r9,r2
|
||||||
|
add r1,r2
|
||||||
|
shlr8 r9
|
||||||
|
extu.b r9,r1
|
||||||
|
add r1,r2
|
||||||
|
shlr8 r9
|
||||||
|
add r2,r9
|
||||||
|
lds r9,fpul
|
||||||
|
add #4,r13
|
||||||
|
*/
|
||||||
|
.macro unpack_pixel_inner_nibs
|
||||||
|
|
||||||
.include "unpack_pixel_inner.s"
|
mov.w @r13+,r9
|
||||||
|
|
||||||
|
mov r9,r1 /* nib0 */
|
||||||
|
shlr2 r9
|
||||||
|
shlr2 r9
|
||||||
|
and r14,r1
|
||||||
|
|
||||||
|
mov r9,r2 /* nib1 */
|
||||||
|
shlr2 r9
|
||||||
|
shlr2 r9
|
||||||
|
and r14,r2
|
||||||
|
add r2,r1
|
||||||
|
|
||||||
|
mov r9,r2 /* nib3 */
|
||||||
|
shlr2 r9
|
||||||
|
shlr2 r9
|
||||||
|
and r14,r2
|
||||||
|
add r2,r1
|
||||||
|
|
||||||
|
and r14,r9 /* nib4 */
|
||||||
|
add r9,r1
|
||||||
|
|
||||||
|
lds r1,fpul
|
||||||
|
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro unpack_pixel_8
|
||||||
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr0
|
float fpul,fr0
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr1
|
float fpul,fr1
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr2
|
float fpul,fr2
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr3
|
float fpul,fr3
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr4
|
float fpul,fr4
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr5
|
float fpul,fr5
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr6
|
float fpul,fr6
|
||||||
.include "unpack_pixel_inner.s"
|
unpack_pixel_inner_nibs
|
||||||
float fpul,fr7
|
float fpul,fr7
|
||||||
|
|
||||||
fmov dr0,@r12
|
fmov dr0,@r12
|
||||||
@ -30,5 +68,17 @@
|
|||||||
add #8,r12
|
add #8,r12
|
||||||
fmov dr6,@r12
|
fmov dr6,@r12
|
||||||
add #8,r12
|
add #8,r12
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro unpack_pixel_16
|
||||||
|
ocbi @r13
|
||||||
|
pref @r13 /* 32 bytes, 16 pixels */
|
||||||
|
mov #15,r14
|
||||||
|
|
||||||
fschg
|
fschg
|
||||||
|
|
||||||
|
unpack_pixel_8
|
||||||
|
unpack_pixel_8
|
||||||
|
|
||||||
|
fschg
|
||||||
|
.endm
|
||||||
|
@ -1,12 +1 @@
|
|||||||
mov.l @r13,r9
|
|
||||||
extu.b r9,r1
|
|
||||||
shlr8 r9
|
|
||||||
extu.b r9,r2
|
|
||||||
add r1,r2
|
|
||||||
shlr8 r9
|
|
||||||
extu.b r9,r1
|
|
||||||
add r1,r2
|
|
||||||
shlr8 r9
|
|
||||||
add r2,r9
|
|
||||||
lds r9,fpul
|
|
||||||
add #4,r13
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user