diff --git a/base.mk b/base.mk index 6c20686..08062f1 100644 --- a/base.mk +++ b/base.mk @@ -12,6 +12,7 @@ CFLAGS += -Wno-error=maybe-uninitialized CFLAGS += -Wno-error=unused-but-set-variable CFLAGS += -Wno-error=unused-variable CFLAGS += -Wno-error=unused-function +CFLAGS += -D__dreamcast__ CXXFLAGS += -fno-exceptions -fno-non-call-exceptions -fno-rtti -fno-threadsafe-statics diff --git a/example/modifier_volume_cube_hosted.cpp b/example/modifier_volume_cube_hosted.cpp new file mode 100644 index 0000000..386596c --- /dev/null +++ b/example/modifier_volume_cube_hosted.cpp @@ -0,0 +1,960 @@ +#include +#include +#include +#include + +#include "holly/region_array.hpp" +#include "holly/ta_bits.hpp" +#include "holly/isp_tsp.hpp" +#include "holly/ta_global_parameter.hpp" +#include "holly/ta_vertex_parameter.hpp" +#include "holly/ta_parameter.hpp" +#include "holly/software_ta.hpp" +#include "holly/texture_memory_alloc3.hpp" + +#include "model/model.h" +#include "model/cube/model.h" +#include "model/plane/model.h" + +#include "math/vec2.hpp" +#include "math/vec3.hpp" +#include "math/vec4.hpp" +#include "math/mat3x3.hpp" +#include "math/mat4x4.hpp" +#include "math/math.hpp" + + +using vec2 = vec<2, float>; +using vec3 = vec<3, float>; +using vec4 = vec<4, float>; +using mat3x3 = mat<3, 3, float>; +using mat4x4 = mat<4, 4, float>; + +const float deg = 0.017453292519943295; + +#define _fsrra(n) (1.0f / (sqrtf(n))) + +static inline float inverse_length(vec3 v) +{ + float f = dot(v, v); + return _fsrra(f); +} + +static inline int max(int a, int b) +{ + return (a > b) ? a : b; +} + +static inline int min(int a, int b) +{ + return (a > b) ? b : a; +} + +void global_polygon_type_0(ta_parameter_writer& writer, bool shadow) +{ + const uint32_t parameter_control_word = para_control::para_type::polygon_or_modifier_volume + | para_control::list_type::opaque + | obj_control::col_type::packed_color + | (shadow ? 
obj_control::shadow : 0) + ; + + const uint32_t isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::greater + | isp_tsp_instruction_word::culling_mode::no_culling; + + const uint32_t tsp_instruction_word = tsp_instruction_word::fog_control::no_fog + | tsp_instruction_word::src_alpha_instr::one + | tsp_instruction_word::dst_alpha_instr::zero + ; + + const uint32_t texture_control_word = 0; + + writer.append() = + ta_global_parameter::polygon_type_0(parameter_control_word, + isp_tsp_instruction_word, + tsp_instruction_word, + texture_control_word, + 0, // data_size_for_sort_dma + 0 // next_address_for_sort_dma + ); +} + +void global_polygon_type_1(ta_parameter_writer& writer, bool shadow, float r, float g, float b) +{ + const uint32_t parameter_control_word = para_control::para_type::polygon_or_modifier_volume + | para_control::list_type::opaque + | obj_control::col_type::intensity_mode_1 + | (shadow ? obj_control::shadow : 0) + ; + + const uint32_t isp_tsp_instruction_word = isp_tsp_instruction_word::depth_compare_mode::greater + | isp_tsp_instruction_word::culling_mode::no_culling; + + const uint32_t tsp_instruction_word = tsp_instruction_word::fog_control::no_fog + | tsp_instruction_word::src_alpha_instr::one + | tsp_instruction_word::dst_alpha_instr::zero + ; + + const uint32_t texture_control_word = 0; + + const float alpha = 1.0f; + + writer.append() = + ta_global_parameter::polygon_type_1(parameter_control_word, + isp_tsp_instruction_word, + tsp_instruction_word, + texture_control_word, + alpha, + r, + g, + b + ); +} + +void global_modifier_volume(ta_parameter_writer& writer) +{ + const uint32_t parameter_control_word = para_control::para_type::polygon_or_modifier_volume + | para_control::list_type::opaque_modifier_volume + ; + + const uint32_t isp_tsp_instruction_word = isp_tsp_instruction_word::volume_instruction::normal_polygon + | isp_tsp_instruction_word::culling_mode::no_culling; + + writer.append() = + 
ta_global_parameter::modifier_volume(parameter_control_word, + isp_tsp_instruction_word + ); +} + +void transfer_line(ta_parameter_writer& writer, vec3 p1, vec3 p2, uint32_t base_color) +{ + float dy = p2.y - p1.y; + float dx = p2.x - p1.x; + float d = _fsrra(dx * dx + dy * dy) * 0.7f; + float dy1 = dy * d; + float dx1 = dx * d; + + assert(p1.z < 1); + assert(p2.z < 1); + + const vec3 v[4] = { + { p1.x + dy1, p1.y + -dx1, p1.z }, + { p1.x + -dy1, p1.y + dx1, p1.z }, + { p2.x + -dy1, p2.y + dx1, p2.z }, + { p2.x + dy1, p2.y + -dx1, p2.z }, + }; + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(false), + v[0].x, v[0].y, v[0].z, + base_color); + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(false), + v[1].x, v[1].y, v[1].z, + base_color); + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(false), + v[3].x, v[3].y, v[3].z, + base_color); + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(true), + v[2].x, v[2].y, v[2].z, + base_color); +} + +vec3 screen_transform(const mat4x4& screen, vec3 v) +{ + v = screen * v; + + float dim = 480 / 2.0 * 1.5; + + return { + v.x / v.z * dim + 640 / 2.0f, + v.y / v.z * dim + 480 / 2.0f, + 1 / v.z, + }; +} + +void render_basis(ta_parameter_writer& writer, const mat4x4& screen) +{ + global_polygon_type_0(writer, false); + + vec3 origin = screen_transform(screen, {0, 0, 0}); + vec3 z = screen_transform(screen, {0, 0, 1}); + vec3 y = screen_transform(screen, {0, 1, 0}); + vec3 x = screen_transform(screen, {1, 0, 0}); + + uint32_t base_color = 0xffffff; + + // magenta: Z + transfer_line(writer, origin, z, base_color); + + // yellow: Y + transfer_line(writer, origin, y, base_color); + + // cyan: X + transfer_line(writer, origin, x, base_color); +} + +//#define LINE_DRAWING 1 + +static inline void render_quad(ta_parameter_writer& writer, + uint32_t 
base_color, + vec3 ap, + vec3 bp, + vec3 cp, + vec3 dp) +{ +#ifdef LINE_DRAWING + transfer_line(writer, ap, bp, base_color); + transfer_line(writer, bp, cp, base_color); + transfer_line(writer, cp, dp, base_color); + transfer_line(writer, dp, ap, base_color); +#else + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(false), + ap.x, ap.y, ap.z, + base_color); + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(false), + bp.x, bp.y, bp.z, + base_color); + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(false), + dp.x, dp.y, dp.z, + base_color); + + writer.append() = + ta_vertex_parameter::polygon_type_0(polygon_vertex_parameter_control_word(true), + cp.x, cp.y, cp.z, + base_color); +#endif +} + +static inline void render_quad_type2(ta_parameter_writer& writer, + float intensity, + vec3 ap, + vec3 bp, + vec3 cp, + vec3 dp) +{ +#ifdef LINE_DRAWING +#else + writer.append() = + ta_vertex_parameter::polygon_type_2(polygon_vertex_parameter_control_word(false), + ap.x, ap.y, ap.z, + intensity); + + writer.append() = + ta_vertex_parameter::polygon_type_2(polygon_vertex_parameter_control_word(false), + bp.x, bp.y, bp.z, + intensity); + + writer.append() = + ta_vertex_parameter::polygon_type_2(polygon_vertex_parameter_control_word(false), + dp.x, dp.y, dp.z, + intensity); + + writer.append() = + ta_vertex_parameter::polygon_type_2(polygon_vertex_parameter_control_word(true), + cp.x, cp.y, cp.z, + intensity); +#endif +} + +static inline void render_tri(ta_parameter_writer& writer, + uint32_t base_color, + vec3 ap, + vec3 bp, + vec3 cp) +{ +#ifdef LINE_DRAWING + transfer_line(writer, ap, bp, base_color); + transfer_line(writer, bp, cp, base_color); + transfer_line(writer, cp, ap, base_color); +#else +#endif +} + +static inline void render_last_tri_mod(ta_parameter_writer& writer) +{ +#ifdef LINE_DRAWING +#else + const uint32_t 
/**
 * Mark edge (a, b) as bordering a face on one side of the light.
 *
 * Each undirected edge is stored once, at [min(a, b) * edge_stride + max(a, b)].
 * Bit 1 is set when `l_dot_n_b` is true and bit 0 when it is false, so an edge
 * that ends up with value 0b11 was reported by faces on both sides.
 *
 * @param edge_coloring  edge_stride * edge_stride byte table, updated in place
 * @param edge_stride    row length of the table (number of vertices)
 * @param l_dot_n_b      sign classification of the face that owns the edge
 * @param a, b           vertex indices of the edge, in either order
 */
void set_edge_coloring(uint8_t * edge_coloring,
                       const int edge_stride,
                       bool l_dot_n_b, int a, int b)
{
  // Normalise the edge so (a, b) and (b, a) land in the same cell.
  const int lo = (a > b) ? b : a;
  const int hi = (a > b) ? a : b;

  const uint8_t bit = l_dot_n_b ? 0b10 : 0b01;

  edge_coloring[lo * edge_stride + hi] |= bit;
}

// An undirected edge between two vertex indices.
struct edge {
  int a;
  int b;
};

// xorshift32 state. Zero is a fixed point of the generator (it would return 0
// forever), so the state starts from a non-zero seed; callers may still
// overwrite it to obtain a reproducible sequence.
static uint32_t _random = 0x12345789;

/**
 * Advance the 32-bit xorshift generator (shifts 13 / 17 / 5) and return the
 * new state.
 */
uint32_t xorshift()
{
  uint32_t x = _random;
  x ^= x << 13;
  x ^= x >> 17;
  x ^= x << 5;
  _random = x;
  return x;
}
render_tri_mod(writer, fp, ap, fpo); + render_last_tri_mod(writer); + render_tri_mod(writer, apo, fpo, ap); + + /* + random = 0x12345789; + + render_quad(writer, xorshift(), ap, bp, bpo, apo); + + render_quad(writer, xorshift(), bp, cp, cpo, bpo); + + render_quad(writer, xorshift(), cp, dp, dpo, cpo); + + render_quad(writer, xorshift(), dp, ep, epo, dpo); + + render_quad(writer, xorshift(), ep, fp, fpo, epo); + + render_quad(writer, xorshift(), fp, ap, apo, fpo); + */ +} + +void render_silhouette(ta_parameter_writer& writer, + const mat4x4& screen, + const mat4x4& model, + const vec3 light_vec, + const uint8_t * edge_coloring, + const int edge_stride) +{ + struct edge silhouette[6]; + int ix = 0; + + for (int a = 0; a < edge_stride; a++) { + for (int b = 0; b < edge_stride; b++) { + uint8_t coloring = edge_coloring[a * edge_stride + b]; + if (coloring == 0b11) { + silhouette[ix++] = {a, b}; + } + } + } + assert(ix == 6); + + int last_ix = 0; + int order_ix = 0; + int order_vtx[6]; + order_vtx[order_ix++] = silhouette[0].a; + + // calculate vertex ordering + while (order_ix < 6) { + for (int i = 1; i < 6; i++) { + if (i == last_ix) + continue; + + int last_vtx = order_vtx[order_ix - 1]; + if (last_vtx == silhouette[i].a) { + last_ix = i; + order_vtx[order_ix++] = silhouette[i].b; + break; + } + if (last_vtx == silhouette[i].b) { + last_ix = i; + order_vtx[order_ix++] = silhouette[i].a; + break; + } + } + } + + const vec3 * position = cube_position; + + vec3 ap = screen_transform(screen, model * position[order_vtx[0]]); + vec3 bp = screen_transform(screen, model * position[order_vtx[1]]); + vec3 cp = screen_transform(screen, model * position[order_vtx[2]]); + vec3 dp = screen_transform(screen, model * position[order_vtx[3]]); + vec3 ep = screen_transform(screen, model * position[order_vtx[4]]); + vec3 fp = screen_transform(screen, model * position[order_vtx[5]]); + + float scale = 5; + mat4x4 translate = { + 1, 0, 0, -light_vec.x * scale, + 0, 1, 0, -light_vec.y * 
scale, + 0, 0, 1, -light_vec.z * scale, + 0, 0, 0, 1, + }; + + mat4x4 model2 = model * translate; + + vec3 apo = screen_transform(screen, model2 * position[order_vtx[0]]); + vec3 bpo = screen_transform(screen, model2 * position[order_vtx[1]]); + vec3 cpo = screen_transform(screen, model2 * position[order_vtx[2]]); + vec3 dpo = screen_transform(screen, model2 * position[order_vtx[3]]); + vec3 epo = screen_transform(screen, model2 * position[order_vtx[4]]); + vec3 fpo = screen_transform(screen, model2 * position[order_vtx[5]]); + + if (0) { // perimeter + uint32_t base_color = 0xff0080; + + transfer_line(writer, ap, bp, base_color); + transfer_line(writer, bp, cp, base_color); + transfer_line(writer, cp, dp, base_color); + transfer_line(writer, dp, ep, base_color); + transfer_line(writer, ep, fp, base_color); + transfer_line(writer, fp, ap, base_color); + } + + if (1) { // near end cap + render_tri_mod(writer, ap, bp, cp); + render_tri_mod(writer, cp, dp, ep); + render_tri_mod(writer, ep, fp, ap); + render_tri_mod(writer, ap, cp, ep); + } + + if (1) { // far end cap + render_tri_mod(writer, apo, bpo, cpo); + render_tri_mod(writer, cpo, dpo, epo); + render_tri_mod(writer, epo, fpo, apo); + render_tri_mod(writer, apo, cpo, epo); + } + + if (1) { + render_extension_mesh(writer, + ap, + bp, + cp, + dp, + ep, + fp, + apo, + bpo, + cpo, + dpo, + epo, + fpo); + } +} + +void render_cube(ta_parameter_writer& writer, + const mat4x4& screen, + const vec3 light_vec, + float theta) +{ + //float ct = cos(theta); + //float st = sin(theta); + float scale = 0.3f; + const mat4x4 s = { + scale, 0, 0, 0, + 0, scale, 0, 0, + 0, 0, scale, 0, + 0, 0, 0, 1, + }; + /* + const mat4x4 rz = { + ct, -st, 0, 0, + st, ct, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1, + }; + */ + mat4x4 model = s; + + const vec3 * normal = cube_normal; + const vec3 * position = cube_position; + const union quadrilateral * quadrilateral = cube_Cube_quadrilateral; + + const int edge_stride = 8; + const int edge_coloring_length = 
edge_stride * edge_stride; + uint8_t __attribute__((aligned(4))) edge_coloring[edge_coloring_length]; + for (int i = 0; i < edge_coloring_length / 4; i++) + reinterpret_cast(edge_coloring)[i] = 0; + + //uint32_t base_color = l_dot_n_b ? 0xff8000 : 0x0080ff; + const float red = 0.0f; + const float green = 0.5f; + const float blue = 1.0f; + + global_polygon_type_1(writer, false, red, green, blue); // no self-shadow + + for (int i = 0; i < 6; i++) { + const union quadrilateral& q = quadrilateral[i]; + vec3 n3 = normal[q.a.normal]; + vec4 n4 = model * (vec4){n3.x, n3.y, n3.z, 0.f}; // no translation component + vec3 n = {n4.x, n4.y, n4.z}; + float n_dot_l = dot(n, light_vec); + bool n_dot_l_b = n_dot_l > 0; + + set_edge_coloring(edge_coloring, edge_stride, n_dot_l_b, q.a.position, q.b.position); + set_edge_coloring(edge_coloring, edge_stride, n_dot_l_b, q.b.position, q.c.position); + set_edge_coloring(edge_coloring, edge_stride, n_dot_l_b, q.c.position, q.d.position); + set_edge_coloring(edge_coloring, edge_stride, n_dot_l_b, q.d.position, q.a.position); + + vec3 ap = model * position[q.a.position]; + vec3 bp = model * position[q.b.position]; + vec3 cp = model * position[q.c.position]; + vec3 dp = model * position[q.d.position]; + + vec3 sap = screen_transform(screen, ap); + vec3 sbp = screen_transform(screen, bp); + vec3 scp = screen_transform(screen, cp); + vec3 sdp = screen_transform(screen, dp); + + float intensity = 0.2f; + + if (n_dot_l > 0) { + intensity += 0.5f * n_dot_l * (inverse_length(n) * inverse_length(light_vec)); + if (intensity > 1.0f) + intensity = 1.0f; + } + + render_quad_type2(writer, + intensity, + sap, + sbp, + scp, + sdp); + } + + if (0) { + // end of opaque list + writer.append() = + ta_global_parameter::end_of_list(para_control::para_type::end_of_list); + + global_modifier_volume(writer); + + render_silhouette(writer, + screen, + model, + light_vec, + edge_coloring, + edge_stride); + } +} + +void render_plane(ta_parameter_writer& writer, + 
const mat4x4& screen, + const vec3 light_vec) +{ + const vec3 * normal = plane_normal; + const vec3 * position = plane_position; + const union quadrilateral * quadrilateral = plane_Plane.quadrilateral; + int count = plane_Plane.quadrilateral_count; + + float scale = 3; + float translate = 1; + const mat4x4 model = { + scale, 0, 0, 0, + 0, scale, 0, 0, + 0, 0, scale, translate, + 0, 0, 0, 1, + }; + + //uint32_t base_color = 0xffff80; + const float red = 1.0f; + const float green = 1.0f; + const float blue = 0.5f; + + global_polygon_type_1(writer, true, red, green, blue); // with shadow + + for (int i = 0; i < count; i++) { + const union quadrilateral& q = quadrilateral[i]; + + vec3 ap = model * position[q.a.position]; + vec3 bp = model * position[q.b.position]; + vec3 cp = model * position[q.c.position]; + vec3 dp = model * position[q.d.position]; + + float intensity = 0.2f; + + vec4 _n = normal[q.a.normal]; + vec4 n4 = model * (vec4){_n.x, _n.y, _n.z, 0}; // no translation component + vec3 n = {n4.x, n4.y, n4.z}; + float n_dot_l = -dot(n, light_vec); + + if (n_dot_l > 0) { + intensity += 0.5f * n_dot_l * (inverse_length(n) * inverse_length(light_vec)); + if (intensity > 1.0f) + intensity = 1.0f; + } + + vec3 sap = screen_transform(screen, ap); + vec3 sbp = screen_transform(screen, bp); + vec3 scp = screen_transform(screen, cp); + vec3 sdp = screen_transform(screen, dp); + + render_quad_type2(writer, + intensity, + sap, + sbp, + scp, + sdp); + } +} + +constexpr inline mat4x4 screen_rotation(float theta) +{ + float zt = -0.7853981633974483 / 1 + theta / 5; + float xt = -0.7853981633974483 * 0.7 + 0.3 * sin(theta / 3); + + mat4x4 rx = { + 1, 0, 0, 0, + 0, cos(xt), -sin(xt), 0, + 0, sin(xt), cos(xt), 0, + 0, 0, 0, 1, + }; + + mat4x4 rz = { + cos(zt), -sin(zt), 0, 0, + sin(zt), cos(zt), 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1, + }; + + mat4x4 t = { + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 2.5, + 0, 0, 0, 1, + }; + + return t * rx * rz; +} + +void 
/**
 * Pack four channel values into one 16-bit word: the low bit of `a` goes to
 * bit 15, and the low five bits of `r`, `g` and `b` go to bits 14-10, 9-5 and
 * 4-0 respectively. Higher input bits are discarded.
 */
static inline uint16_t argb1555(int a, int r, int g, int b)
{
  const int alpha_bit  = (a & 0x01) << 15;
  const int red_bits   = (r & 0x1f) << 10;
  const int green_bits = (g & 0x1f) << 5;
  const int blue_bits  = (b & 0x1f);

  return alpha_bit | red_bits | green_bits | blue_bits;
}
+ ol_base[pass] = ol_base[pass - 1] + num_tiles * opb_size[pass - 1].total(); + } + + uint32_t ix = 0; + + for (uint32_t y = 0; y < height; y++) { + for (uint32_t x = 0; x < width; x++) { + for (uint32_t pass = 0; pass < num_render_passes; pass++) { + region_array[ix].tile = REGION_ARRAY__TILE_Y_POSITION(y) + | REGION_ARRAY__TILE_X_POSITION(x); + + if (pass == (num_render_passes - 1) && y == (height - 1) && x == (width - 1)) + region_array[ix].tile |= REGION_ARRAY__LAST_REGION; + + if (pass != (num_render_passes - 1)) + region_array[ix].tile |= REGION_ARRAY__FLUSH_ACCUMULATE; + + if (pass > 0) + region_array[ix].tile |= REGION_ARRAY__Z_CLEAR; + + uint32_t tile_index = y * width + x; + region_array[ix].opaque_list_pointer = (opb_size[pass].opaque == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base[pass] + (opb_size[pass].opaque * tile_index) + ); + + region_array[ix].opaque_modifier_volume_list_pointer = (opb_size[pass].opaque_modifier == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base[pass] + num_tiles * ( opb_size[pass].opaque + ) + + (opb_size[pass].opaque_modifier * tile_index) + ); + + region_array[ix].translucent_list_pointer = (opb_size[pass].translucent == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base[pass] + num_tiles * ( opb_size[pass].opaque + + opb_size[pass].opaque_modifier + ) + + (opb_size[pass].translucent * tile_index) + ); + region_array[ix].translucent_modifier_volume_list_pointer = (opb_size[pass].translucent_modifier == 0) ? REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base[pass] + num_tiles * ( opb_size[pass].opaque + + opb_size[pass].opaque_modifier + + opb_size[pass].translucent + ) + + (opb_size[pass].translucent_modifier * tile_index) + ); + region_array[ix].punch_through_list_pointer = (opb_size[pass].punch_through == 0) ? 
REGION_ARRAY__LIST_POINTER__EMPTY : + (ol_base[pass] + num_tiles * ( opb_size[pass].opaque + + opb_size[pass].opaque_modifier + + opb_size[pass].translucent + + opb_size[pass].translucent_modifier + ) + + (opb_size[pass].punch_through * tile_index) + ); + fprintf(stderr, "ra_ol %d %d %08x\n", x, y, region_array[ix].opaque_list_pointer); + ix += 1; + } + } + } +} + +struct vertex_parameter { + float x; + float y; + float z; + uint32_t base_color; +}; // ISP_BACKGND_T skip(1) + +struct isp_tsp_parameter { + uint32_t isp_tsp_instruction_word; + uint32_t tsp_instruction_word; + uint32_t texture_control_word; + vertex_parameter vertex[3]; +}; + +void background_parameter3(const uint32_t background_start, + const uint32_t color, + uint32_t * dst) +{ + auto parameter = reinterpret_cast + (&dst[background_start / 4]); + + parameter->isp_tsp_instruction_word + = isp_tsp_instruction_word::depth_compare_mode::always + | isp_tsp_instruction_word::culling_mode::no_culling; + + parameter->tsp_instruction_word + = tsp_instruction_word::src_alpha_instr::one + | tsp_instruction_word::dst_alpha_instr::zero + | tsp_instruction_word::fog_control::no_fog; + + parameter->texture_control_word + = 0; + + parameter->vertex[0].x = 0.f; + parameter->vertex[0].y = 0.f; + parameter->vertex[0].z = 1.f/100000; + parameter->vertex[0].base_color = color; + + parameter->vertex[1].x = 639.f; + parameter->vertex[1].y = 0.f; + parameter->vertex[1].z = 1.f/100000; + parameter->vertex[1].base_color = color; + + parameter->vertex[2].x = 639.f; + parameter->vertex[2].y = 479.f; + parameter->vertex[2].z = 1.f/100000; + parameter->vertex[2].base_color = color; +} + +static uint8_t __attribute__((aligned(32))) texture_memory[8 * 1024 * 1024]; + +int main() +{ + constexpr uint32_t ta_alloc = 0 + //ta_alloc_ctrl::pt_opb::_16x4byte + | ta_alloc_ctrl::tm_opb::no_list + | ta_alloc_ctrl::t_opb::no_list + //| ta_alloc_ctrl::om_opb::_16x4byte + | ta_alloc_ctrl::o_opb::_16x4byte; + + constexpr int render_passes = 1; 
+ constexpr struct opb_size opb_size[render_passes] = { + { + .opaque = 16 * 4, + //.opaque_modifier = 16 * 4, + .translucent = 0, + .translucent_modifier = 0, + .punch_through = 0 + } + }; + + const int framebuffer_width = 640; + const int framebuffer_height = 480; + const int tile_width = framebuffer_width / 32; + const int tile_height = framebuffer_height / 32; + + for (int i = 0; i < 2; i++) { + region_array_multipass2(tile_width, + tile_height, + opb_size, + render_passes, + texture_memory_alloc.region_array[i].start, + texture_memory_alloc.object_list[i].start, + (uint32_t*)texture_memory); + + background_parameter3(texture_memory_alloc.background[i].start, + 0xff202040, + (uint32_t*)texture_memory); + } + + ta_parameter_writer writer = ta_parameter_writer(ta_parameter_buf); + + int ta = 0; + int core = 0; + + const float degree = 0.017453292519943295 / 5; + float theta = 0; + + const mat4x4 screen = screen_rotation(theta); + + vec3 light_vec = update_light(); + + writer.offset = 0; + transfer_scene(writer, screen, light_vec); + + //size_t len = fwrite(writer.buf, 1, writer.offset, stdout); + + struct ta_configuration config; + + config.isp_base = texture_memory_alloc.isp_tsp_parameters[ta].start; + config.isp_limit = texture_memory_alloc.isp_tsp_parameters[ta].end; + config.ol_base = texture_memory_alloc.object_list[ta].start; + config.ol_limit = texture_memory_alloc.object_list[ta].end; + config.alloc_ctrl = ta_alloc; + config.next_opb_init = 0; + config.tile_x_num = tile_width; + config.tile_y_num = tile_height; + + software_ta_init(&config); + software_ta_transfer(writer.buf, writer.offset, texture_memory); + + size_t len = fwrite(texture_memory, 1, (sizeof (texture_memory)), stdout); + assert(len == (sizeof (texture_memory))); +} diff --git a/holly/software_ta.cpp b/holly/software_ta.cpp new file mode 100644 index 0000000..d19df1b --- /dev/null +++ b/holly/software_ta.cpp @@ -0,0 +1,739 @@ +#include "software_ta.hpp" + +#if defined(__dreamcast__) 
// A 32-bit word viewed either as an integer or as a float.
union i32_f {
  int32_t i;
  float f;
};

namespace para_type {
  constexpr int end_of_list = 0;
  constexpr int user_tile_clip = 1;
  constexpr int object_list_set = 2;
  constexpr int polygon_or_modifier_volume = 4;
  constexpr int sprite = 5;
  constexpr int vertex = 7;
}

namespace list_type {
  constexpr int opaque = 0;
  constexpr int opaque_modifier_volume = 1;
  constexpr int translucent = 2;
  constexpr int translucent_modifier_volume = 3;
  constexpr int punch_through = 4;
}

namespace col_type {
  constexpr int packed_color = 0;
  constexpr int floating_color = 1;
  constexpr int intensity_mode_1 = 2;
  constexpr int intensity_mode_2 = 3;
}

// Type field of an object-list word (bits 31..29). The shifts are done in
// unsigned arithmetic so that setting bit 31 never overflows a signed int; the
// resulting int values are the same as before.
namespace object_list_data {
  constexpr int triangle_strip = static_cast<int>(0b0u << 31);
  constexpr int triangle_array = static_cast<int>(0b100u << 29);
  constexpr int quad_array = static_cast<int>(0b101u << 29);
  constexpr int object_pointer_block_link = static_cast<int>(0b111u << 29);
}

// if entry == 0; calculate ol_current from ol_base
struct tile_state {
  int8_t entry;
  int8_t current_list_type; // -1 when no list is open for this tile
  int32_t ol_base; // words
  int32_t ol_current; // words
};

// Per-list offsets (in words) from a tile's ol_base.
//
// The named fields and list_type[] are two views of the same five ints, so
// that list_type[list_type::X] reads the offset stored through the field named
// X. The named fields must sit together in one struct inside the union: as
// direct union members they would all share a single int and list_type[] would
// never be written. (Anonymous structs and reading the other union view are
// GNU extensions.)
struct alloc {
  union {
    struct {
      int opaque; // words
      int opaque_modifier_volume; // words
      int translucent; // words
      int translucent_modifier_volume; // words
      int punch_through; // words
    };
    int list_type[5];
  };
};

static_assert(sizeof (struct alloc) == 5 * sizeof (int),
              "named offsets must overlay list_type[] exactly");

struct ta_state {
  int8_t tile_x_num;
  int8_t tile_y_num;

  int8_t current_list_type;
  int8_t list_end;

  int32_t ol_base;
  int32_t ol_limit;
  int32_t param_base;
  int32_t param_limit; // words
  int32_t param_current; // words

  int32_t next_opb; // words
  int32_t entry;
  struct alloc alloc;
  struct tile_state tile[64 * 64]; // indexed as [y * 64 + x]
};

static struct ta_state state;

// Map a 2-bit allocation-control field to a size in words:
// 0 -> 0, 1 -> 8, 2 -> 16, 3 -> 32.
static inline int alloc_ctrl_units(int n)
{
  return (n == 0) ? 0 : (1 << (n + 2)); // words
}

// Write an end-of-list word at every tile's current object-list position and
// mark each tile as having no open list. `_dst` is indexed in 32-bit words.
static inline void terminate_ta_tiles(int list_type, void * _dst)
{
  union i32_f * dst = (union i32_f *)_dst;
  for (int y = 0; y < state.tile_y_num; y++) {
    for (int x = 0; x < state.tile_x_num; x++) {
      struct tile_state * tile = &state.tile[y * 64 + x];
      assert(tile->current_list_type == list_type || tile->current_list_type == -1);

      // end of list
      assert(tile->ol_current >= state.ol_base && tile->ol_current < state.ol_limit);
      fprintf(stderr, "eol %d %d %08x\n", x, y, tile->ol_current * 4);
      dst[tile->ol_current].i = object_list_data::object_pointer_block_link | (1 << 28);

      tile->current_list_type = -1;
    }
  }
}

// Point tile->ol_current at the start of the given list's region for this
// tile: ol_base plus that list's offset.
static inline void tile_ol_current_for_list(struct tile_state * tile, int list_type_ix)
{
  // list_type[] has five entries, so valid indices are 0..4.
  assert(list_type_ix >= 0 &&
         list_type_ix < (int)(sizeof (state.alloc.list_type) / sizeof (state.alloc.list_type[0])));
  tile->ol_current = tile->ol_base + state.alloc.list_type[list_type_ix];
  { // removeme
    int32_t ix = tile - state.tile;
    int x = ix & 63;
    int y = ix >> 6;
    fprintf(stderr, "ol_current_for_list %d %d %08x\n", x, y, tile->ol_current * 4);
  }
}
//assert((tile->ol_current & 15) != 15); + + dst[tile->ol_current].i = (object_list_data::triangle_array | + (length - 1) << 25 | + (shadow << 24) | + (skip << 21) | + state.param_current); + fprintf(stderr, "l1 %d\n", (length - 1)); + fprintf(stderr, "oldta %d\n", object_list_data::triangle_array); + fprintf(stderr, "param_current %d\n", state.param_current); + fprintf(stderr, "dst tile ol_current %d %08x\n", tile->ol_current * 4, dst[tile->ol_current].i); + + tile->ol_current += 1; + tile->entry = 0; +} + +void flush_ta_tiles(int list_type, int length, int shadow, int skip, void * dst) +{ + for (int y = 0; y < state.tile_y_num; y++) { + for (int x = 0; x < state.tile_x_num; x++) { + struct tile_state * tile = &state.tile[y * 64 + x]; + flush_ta_tile(list_type, length, shadow, skip, tile, dst); + } + } +} + +void software_ta_init(const struct ta_configuration * config) +{ + state.tile_x_num = config->tile_x_num; + state.tile_y_num = config->tile_y_num; + state.current_list_type = -1; + state.list_end = 0; + + state.ol_base = config->ol_base >> 2; // words + state.ol_limit = config->ol_limit >> 2; // words + state.param_base = config->isp_base >> 2; // words + state.param_limit = config->isp_limit >> 2; // words + state.param_current = state.param_base; // words + assert(state.param_limit <= 0x1fffff); + + state.next_opb = config->next_opb_init; + + int alloc_opaque = alloc_ctrl_units((config->alloc_ctrl >> 0 ) & 0b11); + int alloc_opaque_modifier_volume = alloc_ctrl_units((config->alloc_ctrl >> 4 ) & 0b11); + int alloc_translucent = alloc_ctrl_units((config->alloc_ctrl >> 8 ) & 0b11); + int alloc_translucent_modifier_volume = alloc_ctrl_units((config->alloc_ctrl >> 12) & 0b11); + int alloc_punch_through = alloc_ctrl_units((config->alloc_ctrl >> 16) & 0b11); + + // bytes + int alloc_total_size = (alloc_opaque + + alloc_opaque_modifier_volume + + alloc_translucent + + alloc_translucent_modifier_volume + + alloc_punch_through); + + state.alloc.opaque = 0; + 
state.alloc.opaque_modifier_volume = state.alloc.opaque + alloc_opaque; + state.alloc.translucent = state.alloc.opaque_modifier_volume + alloc_opaque_modifier_volume; + state.alloc.translucent_modifier_volume = state.alloc.translucent + alloc_translucent; + state.alloc.punch_through = state.alloc.translucent_modifier_volume + alloc_translucent_modifier_volume; + + int ol_offset = config->ol_base >> 2; // words + for (int y = 0; y < config->tile_y_num; y++) { + for (int x = 0; x < config->tile_x_num; x++) { + struct tile_state * tile = &state.tile[y * 64 + x]; + fprintf(stderr, "ol_offset %d %d %08x\n", x, y, ol_offset); + tile->entry = 0; + tile->ol_base = ol_offset; + tile->ol_current = -1; + tile->current_list_type = -1; + ol_offset += alloc_total_size; + } + } +} + +static inline int decode_skip(int texture, int offset, int _16bit_uv) +{ + if (texture == 0) + return 0b001; + + int skip; + int control_word = (offset << 1) | (_16bit_uv << 0); + switch (control_word) { + default: [[fallthrough]]; + case 0b00: skip = 0b011; break; + case 0b01: skip = 0b010; break; + case 0b10: skip = 0b100; break; + case 0b11: skip = 0b011; break; + } + + return skip; +} + +/* +int modifier_volume(union i32_f * parameter, + int list_type, + int parameter_control_word, + void * dst) +{ + int skip = 0; +} +*/ + +static inline int32_t pack_floating_color(float a, float r, float g, float b) +{ + /* The TA converts each element of ARGB data into a fixed decimal value + between 0.0 and 1.0, multiples the value by 255, and packs the result in a + 32-bit value. 
+ */ + + if (a > 1.f) a = 1.f; + if (a < 0.f) a = 0.f; + if (r > 1.f) r = 1.f; + if (r < 0.f) r = 0.f; + if (g > 1.f) g = 1.f; + if (g < 0.f) g = 0.f; + if (b > 1.f) b = 1.f; + if (b < 0.f) b = 0.f; + + int ai = (int)(a * 255.f); + int ri = (int)(r * 255.f); + int gi = (int)(g * 255.f); + int bi = (int)(b * 255.f); + + return ((ai & 0xff) << 24) | ((ri & 0xff) << 16) | ((gi & 0xff) << 8) | ((bi & 0xff) << 0); +} + +static inline int32_t pack_intensity_color(float a, float r, float g, float b, float intensity) +{ + /* Regarding alpha values, the TA converts the specified Face Color Alpha + value into a fixed decimal value between 0.0 and 1.0, multiples the value + by 255, and derives an 8-bit value. Regarding RGB values, the TA converts + the specified Face Color R/G/B value into a fixed decimal value between 0.0 + and 1.0, multiples the value by 255, converts the intensity value into a + fixed decimal value between 0.0 and 1.0, multiplies the converted R/G/B + value and the converted intensity value together, multiplies that result by + 255, and derives an 8-bit value for each of R, G, and B. Finally, the TA + packs each 8-bit value into a 32-bit value. + */ + + // this description is probably incorrect. multiplying by 255 twice does not + // make sense. 
+ + if (a > 1.f) a = 1.f; + if (a < 0.f) a = 0.f; + if (r > 1.f) r = 1.f; + if (r < 0.f) r = 0.f; + if (g > 1.f) g = 1.f; + if (g < 0.f) g = 0.f; + if (b > 1.f) b = 1.f; + if (b < 0.f) b = 0.f; + if (intensity > 1.f) intensity = 1.f; + if (intensity < 0.f) intensity = 0.f; + + int ai = (int)(a * 255.f); + int ri = (int)(r * intensity * 255.f); + int gi = (int)(g * intensity * 255.f); + int bi = (int)(b * intensity * 255.f); + + if (ri > 255) ri = 255; + if (gi > 255) gi = 255; + if (bi > 255) bi = 255; + + return ((ai & 0xff) << 24) | (ri << 16) | (gi << 8) | (bi << 0); +} + +struct bounding_box { + int min_x; // in tile units + int min_y; // in tile units + int max_x; // in tile units + int max_y; // in tile units +}; + +static inline float min(float a, float b, float c) +{ + if (a < b) + return (a < c) ? a : c; + else + return (b < c) ? b : c; +} + +static inline float max(float a, float b, float c) +{ + if (a > b) + return (a > c) ? a : c; + else + return (b > c) ? b : c; +} + +static inline int floor(float f) +{ + return (int)f; +} + +static inline int ceil(float f) +{ + int fi = (int)f; + float fn = (float)fi; + return fi + (fn < f); +} + +static inline struct bounding_box calculate_bounding_box(float ax, float ay, + float bx, float by, + float cx, float cy) +{ + int min_x = floor(min(ax, bx, cx)); + int min_y = floor(min(ay, by, cy)); + int max_x = ceil(max(ax, bx, cx)); + int max_y = ceil(max(ay, by, cy)); + + return (bounding_box){ + min_x >> 5, // round down + min_y >> 5, // round down + max_x >> 5, // round down + max_y >> 5, // round down + }; +} + +struct previous_vertex { + float x; + float y; + float z; + int32_t base_color_0; + int32_t offset_color_0; + int32_t base_color_1; + int32_t offset_color_1; +}; + +int polygon(union i32_f * parameter, + int list_type, + int parameter_control_word, + void * _dst) +{ + union i32_f * dst = (union i32_f *)_dst; + + assert(((parameter_control_word >> 27) & 1) == 0); + int group_en = (parameter_control_word >> 23) 
& 0b1; + assert(((parameter_control_word >> 20) & 0b111) == 0); + int strip_len = (parameter_control_word >> 18) & 0b11; + int user_clip = (parameter_control_word >> 16) & 0b11; + assert(((parameter_control_word >> 8) & 0xff) == 0); + int shadow = (parameter_control_word >> 7) & 0b1; + int volume = (parameter_control_word >> 6) & 0b1; + int col_type = (parameter_control_word >> 4) & 0b11; + int texture = (parameter_control_word >> 3) & 0b1; + int offset = (parameter_control_word >> 2) & 0b1; + int gouraud = (parameter_control_word >> 1) & 0b1; + int _16bit_uv = (parameter_control_word >> 0) & 0b1; + + fprintf(stderr, "sv %d %d\n", shadow, volume); + assert(!volume || shadow); // for polygons, shadow and volume must be the same value + + int32_t isp_tsp_instruction_word = parameter[1].i; + isp_tsp_instruction_word &= ~(0b1111 << 22); + isp_tsp_instruction_word |= (parameter_control_word & 0b1111) << 22; + +#define tsp_instruction_word_0 parameter[2].i +#define texture_control_word_0 parameter[3].i +#define tsp_instruction_word_1 parameter[4].i +#define texture_control_word_1 parameter[5].i + + static float face_color_a_0; + static float face_color_r_0; + static float face_color_g_0; + static float face_color_b_0; + static float face_offset_color_a; + static float face_offset_color_r; + static float face_offset_color_g; + static float face_offset_color_b; +#define face_color_a_1 face_offset_color_a +#define face_color_r_1 face_offset_color_r +#define face_color_g_1 face_offset_color_g +#define face_color_b_1 face_offset_color_b + + int vertex_index = 8; + if (col_type == col_type::intensity_mode_1) { + if (offset || volume) { + face_color_a_0 = parameter[8].f; + face_color_r_0 = parameter[9].f; + face_color_g_0 = parameter[10].f; + face_color_b_0 = parameter[11].f; + face_offset_color_a = parameter[12].f; + face_offset_color_r = parameter[13].f; + face_offset_color_g = parameter[14].f; + face_offset_color_b = parameter[15].f; + vertex_index = 16; + } else { + 
face_color_a_0 = parameter[4].f; + face_color_r_0 = parameter[5].f; + face_color_g_0 = parameter[6].f; + face_color_b_0 = parameter[7].f; + } + } + + int skip = decode_skip(texture, offset, _16bit_uv); + + int param_index = state.param_current; + + struct previous_vertex previous_vertex[2]; + + int color_control_word = volume << 1 | texture << 0; + int vertex_length = 8 * (1 + (texture && (volume || col_type == col_type::floating_color))); + + int strip_index = 0; + + int32_t base_color_0; + int32_t offset_color_0; + int32_t base_color_1; + int32_t offset_color_1; + float x; + float y; + float z; + + while (true) { + int parameter_control_word = parameter[vertex_index + 0].i; + fprintf(stderr, "vi %d %08x\n", vertex_index, parameter_control_word); + + int para_type = (parameter_control_word >> 29) & 0b111; + assert(para_type == para_type::vertex); + int end_of_strip = (parameter_control_word >> 28) & 0b1; + + x = parameter[vertex_index + 1].f; + y = parameter[vertex_index + 2].f; + z = parameter[vertex_index + 3].f; + + switch (col_type) { + case col_type::packed_color: + switch (color_control_word) { + case 0b00: // one volume, non-textured + base_color_0 = parameter[vertex_index + 6].i; + break; + case 0b01: // one volume, textured + base_color_0 = parameter[vertex_index + 6].i; + offset_color_0 = parameter[vertex_index + 7].i; + break; + case 0b10: // two volumes, non-textured + base_color_0 = parameter[vertex_index + 4].i; + base_color_1 = parameter[vertex_index + 5].i; + break; + case 0b11: // two volumes, textured + base_color_0 = parameter[vertex_index + 6].i; + offset_color_0 = parameter[vertex_index + 7].i; + base_color_1 = parameter[vertex_index + 10].i; + offset_color_1 = parameter[vertex_index + 11].i; + break; + } + break; + case col_type::floating_color: + { + float base_color_a; + float base_color_r; + float base_color_g; + float base_color_b; + float offset_color_a; + float offset_color_r; + float offset_color_g; + float offset_color_b; + switch 
(color_control_word) { + case 0b00: // one volume, non-textured + base_color_a = parameter[vertex_index + 4].f; + base_color_r = parameter[vertex_index + 5].f; + base_color_g = parameter[vertex_index + 6].f; + base_color_b = parameter[vertex_index + 7].f; + + base_color_0 = pack_floating_color(base_color_a, base_color_r, base_color_g, base_color_b); + break; + case 0b01: // one volume, textured + base_color_a = parameter[vertex_index + 8].f; + base_color_r = parameter[vertex_index + 9].f; + base_color_g = parameter[vertex_index + 10].f; + base_color_b = parameter[vertex_index + 11].f; + base_color_a = parameter[vertex_index + 12].f; + base_color_r = parameter[vertex_index + 13].f; + base_color_g = parameter[vertex_index + 14].f; + base_color_b = parameter[vertex_index + 15].f; + + base_color_0 = pack_floating_color(base_color_a, base_color_r, base_color_g, base_color_b); + offset_color_0 = pack_floating_color(offset_color_a, offset_color_r, offset_color_g, offset_color_b); + break; + case 0b10: // two volumes, non-textured + assert(!"two volume non-textured floating color"); + break; + case 0b11: // two volumes, textured + assert(!"two volume textured floating color"); + break; + } + } + break; + case col_type::intensity_mode_1: [[fallthrough]]; + case col_type::intensity_mode_2: + { + float base_intensity_0; + float base_intensity_1; + float offset_intensity_0; + float offset_intensity_1; + switch (color_control_word) { + case 0b00: // one volume, non-textured + base_intensity_0 = parameter[vertex_index + 6].f; + base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0); + break; + case 0b01: // one volume, textured + base_intensity_0 = parameter[vertex_index + 6].f; + offset_intensity_0 = parameter[vertex_index + 7].f; + base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0); + offset_color_0 = pack_intensity_color(face_offset_color_a, 
face_offset_color_r, face_offset_color_g, face_offset_color_b, offset_intensity_0); + break; + case 0b10: // two volumes, non-textured + base_intensity_0 = parameter[vertex_index + 4].f; + base_intensity_1 = parameter[vertex_index + 5].f; + base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0); + base_color_1 = pack_intensity_color(face_color_a_1, face_color_r_1, face_color_g_1, face_color_b_1, base_intensity_0); + break; + case 0b11: // two volumes, textured + base_intensity_0 = parameter[vertex_index + 6].f; + offset_intensity_0 = parameter[vertex_index + 7].f; + base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0); + offset_color_0 = pack_intensity_color(face_offset_color_a, face_offset_color_r, face_offset_color_g, face_offset_color_b, offset_intensity_0); + + base_intensity_1 = parameter[vertex_index + 10].f; + offset_intensity_1 = parameter[vertex_index + 11].f; + base_color_1 = pack_intensity_color(face_color_a_1, face_color_r_1, face_color_g_1, face_color_b_1, base_intensity_1); + offset_color_1 = pack_intensity_color(face_offset_color_a, face_offset_color_r, face_offset_color_g, face_offset_color_b, offset_intensity_1); + break; + } + } // switch col_type + } + + if (strip_index >= 2) { + // write an entire triangle to params: + dst[param_index + 0].i = isp_tsp_instruction_word; + dst[param_index + 1].i = tsp_instruction_word_0; + dst[param_index + 2].i = texture_control_word_0; + param_index += 3; + + dst[param_index + 0].f = previous_vertex[1].x; + dst[param_index + 1].f = previous_vertex[1].y; + dst[param_index + 2].f = previous_vertex[1].z; + dst[param_index + 3].i = previous_vertex[1].base_color_0; + assert(skip + 3 == 4); + param_index += 4; + + dst[param_index + 0].f = previous_vertex[0].x; + dst[param_index + 1].f = previous_vertex[0].y; + dst[param_index + 2].f = previous_vertex[0].z; + dst[param_index + 3].i = 
previous_vertex[0].base_color_0; + assert(skip + 3 == 4); + param_index += 4; + + dst[param_index + 0].f = x; + dst[param_index + 1].f = y; + dst[param_index + 2].f = z; + dst[param_index + 3].i = base_color_0; + assert(skip + 3 == 4); + param_index += 4; + } + + if (strip_index >= 2) { + // find new tiles to includes + + // 2: {v1, v0} + // 3: {v1, v2} + // 4: {v3, v2} + // 5: {v3, v4} + + + /* + 1 p0 B p0 B---D _ + / \ \ / + 0 p1 A---C 2 _ p1 C + */ + + assert(texture == 0); + + // calculate bounding box + struct bounding_box bb = calculate_bounding_box(x, y, + previous_vertex[0].x, previous_vertex[0].y, + previous_vertex[1].x, previous_vertex[1].y); + fprintf(stderr, "bb: min %d %d max %d %d\n", bb.min_x, bb.min_y, bb.max_x, bb.max_y); + for (int ty = bb.min_y; ty <= bb.max_y; ty++) { + for (int tx = bb.min_x; tx <= bb.max_x; tx++) { + int tile_ix = ty * 64 + tx; + struct tile_state * tile = &state.tile[tile_ix]; + tile->entry = 1; + } + } + } + + // check for possible end of strip + strip_index += 1; + if (end_of_strip) { + assert(strip_index >= 2); + int num_triangles = strip_index - 2; + assert(num_triangles >= 0); + flush_ta_tiles(list_type, num_triangles, shadow, skip, dst); + state.param_current = param_index; + strip_index = 0; + } + + // next_vertex (possible end of polygon array) + vertex_index += vertex_length; + parameter_control_word = parameter[vertex_index + 0].i; + para_type = (parameter_control_word >> 29) & 0b111; + if (para_type != para_type::vertex) { + assert(end_of_strip); + break; + } + + // write previous vertex parameters to triangle strip array + // 0: 1 & (strip_index & 1) → 1 + // 1: 2 & (strip_index & 1) → 0 + // 2: 3 & (strip_index & 1) → 1 + // 3: 4 & (strip_index & 1) → 0 + // 4: 5 & (strip_index & 1) → 1 + // 5: 6 & (strip_index & 1) → 0 + previous_vertex[strip_index & 1].x = x; + previous_vertex[strip_index & 1].y = y; + previous_vertex[strip_index & 1].z = z; + previous_vertex[strip_index & 1].base_color_0 = base_color_0; + 
previous_vertex[strip_index & 1].offset_color_0 = offset_color_0; + previous_vertex[strip_index & 1].base_color_1 = base_color_1; + previous_vertex[strip_index & 1].offset_color_1 = offset_color_1; + } + + return vertex_index << 2; +} + +void software_ta_transfer(void * src, int32_t src_size, + void * dst) +{ + int32_t src_offset = 0; + while (src_offset < src_size) { + fprintf(stderr, "src_offset: %08x\n", src_offset); + union i32_f * parameter = (union i32_f *)(((int8_t *)src) + src_offset); + int32_t parameter_control_word = parameter[0].i; + int para_type = (parameter_control_word >> 29) & 0b111; + int list_type = (parameter_control_word >> 24) & 0b111; + + switch (para_type) { + case para_type::end_of_list: + assert(state.current_list_type != -1); + state.list_end |= (1 << state.current_list_type); + terminate_ta_tiles(state.current_list_type, dst); + state.current_list_type = -1; + src_offset += 32; + break; + case para_type::user_tile_clip: + src_offset += 32; + assert(!"user tile clip"); + break; + case para_type::object_list_set: + src_offset += 32; + assert(!"object list set"); + break; + case para_type::polygon_or_modifier_volume: + fprintf(stderr, "%d %d\n", state.current_list_type, list_type); + fflush(stdout); + assert(state.current_list_type == -1 || state.current_list_type == list_type); + state.current_list_type = list_type; + if (list_type == list_type::opaque_modifier_volume || list_type == list_type::translucent_modifier_volume) { + assert(!"modifier_volume"); + //src_offset += modifier_volume(parameter, list_type, parameter_control_word, dst); + } else { + src_offset += polygon(parameter, list_type, parameter_control_word, dst); + } + break; + case para_type::sprite: + assert(!"sprite"); + break; + case para_type::vertex: + assert(!"vertex parameter with no global"); + break; + default: + assert(!"invalid para_type"); + break; + } + } + + assert(src_offset == src_size); +} diff --git a/holly/software_ta.hpp b/holly/software_ta.hpp new file mode 
100644 index 0000000..2e9248b --- /dev/null +++ b/holly/software_ta.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include + +struct ta_configuration { + int32_t isp_base; // bytes + int32_t isp_limit; // bytes + int32_t ol_base; // bytes + int32_t ol_limit; // bytes + int32_t alloc_ctrl; + int32_t next_opb_init; // bytes + uint8_t tile_x_num; + uint8_t tile_y_num; +}; + +void software_ta_init(const struct ta_configuration * config); + +void software_ta_transfer(void * src, int32_t src_size, + void * dst); diff --git a/holly/ta_parameter.hpp b/holly/ta_parameter.hpp index 7c48b16..e7a6140 100644 --- a/holly/ta_parameter.hpp +++ b/holly/ta_parameter.hpp @@ -99,7 +99,7 @@ struct ta_parameter_writer { } }; -constexpr inline uint32_t uv_16bit(float u, float v) +static inline uint32_t uv_16bit(float u, float v) { uint32_t * ui = (reinterpret_cast(&u)); uint32_t * vi = (reinterpret_cast(&v));