#include "software_ta.hpp"

#if defined(__dreamcast__)
#include "sh7091/serial.hpp"
#define print__character serial::character
#define print__string serial::string
#define print__integer serial::integer

// Freestanding assert: prints file:line, function and the failed expression
// over the serial port, then halts. No trailing semicolon after `while (0)`
// so that `assert(x);` is exactly one statement (safe inside if/else).
#define assert(b)                                   \
  do {                                              \
    if (!(b)) {                                     \
      print__string(__FILE__);                      \
      print__character(':');                        \
      print__integer(__LINE__, ' ');                \
      print__string(__func__);                      \
      print__string(": assertion failed: ");        \
      print__string(#b);                            \
      print__character('\n');                       \
      while (1);                                    \
    }                                               \
  } while (0)
#else
#include <assert.h>
#include <stdio.h>
#endif

// One 32-bit parameter / object-list word, viewed as integer or float.
union i32_f {
  int32_t i;
  float f;
};

// Values of the 3-bit para_type field (bits 31..29 of a control word).
namespace para_type {
  constexpr int end_of_list = 0;
  constexpr int user_tile_clip = 1;
  constexpr int object_list_set = 2;
  constexpr int polygon_or_modifier_volume = 4;
  constexpr int sprite = 5;
  constexpr int vertex = 7;
}

// Values of the 3-bit list_type field (bits 26..24 of a control word).
namespace list_type {
  constexpr int opaque = 0;
  constexpr int opaque_modifier_volume = 1;
  constexpr int translucent = 2;
  constexpr int translucent_modifier_volume = 3;
  constexpr int punch_through = 4;
}

// Number of distinct list types; size of every per-list-type table.
constexpr int list_type_count = 5;

// Values of the 2-bit col_type field (bits 5..4 of a control word).
namespace col_type {
  constexpr int packed_color = 0;
  constexpr int floating_color = 1;
  constexpr int intensity_mode_1 = 2;
  constexpr int intensity_mode_2 = 3;
}

// Type tags placed in the top bits of an object list word.
namespace object_list_data {
  constexpr int triangle_strip = (0b0 << 31);
  constexpr int triangle_array = (0b100 << 29);
  constexpr int quad_array = (0b101 << 29);
  constexpr int object_pointer_block_link = (0b111 << 29);
}

// Per-tile bookkeeping.
struct tile_state {
  int8_t entry;             // non-zero: the strip being assembled touches this tile
  int8_t current_list_type; // list type ol_current belongs to, or -1
  int32_t ol_base;          // words; start of this tile's object list storage
  int32_t ol_current;       // words; next object list word to write, or -1
};

// Word offset, relative to tile_state::ol_base, at which the object list of
// each list type starts. Indexed by list_type::*.
struct alloc {
  int list_type[list_type_count]; // words
};

// Global state of the software tile accelerator.
struct ta_state {
  int8_t tile_x_num;
  int8_t tile_y_num;
  int8_t current_list_type; // list currently being input, or -1
  int8_t list_end;          // bitmask of list types that have been terminated
  int32_t ol_base;          // words
  int32_t ol_limit;         // words
  int32_t param_base;       // words
  int32_t param_limit;      // words
  int32_t param_current;    // words
  int32_t next_opb;         // words
  int32_t entry;
  struct alloc alloc;
  struct tile_state tile[64 * 64]; // row-major, fixed stride of 64 tiles
};

static struct ta_state state;

// Decodes one 2-bit allocation-control field into an object list size in
// words: 0 -> 0 (list unused), 1 -> 8, 2 -> 16, 3 -> 32.
static inline int alloc_ctrl_units(int n)
{
  return (n == 0) ? 0 : (1 << (n + 2)); // words
}

// Writes an end-of-list word at the current object list position of every
// configured tile and marks the tile as belonging to no list.
static inline void terminate_ta_tiles(int list_type, void * _dst)
{
  union i32_f * dst = (union i32_f *)_dst;

  for (int y = 0; y < state.tile_y_num; y++) {
    for (int x = 0; x < state.tile_x_num; x++) {
      struct tile_state * tile = &state.tile[y * 64 + x];
      assert(tile->current_list_type == list_type || tile->current_list_type == -1);

      // end of list
      assert(tile->ol_current >= state.ol_base && tile->ol_current < state.ol_limit);
      fprintf(stderr, "eol %d %d %08x\n", x, y, tile->ol_current * 4);
      dst[tile->ol_current].i = object_list_data::object_pointer_block_link | (1 << 28);
      tile->current_list_type = -1;
    }
  }
}

// Points tile->ol_current at the start of the tile's object list for the
// given list type.
static inline void tile_ol_current_for_list(struct tile_state * tile, int list_type_ix)
{
  // list_type[] has list_type_count entries; the field this index is decoded
  // from is 3 bits wide, so values 5..7 must be rejected here.
  assert(list_type_ix >= 0 && list_type_ix < list_type_count);
  tile->ol_current = tile->ol_base + state.alloc.list_type[list_type_ix];

  { // removeme
    int32_t ix = tile - state.tile;
    int x = ix & 63;
    int y = ix >> 6;
    fprintf(stderr, "ol_current_for_list %d %d %08x\n", x, y, tile->ol_current * 4);
  }
}

// Emits one triangle-array object list word for `tile` if the strip that was
// just completed touched it (tile->entry), then clears the pending flag.
// Tiles are switched to `list_type` even when nothing is emitted.
//
// length: number of triangles (1..16), shadow/skip: copied into the word.
static inline void flush_ta_tile(int list_type, int length, int shadow, int skip, struct tile_state * tile, void * _dst)
{
  union i32_f * dst = (union i32_f *)_dst;

  if (tile->current_list_type != list_type) {
    tile_ol_current_for_list(tile, list_type);
    tile->current_list_type = list_type;
  }

  if (!tile->entry)
    return;

  assert(length > 0);
  assert(length <= 16);

  int32_t ix = tile - state.tile;
  int x = ix & 63;
  int y = ix >> 6;
  fprintf(stderr, "flush_ta_tile %d %d %08x\n", x, y, tile->ol_current * 4);

  // NOTE(review): the overflow case is only reported; the word is still
  // written below. Linking to a new object pointer block is not implemented.
  if ((tile->ol_current & 15) == 15) {
    fprintf(stderr, "overflow %d\n", (int32_t)(tile - state.tile));
    tile->entry = 0;
  }
  //assert((tile->ol_current & 15) != 15);

  dst[tile->ol_current].i = (object_list_data::triangle_array
                             | (length - 1) << 25
                             | (shadow << 24)
                             | (skip << 21)
                             | state.param_current);
  fprintf(stderr, "l1 %d\n", (length - 1));
  fprintf(stderr, "oldta %d\n", object_list_data::triangle_array);
  fprintf(stderr, "param_current %d\n", state.param_current);
  fprintf(stderr, "dst tile ol_current %d %08x\n", tile->ol_current * 4, dst[tile->ol_current].i);

  tile->ol_current += 1;
  tile->entry = 0;
}

// Runs flush_ta_tile over every configured tile.
void flush_ta_tiles(int list_type, int length, int shadow, int skip, void * dst)
{
  for (int y = 0; y < state.tile_y_num; y++) {
    for (int x = 0; x < state.tile_x_num; x++) {
      struct tile_state * tile = &state.tile[y * 64 + x];
      flush_ta_tile(list_type, length, shadow, skip, tile, dst);
    }
  }
}

// Resets the global state from `config`. Byte addresses in the configuration
// are converted to word indices (>> 2). Each tile receives a contiguous
// object list area holding one sub-area per list type, sized from the 2-bit
// fields of config->alloc_ctrl (bits 1:0, 5:4, 9:8, 13:12, 17:16).
void software_ta_init(const struct ta_configuration * config)
{
  state.tile_x_num = config->tile_x_num;
  state.tile_y_num = config->tile_y_num;
  state.current_list_type = -1;
  state.list_end = 0;
  state.ol_base = config->ol_base >> 2; // words
  state.ol_limit = config->ol_limit >> 2; // words
  state.param_base = config->isp_base >> 2; // words
  state.param_limit = config->isp_limit >> 2; // words
  state.param_current = state.param_base; // words
  // param_current is OR-ed into the low 21 bits of object list words
  assert(state.param_limit <= 0x1fffff);
  state.next_opb = config->next_opb_init;

  // Each list type starts where the previous one ends. The alloc_ctrl fields
  // are 4 bits apart, in list_type order.
  int alloc_total_size = 0; // words
  for (int ix = 0; ix < list_type_count; ix++) {
    state.alloc.list_type[ix] = alloc_total_size;
    alloc_total_size += alloc_ctrl_units((config->alloc_ctrl >> (4 * ix)) & 0b11);
  }

  int ol_offset = config->ol_base >> 2; // words
  for (int y = 0; y < config->tile_y_num; y++) {
    for (int x = 0; x < config->tile_x_num; x++) {
      struct tile_state * tile = &state.tile[y * 64 + x];
      fprintf(stderr, "ol_offset %d %d %08x\n", x, y, ol_offset);
      tile->entry = 0;
      tile->ol_base = ol_offset;
      tile->ol_current = -1;
      tile->current_list_type = -1;
      ol_offset += alloc_total_size;
    }
  }
}

// Maps the texture / offset / 16bit_uv control bits to the 3-bit skip value
// stored in object list words.
static inline int decode_skip(int texture, int offset, int _16bit_uv)
{
  if (texture == 0)
    return 0b001;

  int skip;
  int control_word = (offset << 1) | (_16bit_uv << 0);
  switch (control_word) {
  default: [[fallthrough]];
  case 0b00: skip = 0b011; break;
  case 0b01: skip = 0b010; break;
  case 0b10: skip = 0b100; break;
  case 0b11: skip = 0b011; break;
  }
  return skip;
}

/*
int modifier_volume(union i32_f * parameter, int list_type, int parameter_control_word, void * dst)
{
  int skip = 0;
}
*/

// Clamps v to [0.0, 1.0].
static inline float clamp_unit(float v)
{
  if (v > 1.f) v = 1.f;
  if (v < 0.f) v = 0.f;
  return v;
}

// Packs four float channels into a 32-bit ARGB8888 value.
static inline int32_t pack_floating_color(float a, float r, float g, float b)
{
  /*
    The TA converts each element of ARGB data into a fixed decimal value between
    0.0 and 1.0, multiplies the value by 255, and packs the result in a 32-bit
    value.
  */
  int ai = (int)(clamp_unit(a) * 255.f);
  int ri = (int)(clamp_unit(r) * 255.f);
  int gi = (int)(clamp_unit(g) * 255.f);
  int bi = (int)(clamp_unit(b) * 255.f);

  return ((ai & 0xff) << 24) | ((ri & 0xff) << 16) | ((gi & 0xff) << 8) | ((bi & 0xff) << 0);
}

// Packs a face color scaled by a per-vertex intensity into ARGB8888. Alpha is
// not scaled by the intensity.
static inline int32_t pack_intensity_color(float a, float r, float g, float b, float intensity)
{
  /*
    Regarding alpha values, the TA converts the specified Face Color Alpha value
    into a fixed decimal value between 0.0 and 1.0, multiplies the value by 255,
    and derives an 8-bit value.

    Regarding RGB values, the TA converts the specified Face Color R/G/B value
    into a fixed decimal value between 0.0 and 1.0, multiplies the value by 255,
    converts the intensity value into a fixed decimal value between 0.0 and 1.0,
    multiplies the converted R/G/B value and the converted intensity value
    together, multiplies that result by 255, and derives an 8-bit value for each
    of R, G, and B.

    Finally, the TA packs each 8-bit value into a 32-bit value.
  */
  // this description is probably incorrect. multiplying by 255 twice does not
  // make sense.
  a = clamp_unit(a);
  r = clamp_unit(r);
  g = clamp_unit(g);
  b = clamp_unit(b);
  intensity = clamp_unit(intensity);

  int ai = (int)(a * 255.f);
  int ri = (int)(r * intensity * 255.f);
  int gi = (int)(g * intensity * 255.f);
  int bi = (int)(b * intensity * 255.f);

  if (ri > 255) ri = 255;
  if (gi > 255) gi = 255;
  if (bi > 255) bi = 255;

  return ((ai & 0xff) << 24) | (ri << 16) | (gi << 8) | (bi << 0);
}

struct bounding_box {
  int min_x; // in tile units
  int min_y; // in tile units
  int max_x; // in tile units
  int max_y; // in tile units
};

// Smallest of three floats.
static inline float min3(float a, float b, float c)
{
  if (a < b)
    return (a < c) ? a : c;
  else
    return (b < c) ? b : c;
}

// Largest of three floats.
static inline float max3(float a, float b, float c)
{
  if (a > b)
    return (a > c) ? a : c;
  else
    return (b > c) ? b : c;
}

// Largest integer <= f (rounds toward negative infinity, also for f < 0).
static inline int floor_to_int(float f)
{
  int fi = (int)f;
  float fn = (float)fi;
  return fi - (fn > f);
}

// Smallest integer >= f.
static inline int ceil_to_int(float f)
{
  int fi = (int)f;
  float fn = (float)fi;
  return fi + (fn < f);
}

// Bounding box of a triangle given in pixel coordinates, expressed in tile
// units (pixel >> 5). The result is NOT clamped to the tile grid.
static inline struct bounding_box calculate_bounding_box(float ax, float ay, float bx, float by, float cx, float cy)
{
  int min_x = floor_to_int(min3(ax, bx, cx));
  int min_y = floor_to_int(min3(ay, by, cy));
  int max_x = ceil_to_int(max3(ax, bx, cx));
  int max_y = ceil_to_int(max3(ay, by, cy));

  return bounding_box{
    min_x >> 5, // round down
    min_y >> 5, // round down
    max_x >> 5, // round down
    max_y >> 5, // round down
  };
}

// A vertex remembered from earlier in the current strip.
struct previous_vertex {
  float x;
  float y;
  float z;
  int32_t base_color_0;
  int32_t offset_color_0;
  int32_t base_color_1;
  int32_t offset_color_1;
};

// Consumes one polygon global parameter and every vertex parameter that
// follows it, writing one triangle-array entry per strip triangle to the
// parameter area of `_dst` and flushing object list words at each end of
// strip.
//
// parameter:              first word of the global parameter
// list_type:              list_type::* the polygon belongs to
// parameter_control_word: parameter[0].i, already read by the caller
// returns:                number of bytes of `parameter` consumed
//
// NOTE: after the last vertex this reads the control word of the parameter
// that follows, so the input must not end directly after a vertex.
// Only non-textured polygons are currently written (asserted below).
int polygon(union i32_f * parameter, int list_type, int parameter_control_word, void * _dst)
{
  union i32_f * dst = (union i32_f *)_dst;

  assert(((parameter_control_word >> 27) & 1) == 0);
  [[maybe_unused]] int group_en = (parameter_control_word >> 23) & 0b1;
  assert(((parameter_control_word >> 20) & 0b111) == 0);
  [[maybe_unused]] int strip_len = (parameter_control_word >> 18) & 0b11;
  [[maybe_unused]] int user_clip = (parameter_control_word >> 16) & 0b11;
  assert(((parameter_control_word >> 8) & 0xff) == 0);
  int shadow = (parameter_control_word >> 7) & 0b1;
  int volume = (parameter_control_word >> 6) & 0b1;
  int color_type = (parameter_control_word >> 4) & 0b11;
  int texture = (parameter_control_word >> 3) & 0b1;
  int offset = (parameter_control_word >> 2) & 0b1;
  [[maybe_unused]] int gouraud = (parameter_control_word >> 1) & 0b1;
  int _16bit_uv = (parameter_control_word >> 0) & 0b1;

  fprintf(stderr, "sv %d %d\n", shadow, volume);
  assert(!volume || shadow); // for polygons, shadow and volume must be the same value

  // bits 25..22 of the ISP/TSP word are replaced by the low four control bits
  // (texture, offset, gouraud, 16bit_uv)
  int32_t isp_tsp_instruction_word = parameter[1].i;
  isp_tsp_instruction_word &= ~(0b1111 << 22);
  isp_tsp_instruction_word |= (parameter_control_word & 0b1111) << 22;

  const int32_t tsp_instruction_word_0 = parameter[2].i;
  const int32_t texture_control_word_0 = parameter[3].i;
  // parameter[4].i / parameter[5].i would be the TSP instruction word and
  // texture control word of the second volume; they are not used yet.

  // Face colors are static: they are only loaded for intensity_mode_1, and
  // intensity_mode_2 polygons reuse whatever was loaded last.
  static float face_color_a_0;
  static float face_color_r_0;
  static float face_color_g_0;
  static float face_color_b_0;
  static float face_offset_color_a;
  static float face_offset_color_r;
  static float face_offset_color_g;
  static float face_offset_color_b;
  // the second-volume face color shares storage with the offset face color
  float & face_color_a_1 = face_offset_color_a;
  float & face_color_r_1 = face_offset_color_r;
  float & face_color_g_1 = face_offset_color_g;
  float & face_color_b_1 = face_offset_color_b;

  int vertex_index = 8; // words; 32-byte global parameter by default
  if (color_type == col_type::intensity_mode_1) {
    if (offset || volume) {
      // 64-byte global parameter
      face_color_a_0 = parameter[8].f;
      face_color_r_0 = parameter[9].f;
      face_color_g_0 = parameter[10].f;
      face_color_b_0 = parameter[11].f;
      face_offset_color_a = parameter[12].f;
      face_offset_color_r = parameter[13].f;
      face_offset_color_g = parameter[14].f;
      face_offset_color_b = parameter[15].f;
      vertex_index = 16;
    } else {
      face_color_a_0 = parameter[4].f;
      face_color_r_0 = parameter[5].f;
      face_color_g_0 = parameter[6].f;
      face_color_b_0 = parameter[7].f;
    }
  }

  int skip = decode_skip(texture, offset, _16bit_uv);

  int param_index = state.param_current;

  // the two most recent vertices of the strip, alternating slots
  struct previous_vertex history[2] = {};

  int color_control_word = volume << 1 | texture << 0;

  // words per vertex parameter: 16 for textured two-volume / floating color
  // vertices, 8 otherwise
  int vertex_length = 8 * (1 + (texture && (volume || color_type == col_type::floating_color)));

  int strip_index = 0;

  // Zero-initialized: not every vertex format supplies every color, and all
  // four are copied into the strip history below.
  int32_t base_color_0 = 0;
  int32_t offset_color_0 = 0;
  int32_t base_color_1 = 0;
  int32_t offset_color_1 = 0;

  while (true) {
    const int vertex_control_word = parameter[vertex_index + 0].i;
    fprintf(stderr, "vi %d %08x\n", vertex_index, vertex_control_word);
    const int vertex_para_type = (vertex_control_word >> 29) & 0b111;
    assert(vertex_para_type == para_type::vertex);
    const int end_of_strip = (vertex_control_word >> 28) & 0b1;

    const float x = parameter[vertex_index + 1].f;
    const float y = parameter[vertex_index + 2].f;
    const float z = parameter[vertex_index + 3].f;

    switch (color_type) {
    case col_type::packed_color:
      switch (color_control_word) {
      case 0b00: // one volume, non-textured
        base_color_0 = parameter[vertex_index + 6].i;
        break;
      case 0b01: // one volume, textured
        base_color_0 = parameter[vertex_index + 6].i;
        offset_color_0 = parameter[vertex_index + 7].i;
        break;
      case 0b10: // two volumes, non-textured
        base_color_0 = parameter[vertex_index + 4].i;
        base_color_1 = parameter[vertex_index + 5].i;
        break;
      case 0b11: // two volumes, textured
        base_color_0 = parameter[vertex_index + 6].i;
        offset_color_0 = parameter[vertex_index + 7].i;
        base_color_1 = parameter[vertex_index + 10].i;
        offset_color_1 = parameter[vertex_index + 11].i;
        break;
      }
      break;
    case col_type::floating_color:
      {
        float base_color_a;
        float base_color_r;
        float base_color_g;
        float base_color_b;
        float offset_color_a;
        float offset_color_r;
        float offset_color_g;
        float offset_color_b;
        switch (color_control_word) {
        case 0b00: // one volume, non-textured
          base_color_a = parameter[vertex_index + 4].f;
          base_color_r = parameter[vertex_index + 5].f;
          base_color_g = parameter[vertex_index + 6].f;
          base_color_b = parameter[vertex_index + 7].f;
          base_color_0 = pack_floating_color(base_color_a, base_color_r, base_color_g, base_color_b);
          break;
        case 0b01: // one volume, textured
          // words 8..11: base color, words 12..15: offset color
          base_color_a = parameter[vertex_index + 8].f;
          base_color_r = parameter[vertex_index + 9].f;
          base_color_g = parameter[vertex_index + 10].f;
          base_color_b = parameter[vertex_index + 11].f;
          offset_color_a = parameter[vertex_index + 12].f;
          offset_color_r = parameter[vertex_index + 13].f;
          offset_color_g = parameter[vertex_index + 14].f;
          offset_color_b = parameter[vertex_index + 15].f;
          base_color_0 = pack_floating_color(base_color_a, base_color_r, base_color_g, base_color_b);
          offset_color_0 = pack_floating_color(offset_color_a, offset_color_r, offset_color_g, offset_color_b);
          break;
        case 0b10: // two volumes, non-textured
          assert(!"two volume non-textured floating color");
          break;
        case 0b11: // two volumes, textured
          assert(!"two volume textured floating color");
          break;
        }
      }
      break;
    case col_type::intensity_mode_1: [[fallthrough]];
    case col_type::intensity_mode_2:
      {
        float base_intensity_0;
        float base_intensity_1;
        float offset_intensity_0;
        float offset_intensity_1;
        switch (color_control_word) {
        case 0b00: // one volume, non-textured
          base_intensity_0 = parameter[vertex_index + 6].f;
          base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0);
          break;
        case 0b01: // one volume, textured
          base_intensity_0 = parameter[vertex_index + 6].f;
          offset_intensity_0 = parameter[vertex_index + 7].f;
          base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0);
          offset_color_0 = pack_intensity_color(face_offset_color_a, face_offset_color_r, face_offset_color_g, face_offset_color_b, offset_intensity_0);
          break;
        case 0b10: // two volumes, non-textured
          base_intensity_0 = parameter[vertex_index + 4].f;
          base_intensity_1 = parameter[vertex_index + 5].f;
          base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0);
          // the second volume is scaled by its own intensity
          base_color_1 = pack_intensity_color(face_color_a_1, face_color_r_1, face_color_g_1, face_color_b_1, base_intensity_1);
          break;
        case 0b11: // two volumes, textured
          // NOTE(review): both offset colors are derived from the storage
          // shared with face color 1 -- confirm against the TA documentation.
          base_intensity_0 = parameter[vertex_index + 6].f;
          offset_intensity_0 = parameter[vertex_index + 7].f;
          base_color_0 = pack_intensity_color(face_color_a_0, face_color_r_0, face_color_g_0, face_color_b_0, base_intensity_0);
          offset_color_0 = pack_intensity_color(face_offset_color_a, face_offset_color_r, face_offset_color_g, face_offset_color_b, offset_intensity_0);
          base_intensity_1 = parameter[vertex_index + 10].f;
          offset_intensity_1 = parameter[vertex_index + 11].f;
          base_color_1 = pack_intensity_color(face_color_a_1, face_color_r_1, face_color_g_1, face_color_b_1, base_intensity_1);
          offset_color_1 = pack_intensity_color(face_offset_color_a, face_offset_color_r, face_offset_color_g, face_offset_color_b, offset_intensity_1);
          break;
        }
      }
      break;
    } // switch color_type

    if (strip_index >= 2) {
      // write an entire triangle to params:
      dst[param_index + 0].i = isp_tsp_instruction_word;
      dst[param_index + 1].i = tsp_instruction_word_0;
      dst[param_index + 2].i = texture_control_word_0;
      param_index += 3;

      dst[param_index + 0].f = history[1].x;
      dst[param_index + 1].f = history[1].y;
      dst[param_index + 2].f = history[1].z;
      dst[param_index + 3].i = history[1].base_color_0;
      assert(skip + 3 == 4);
      param_index += 4;

      dst[param_index + 0].f = history[0].x;
      dst[param_index + 1].f = history[0].y;
      dst[param_index + 2].f = history[0].z;
      dst[param_index + 3].i = history[0].base_color_0;
      assert(skip + 3 == 4);
      param_index += 4;

      dst[param_index + 0].f = x;
      dst[param_index + 1].f = y;
      dst[param_index + 2].f = z;
      dst[param_index + 3].i = base_color_0;
      assert(skip + 3 == 4);
      param_index += 4;
    }

    if (strip_index >= 2) {
      // find new tiles to include
      //
      // slots used for the two earlier vertices of each triangle:
      //   2: {v1, v0}
      //   3: {v1, v2}
      //   4: {v3, v2}
      //   5: {v3, v4}
      assert(texture == 0);

      // calculate bounding box
      struct bounding_box bb = calculate_bounding_box(x, y,
                                                      history[0].x, history[0].y,
                                                      history[1].x, history[1].y);
      fprintf(stderr, "bb: min %d %d max %d %d\n", bb.min_x, bb.min_y, bb.max_x, bb.max_y);

      // Clamp to the configured tile grid: vertices may lie off-screen, and
      // state.tile[] must never be indexed outside the grid.
      const int first_tx = (bb.min_x < 0) ? 0 : bb.min_x;
      const int first_ty = (bb.min_y < 0) ? 0 : bb.min_y;
      const int last_tx = (bb.max_x < state.tile_x_num) ? bb.max_x : state.tile_x_num - 1;
      const int last_ty = (bb.max_y < state.tile_y_num) ? bb.max_y : state.tile_y_num - 1;

      for (int ty = first_ty; ty <= last_ty; ty++) {
        for (int tx = first_tx; tx <= last_tx; tx++) {
          state.tile[ty * 64 + tx].entry = 1;
        }
      }
    }

    // check for possible end of strip
    strip_index += 1;
    if (end_of_strip) {
      assert(strip_index >= 2);
      int num_triangles = strip_index - 2;
      assert(num_triangles >= 0);
      flush_ta_tiles(list_type, num_triangles, shadow, skip, dst);
      state.param_current = param_index;
      strip_index = 0;
    }

    // next_vertex (possible end of polygon array)
    vertex_index += vertex_length;
    const int next_control_word = parameter[vertex_index + 0].i;
    const int next_para_type = (next_control_word >> 29) & 0b111;
    if (next_para_type != para_type::vertex) {
      assert(end_of_strip);
      break;
    }

    // Remember this vertex for the following triangles. strip_index has
    // already been incremented, so successive vertices land in slots
    // 1, 0, 1, 0, ...
    struct previous_vertex & slot = history[strip_index & 1];
    slot.x = x;
    slot.y = y;
    slot.z = z;
    slot.base_color_0 = base_color_0;
    slot.offset_color_0 = offset_color_0;
    slot.base_color_1 = base_color_1;
    slot.offset_color_1 = offset_color_1;
  }

  return vertex_index << 2;
}

// Processes `src_size` bytes of parameters starting at `src`, writing object
// lists and parameter data into `dst`.
//
// Only end_of_list and non-modifier-volume polygons are implemented; every
// other parameter type asserts. The unimplemented cases also return, so that
// a build with asserts compiled out does not spin forever on a parameter it
// cannot advance past.
void software_ta_transfer(void * src, int32_t src_size, void * dst)
{
  int32_t src_offset = 0;

  while (src_offset < src_size) {
    fprintf(stderr, "src_offset: %08x\n", src_offset);
    union i32_f * parameter = (union i32_f *)(((int8_t *)src) + src_offset);
    int32_t parameter_control_word = parameter[0].i;
    const int para = (parameter_control_word >> 29) & 0b111;
    const int list = (parameter_control_word >> 24) & 0b111;

    switch (para) {
    case para_type::end_of_list:
      assert(state.current_list_type != -1);
      state.list_end |= (1 << state.current_list_type);
      terminate_ta_tiles(state.current_list_type, dst);
      state.current_list_type = -1;
      src_offset += 32;
      break;
    case para_type::user_tile_clip:
      src_offset += 32;
      assert(!"user tile clip");
      break;
    case para_type::object_list_set:
      src_offset += 32;
      assert(!"object list set");
      break;
    case para_type::polygon_or_modifier_volume:
      fprintf(stderr, "%d %d\n", state.current_list_type, list);
      fflush(stdout);
      assert(state.current_list_type == -1 || state.current_list_type == list);
      state.current_list_type = list;
      if (list == list_type::opaque_modifier_volume || list == list_type::translucent_modifier_volume) {
        assert(!"modifier_volume");
        //src_offset += modifier_volume(parameter, list, parameter_control_word, dst);
        return;
      } else {
        src_offset += polygon(parameter, list, parameter_control_word, dst);
      }
      break;
    case para_type::sprite:
      assert(!"sprite");
      return;
    case para_type::vertex:
      assert(!"vertex parameter with no global");
      return;
    default:
      assert(!"invalid para_type");
      return;
    }
  }

  assert(src_offset == src_size);
}