Using the etnaviv shader assembler

This post describes the only part of the etna_pipe API that is not yet equal or at least analogous to Gallium. Etnaviv does not yet have a GLSL shader compiler (or TGSI converter); until it does, shaders can only be compiled from native assembly.

Luckily, the Vivante ISA is quite straightforward, at least compared to others such as the Mali vertex shader assembly, so it is feasible to write shaders by hand. It is also a unified ISA, meaning that the vertex and fragment shaders use the same instruction set.

The shader instructions are documented in detail in isa.xml, and some further notes about the ISA can be found in isa.md. As an example, start from the basic vertex shader of the framebuffer demo mip_cube:

; vertex shader for mip_cube_state
; Transforms the vertex position, computes a diffuse lighting term into
; color_out and passes the texture coordinate through.
; As written here, every instruction has one destination and three source
; slots; unused source slots are written as "void".
; attributes:
;   t0 position_in
;   t1 normal_in
;   t2 texturecoord_in
; uniforms:
;   u0 .. u3   model view projection matrix
;   u4.xyz .. u6.xyz  normal matrix
;   u7..u10    model view matrix
; constants:
;   u4.w = 2.0f
;   u5.w = 20.0f
;   u6.w = 0.0f
;   u11.x = 1.0f
;   u11.y = 0.5f
; outputs:
;   t0 color_out
;   t1 texturecoord_out
;   t4 position_out
; temporaries:
;   t3 lightSource, later reused for the normalization factor
;   t5 accumulator for the matrix products
; note: the input registers t0 and t1 are reused as scratch and finally
;       hold outputs, so the order of the sections below matters
;
; lightSource.xyz = (2.0, 2.0, 20.0)
; (x and y come from the constant u4.w = 2.0, z from u5.w = 20.0)
MOV t3.xy__, void, void, u4.wwww
MOV t3.__z_, void, void, u5.wwww
;
; gl_Position = modelviewprojectionMatrix * in_position
; (expanded as u0*x + u1*y + u2*z + u3*w, accumulated in t4)
MUL t4, u0, t0.xxxx, void
MAD t4, u1, t0.yyyy, t4
MAD t4, u2, t0.zzzz, t4
MAD t4, u3, t0.wwww, t4
;
; vEyeNormal.xyz = normalMatrix * in_normal
; (the last MAD writes the result to t1.xyz, replacing normal_in)
MUL t5.xyz_, u4.xyzz, t1.xxxx, void
MAD t5.xyz_, u5.xyzz, t1.yyyy, t5.xyzz
MAD t1.xyz_, u6.xyzz, t1.zzzz, t5.xyzz
;
; vPosition4 = modelviewMatrix * in_position
; (the last MAD writes the result to t0, replacing position_in; t4 already
;  holds gl_Position at this point)
MUL t5, u7, t0.xxxx, void
MAD t5, u8, t0.yyyy, t5
MAD t5, u9, t0.zzzz, t5
MAD t0, u10, t0.wwww, t5
;
; t0.xyz = lightSource.xyz - vPosition4.xyz / vPosition4.w
; (t1.w = 1 / vPosition4.w, then t0.xyz = -t0.xyz * t1.w + t3.xyz)
RCP t1.___w, void, void, t0.wwww
MAD t0.xyz_, -t0.xyzz, t1.wwww, t3.xyzz
;
; vLightDir = normalize(t0.xyz)
; (dot(t0, t0) -> reciprocal square root -> scale t0; t3 is free for reuse
;  because lightSource was consumed by the MAD above)
DP3 t3.xyz_, t0.xyzz, t0.xyzz, void
RSQ t3.xyz_, void, void, t3.xxxx
MUL t0.xyz_, t0.xyzz, t3.xyzz, void
;
; color_out.xyz = diff = max(0.0, dot(vEyeNormal, vLightDir))
; (u6.w is the constant 0.0 used as the lower bound; see isa.xml for the
;  exact operand semantics of SELECT.LT)
DP3 t0.x___, t1.xyzz, t0.xyzz, void
SELECT.LT t0.xyz_, u6.wwww, t0.xxxx, u6.wwww
;
; color_out.w = 1.0
; (u11.x is the constant 1.0)
MOV t0.___w, void, void, u11.xxxx
;
; texcoord_out.xy = texcoord_in.xy
; (t1 held vEyeNormal until the DP3 above; it now becomes texturecoord_out)
MOV t1.xy__, void, void, t2.xyyy
;
; Vivante specific transform at the end of every vertex shader
; position_out.z = (position_out.z + position_out.w) / 2.0
; (the division is done as a multiply by the constant u11.y = 0.5)
ADD t4.__z_, t4.zzzz, void, t4.wwww
MUL t4.__z_, t4.zzzz, u11.yyyy, void 

Save this in a text file named mip_cube_vs.asm, then use the external tool asm.py to compile it:

$ ../../tools/asm.py mip_cube_vs.asm
This prints the assembled shader as the contents of a C array, with four uint32_t words per instruction:
0x01831009,0x00000000,0x00000000,0x203fc048,
0x02031009,0x00000000,0x00000000,0x203fc058,
...
0x02041001,0x2a804800,0x00000000,0x003fc048,
0x02041003,0x2a804800,0x00aa05c0,0x00000002,
To embed this in the program code, paste this output into the array vs[] in the source file (alternatively, redirect it to a file and #include that file inside the array initializer):
/* Vertex shader machine code: paste the asm.py output here
 * (four uint32_t words per instruction). */
uint32_t vs[] = {
   ...
};

Do the same thing for the fragment shader and ps[]. These arrays are used inside the etna_shader_program structure:

/* Meta-data describing the mip_cube shader program: links the vs[] and ps[]
 * code arrays to their inputs, varyings, temporaries and initial uniforms. */
const struct etna_shader_program shader = {
    /* three vertex attributes, read by the vertex shader from t0, t1 and t2 */
    .num_inputs = 3,
    .inputs = { { .vs_reg=0 },{ .vs_reg=1 },{ .vs_reg=2 } },
    /* two varyings, taken from vertex shader registers t0 and t1 */
    .num_varyings = 2,
    .varyings = {
        {.num_components=4, .special=ETNA_VARYING_VSOUT, .pa_attributes=0x200, .vs_reg=0}, /* color */
        {.num_components=2, .special=ETNA_VARYING_VSOUT, .pa_attributes=0x200, .vs_reg=1}  /* texcoord */
    }, 
    /* sizeof() is in bytes and vs[] holds 4-byte uint32_t elements, so this
     * is the code size in 32-bit words (not in instructions) */
    .vs_code_size = sizeof(vs)/4,
    .vs_code = (uint32_t*)vs,
    .vs_pos_out_reg = 4, // t4 out
    .vs_load_balancing = 0xf3f0542,  /* depends on number of inputs/outputs/varyings? XXX how exactly */
    .vs_num_temps = 6, /* the vertex shader uses t0..t5 */
    .vs_uniforms_size = 12*4, /* u0..u11, four components each */
    /* Flat float array indexed as 4*register + component (x=0, y=1, z=2, w=3).
     * Only the shader constants are set here; entries not listed are
     * zero-initialized.
     * NOTE(review): presumably the matrices in u0..u10 are filled in at run
     * time -- confirm against the demo code. */
    .vs_uniforms = (uint32_t*)(const float[12*4]){
        [19] = 2.0f, /* u4.w */
        [23] = 20.0f, /* u5.w */
        [27] = 0.0f, /* u6.w */
        [44] = 1.0f, /* u11.x */
        [45] = 0.5f, /* u11.y */
    },
    /* fragment shader code from ps[], sized in 32-bit words like vs_code_size */
    .ps_code_size = sizeof(ps)/4,
    .ps_code = (uint32_t*)ps,
    .ps_color_out_reg = 1, // t1 out
    .ps_num_temps = 3,
    .ps_uniforms_size = 1*4, /* a single uniform register, u0 */
    .ps_uniforms = (uint32_t*)(const float[1*4]){
        [0] = 1.0f, /* u0.x */
    },
};
This structure defines all the meta-data for the shader. This includes the number of inputs, outputs and varyings, the definitions of the varyings, the number of temporaries (t* registers), and the initial uniform values for both the vertex and the fragment program. Inside the program, this structure can be compiled to a state atom, which can then be bound using bind_etna_shader_state to use it for rendering:
/* Compile the shader meta-data into a state atom, then bind that atom so it
 * is used for rendering. */
void *shader_state = pipe->create_etna_shader_state(pipe, &shader);
pipe->bind_etna_shader_state(pipe, shader_state);
Written on February 26, 2013
Tags:
Filed under