----------------------------------------------------------------------------------
-- Create Date:    06/23/2008
-- Design Name:    ETC1 Texture Decoder.
-- Module Name:    ETC1Decompressor
--
-- Description:    Decoding module for the ETC1 texture compression format.
--
--                 Takes a 64 bit block and a pixel coordinate inside the 4x4
--                 block as input and outputs the RGB 888 value of that pixel.
--                 It also outputs a per-component overflow flag (R, G, B) so
--                 that an external ETC2 decoder can be attached.
--
--                 The decoder itself has NO register and is made of pure logic,
--                 so it can be put directly after a texture cache line inside
--                 the texture sampler.
--                 Inside FPGA speed above 80 MHz seems sustainable.
--
-- Revision:
-- Revision 0.01 - File Created
-- Revision 0.02 - Comment added, cleaning started. Implementation finished.
--
-- Additional Comments:
--   - Search for "OPTIMIZE" in source code to see where changes could be made
--     for trade off.
--   - Search for "ETC2" in source code to modify the decoder as a pure ETC1
--     decoder.
--   - All logic is written as concurrent assignments calling pure functions,
--     so there is no sensitivity list to keep in sync and simulation matches
--     synthesis.
--   - The overflow outputs are only evaluated for pixels of the second sub
--     block (w = '1'), because the delta is only added there.
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

----------------------------------------------------------------------------------
-- Connectivity.
----------------------------------------------------------------------------------
entity ETC1Decompressor is
    Port (
        textureLine : in  STD_LOGIC_VECTOR(63 downto 0); -- Compressed 64 bit block.
        PIX         : in  STD_LOGIC_VECTOR(3 downto 0);  -- Pixel select : (3..2) = column, (1..0) = row.
        R           : out STD_LOGIC_VECTOR(7 downto 0);
        G           : out STD_LOGIC_VECTOR(7 downto 0);
        B           : out STD_LOGIC_VECTOR(7 downto 0);
        -- ETC2
        OverflowR   : out STD_LOGIC;
        OverflowG   : out STD_LOGIC;
        OverflowB   : out STD_LOGIC
    );
end ETC1Decompressor;

----------------------------------------------------------------------------------
-- Synthesizable logic.
----------------------------------------------------------------------------------
architecture BehavioralETC1Decompressor of ETC1Decompressor is

    -- Named fields, to avoid reading directly in the 64 bit input.
    signal component1 : std_logic_vector(7 downto 0);  -- Red   byte (bits 63..56)
    signal component2 : std_logic_vector(7 downto 0);  -- Green byte (bits 55..48)
    signal component3 : std_logic_vector(7 downto 0);  -- Blue  byte (bits 47..40)
    signal pixels     : std_logic_vector(31 downto 0); -- Pixel index bits
    signal diffbit    : std_logic;
    signal flipbit    : std_logic;

    -- Working mode bits.
    signal u : std_logic;  -- 1 : add the 3 bit delta to the base colour.
    signal v : std_logic;  -- 0 : use the second 4 bit colour directly.
    signal w : std_logic;  -- 0 : pixel in first sub block, 1 : second sub block.

    -- ETC2 : bit 5 is the out-of-range flag of "5 bit base + 3 bit signed delta".
    --        It is NEVER set by a valid ETC1 block.
    --        For a PURE ETC1 decoder, "5 downto 0" can be reduced to "4 downto 0"
    --        (and select_colour / expand_to_8 adapted accordingly).
    signal outcolR : std_logic_vector(5 downto 0);
    signal outcolG : std_logic_vector(5 downto 0);
    signal outcolB : std_logic_vector(5 downto 0);

    -- Code book access.
    signal tableCode : std_logic_vector(2 downto 0);
    signal pixIdx    : std_logic_vector(1 downto 0);
    signal romAddr   : std_logic_vector(4 downto 0);
    signal addCode   : std_logic_vector(8 downto 0);   -- Signed 9 bit modifier.

    ------------------------------------------------------------------------------
    -- ROM Modifier Table
    --
    -- Address on 5 bit :
    --
    --   LSB 1..0 : Pixel index value.
    --
    --     pixel index value
    --     msb lsb    resulting modifier value
    --     -------    -----------------------------
    --      0   0      a (small positive value)   -> Index 0
    --      0   1      b (large positive value)   -> Index 1
    --      1   0     -a (small negative value)   -> Index 2
    --      1   1     -b (large negative value)   -> Index 3
    --
    --   MSB 4..2 : Table code word as directly given from compressed block.
    --
    --     table codeword    modifier table
    --     --------------    ----------------------
    --           0             2    8   -2    -8
    --           1             5   17   -5   -17
    --           2             9   29   -9   -29
    --           3            13   42  -13   -42
    --           4            18   60  -18   -60
    --           5            24   80  -24   -80
    --           6            33  106  -33  -106
    --           7            47  183  -47  -183
    ------------------------------------------------------------------------------
    type RomType is array (0 to 31) of std_logic_vector(8 downto 0);
    constant ROMTbl : RomType := (
        -- Table 0
        B"000000010", B"000001000", B"111111110", B"111111000",
        -- Table 1
        B"000000101", B"000010001", B"111111011", B"111101111",
        -- Table 2
        B"000001001", B"000011101", B"111110111", B"111100011",
        -- Table 3
        B"000001101", B"000101010", B"111110011", B"111010110",
        -- Table 4
        B"000010010", B"000111100", B"111101110", B"111000100",
        -- Table 5
        B"000011000", B"001010000", B"111101000", B"110110000",
        -- Table 6
        B"000100001", B"001101010", B"111011111", B"110010110",
        -- Table 7
        B"000101111", B"010110111", B"111010001", B"101001001"
    );

    ------------------------------------------------------------------------------
    -- Stage 1A helper : select the colour of the current sub block.
    --
    --   comp       : colour byte of one component (R, G or B).
    --   diff       : block diff bit (0 = individual 444+444, 1 = 555 + delta 333).
    --   use_delta  : 'u' mode bit, 1 = add the signed 3 bit delta.
    --   use_first  : 'v' mode bit, 0 = take the second 4 bit colour directly.
    --
    -- Returns 6 bits : (4..0) colour, (5) set when base + delta left 0..31.
    ------------------------------------------------------------------------------
    function select_colour (comp      : std_logic_vector(7 downto 0);
                            diff      : std_logic;
                            use_delta : std_logic;
                            use_first : std_logic) return std_logic_vector is
        variable lsb    : std_logic;
        variable base6  : std_logic_vector(5 downto 0);
        variable delta6 : std_logic_vector(5 downto 0);
    begin
        if use_first = '0' then
            -- Direct second colour (individual mode, second sub block).
            -- Bit 0 is padding only : expand_to_8 uses bits 4..1 in this mode.
            return '0' & comp(3 downto 0) & comp(3);
        end if;

        -- Fifth (lowest) bit of the base colour.
        if diff = '0' then
            lsb := comp(7);   -- Individual mode : padding only.
        else
            lsb := comp(3);   -- Real 555 LSB.
        end if;
        base6 := '0' & comp(7 downto 4) & lsb;

        -- Delta, sign extended over ALL 6 bits.
        -- The base is 0..31 and the delta -4..3, so the sum lies in -4..34 and
        -- bit 5 of the 6 bit two's complement result is set exactly when the
        -- sum is outside 0..31. (A zero extended sixth bit would only give the
        -- carry, which is set for every valid negative delta.)
        if use_delta = '0' then
            delta6 := (others => '0');
        else
            delta6             := (others => comp(2));
            delta6(2 downto 0) := comp(2 downto 0);
        end if;

        return std_logic_vector(unsigned(base6) + unsigned(delta6));
    end function;

    ------------------------------------------------------------------------------
    -- Stage 2 helper : expand the selected colour to 8 bits.
    --
    --   diff = 1 : 5 -> 8, the three upper bits are replicated in the low bits.
    --   diff = 0 : 4 -> 8, the nibble (bits 4..1 of col) is replicated so that
    --              the result is nibble * 17 as required by ETC1. Applying the
    --              5 -> 8 rule to a padded nibble would give a different value.
    ------------------------------------------------------------------------------
    function expand_to_8 (col  : std_logic_vector(5 downto 0);
                          diff : std_logic) return std_logic_vector is
    begin
        if diff = '0' then
            return col(4 downto 1) & col(4 downto 1);
        else
            return col(4 downto 0) & col(4 downto 2);
        end if;
    end function;

    ------------------------------------------------------------------------------
    -- Stage 3 helper : add the signed modifier and clamp to 0..255.
    --
    --   Bit 8 of the 9 bit sum / modifier sign  -> result
    --            0         /        x           -> sum(7..0)
    --            1         /        0           -> 255 (too positive)
    --            1         /        1           -> 0   (too negative)
    --
    -- This is valid because the colour is 0..255 and |modifier| <= 183, so the
    -- sign of the modifier tells whether bit 8 means overflow or underflow.
    ------------------------------------------------------------------------------
    function add_and_clamp (col8     : std_logic_vector(7 downto 0);
                            modifier : std_logic_vector(8 downto 0)) return std_logic_vector is
        variable sum : unsigned(8 downto 0);
    begin
        sum := resize(unsigned(col8), 9) + unsigned(modifier);
        if sum(8) = '0' then
            return std_logic_vector(sum(7 downto 0));
        elsif modifier(8) = '0' then
            return x"FF";
        else
            return x"00";
        end if;
    end function;

    ------------------------------------------------------------------------------
    -- Stage 1C helper : fetch the 2 bit pixel index (msb & lsb) of one pixel.
    --
    -- Pixel Layout, as specified in Khronos Group documentation.
    --
    --        --> U / Width
    --        ---- ---- ---- ----
    --       | a  | e  | i  | m  |
    --        ---- ---- ---- ----
    --       | b  | f  | j  | n  |
    --        ---- ---- ---- ----
    --       | c  | g  | k  | o  |
    --        ---- ---- ---- ----
    --       | d  | h  | l  | p  |
    --        ---- ---- ---- ----
    --
    -- pix = 0 selects a, 1 selects b, ... 15 selects p. For pixel number n the
    -- index LSB is pixels(n) and the index MSB is pixels(n + 16).
    ------------------------------------------------------------------------------
    function pixel_index (pix_bits : std_logic_vector(31 downto 0);
                          pix      : std_logic_vector(3 downto 0)) return std_logic_vector is
        variable n : natural range 0 to 15;
    begin
        n := to_integer(unsigned(pix));
        return pix_bits(n + 16) & pix_bits(n);
    end function;

begin

    ------------------------------------------------------------------------------
    -- Stage 0 : Cut the 64 bit block and compute the u, v, w working bits.
    ------------------------------------------------------------------------------
    diffbit    <= textureLine(33);
    flipbit    <= textureLine(32);
    pixels     <= textureLine(31 downto 0);
    component1 <= textureLine(63 downto 56);
    component2 <= textureLine(55 downto 48);
    component3 <= textureLine(47 downto 40);

    -- Process the first sub block (0) or the second sub block (1).
    -- flipbit = 0 : two 2x4 blocks side by side, the column MSB decides.
    -- flipbit = 1 : two 4x2 blocks on top of each other, the row MSB decides.
    w <= (flipbit and PIX(1)) or (not(flipbit) and PIX(3));

    -- Perform the delta addition only in the second block AND in diff mode.
    -- 0 : Add 0, 1 : Add delta.
    u <= diffbit and w;

    -- 0 : Use second colour directly.
    -- 1 : Use differential colour or direct first colour.
    v <= diffbit or not(w);

    ------------------------------------------------------------------------------
    -- Stage 1A : Select base colour A, B or A + delta for each component.
    ------------------------------------------------------------------------------
    outcolR <= select_colour(component1, diffbit, u, v);
    outcolG <= select_colour(component2, diffbit, u, v);
    outcolB <= select_colour(component3, diffbit, u, v);

    ------------------------------------------------------------------------------
    -- Stage 1B : Optional overflow detector for ETC2 extension.
    -- ETC2 : Delete this block if pure ETC1 decoder.
    ------------------------------------------------------------------------------
    OverflowR <= '0' when v = '0' else outcolR(5);
    OverflowG <= '0' when v = '0' else outcolG(5);
    OverflowB <= '0' when v = '0' else outcolB(5);

    ------------------------------------------------------------------------------
    -- Stage 1C : Code book setup.
    ------------------------------------------------------------------------------
    tableCode <= textureLine(39 downto 37) when w = '0'   -- Sub block 1
            else textureLine(36 downto 34);               -- Sub block 2

    pixIdx    <= pixel_index(pixels, PIX);
    romAddr   <= tableCode & pixIdx;

    -- OPTIMIZE : divide the ROM table by 2 and store positive values only,
    --            then use pixIdx(1) as selector of a multiplexer choosing
    --            between value and -value (not(value) + 1).
    addCode   <= ROMTbl(to_integer(unsigned(romAddr)));

    ------------------------------------------------------------------------------
    -- Stage 2 + 3 : Expand to 8 bit, add the modifier and clamp.
    ------------------------------------------------------------------------------
    R <= add_and_clamp(expand_to_8(outcolR, diffbit), addCode);
    G <= add_and_clamp(expand_to_8(outcolG, diffbit), addCode);
    B <= add_and_clamp(expand_to_8(outcolB, diffbit), addCode);

end BehavioralETC1Decompressor;