matrix multiplier is not working...

A

abou ALsari

Guest
I am trying to design a matrix multiplier in Verilog using systolic array architecture. If I test the Processing Elements they work but when trying to connect them to build the array they get the inputs but don\'t generate the output.


Here is my code:

`timescale 1 ns/ 1 ps

module sys_array_tb;

reg reset, clk;
localparam M = 2;
localparam N = 2;
localparam DW = 16;

reg start;
reg signed [DW-1:0] W [M*N-1:0];
reg signed [DW-1:0] X [M*N-1:0];
wire signed [DW*2:0] Y [M*N-1:0];
wire done;

systolic_array #(.M(M),.N(N), .DW(DW)) uut(.clk(clk), .reset(reset),
.start(start), .X(X),.W(W),.done(done), .Y(Y));

initial begin
reset <= 1;
clk <= 0;
#3
reset <= 0;
#3



W = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
X = {16\'h1, 16\'h2, 16\'h3, 16\'h4};

start = 1;
#10


wait(done);

#3

W = {16\'sh1, 16\'sh2, 16\'sh3, 16\'sh4};
X = {16\'h1, 16\'h2, 16\'h3, 16\'h4};


wait(done);

#3

W = {-16\'sh1, -16\'sh2, -16\'sh3, -16\'sh4};
X = {-16\'sh1, -16\'sh2, -16\'sh3, -16\'sh4};


wait(done);

#3

W = {-16\'sh1, 16\'h2, 16\'h3, -16\'sh4};
X = {16\'h1, -16\'sh2, -16\'sh3, 16\'h4};


wait(done);

end

initial begin
repeat(100)
#5 clk <= ~clk;
end

/*
always @(posedge clk) begin

$display(\"output is = %h \" uut.pe.mac);

end
*/

initial begin
$dumpfile(\"wave.vcd\");
$dumpvars(0, sys_array_tb);
end



endmodule


module systolic_array #(
parameter M = 4,
parameter N = 4,
parameter DW = 16
)
(
input clk,
input reset,
input start,
output reg done,

input signed [DW-1:0] W [M*N-1:0],
input signed [DW-1:0] X [M*N-1:0],
output signed [2*DW:0] Y [M*N-1:0]
);


reg calc_done;

reg signed [DW-1:0] A_reg [M-1:0][N-1:0];
reg signed [DW-1:0] B_reg [M-1:0][N-1:0];
reg signed [2*DW:0] C_reg [M-1:0][N-1:0];

wire [DW-1:0] a_ins [M-1:0][N:0];
wire [DW-1:0] b_ins [M:0][N-1:0];
wire [2*DW:0] c_outs [M-1:0][N-1:0];



integer m,n;
always @(posedge clk) begin
if(reset) begin
m = 0;
n = 0 ;
calc_done = 0;
done = 0;
// count <= 0;
for(m=0;m<=M;m=m+1) begin
for(n=0;n<=N;n=n+1) begin
A_reg[m][n] = 16\'d0;
B_reg[m][n] = 16\'d0;
C_reg[m][n] = 33\'d0;
end
end
end
else begin
if (start) begin
for(m=0;m<=M;m=m+1) begin
for(n=0;n<=N;n=n+1) begin
A_reg[m][n] = W[(2*m)+n];
B_reg[m][n] = X[(2*m)+n];
C_reg[m][n] = 33\'d0;
end
end

if(!calc_done) begin
C_reg[m][n] = c_outs[m][n];
if(n == N) begin
n = 0;
if (m == M) begin
m = 0;
calc_done = 1;
end
else m = m + 1;
end
else n = n + 1;
end else if(calc_done) begin
for(m=0;m<=M;m=m+1) begin //run through the rows
for(n=0;n<=N;n=n+1) begin //run through the columns
Y[(2*m)+n] = C_reg[m][n];
//Y[m][n] = C_reg[m][n];
end
end
done = 1;
end

end
end
end



genvar i,j;
generate
for (i = 0; i < M; i = i+1) begin : Rows
for (j = 0; j < N; j = j+1) begin : Columns
//if(i == M-1 || j == N-1) begin
//end
PE #(.DW(DW)) pe (
.clk(clk),
.reset(reset),
.x_i(a_ins[j]),
.y_i(b_ins[j]),
.x_o(a_ins[j+1]),
.y_o(b_ins[i+1][j]),
.mac(c_outs[j])
);
end
end
for (i = 0; i < M; i = i +1) begin : Left
assign a_ins[0] = A_reg[0];
end
for (j = 0; j < N; j = j +1) begin : Top
assign b_ins[0][j] = B_reg[0][j];
end

endgenerate

endmodule

module PE #(
parameter DW = 16
)
(
input clk,
input reset,

input signed[DW-1:0] x_i,
input signed[DW-1:0] y_i,

output reg signed[DW-1:0] x_o,
output reg signed[DW-1:0] y_o,
output reg signed[2*DW:0] mac

);


reg signed[DW-1:0] x_reg,y_reg;
reg signed[2*DW:0] mac_reg;

wire signed [2*DW:0] multiply;
assign multiply = x_i * y_i;

always @(posedge clk or posedge reset) begin
if (reset) begin

x_reg <= 0;
y_reg <= 0;
mac_reg <= 0;
end
else begin

x_reg <= x_i;
y_reg <= y_i;

mac_reg <= mac_reg + multiply;
$display (\"time: %t, x = %d, y = %d ,mac is %d\", $time ,x_i,y_i,mac);
end
end

assign x_o = x_reg;
assign y_o = y_reg;
assign mac = mac_reg;
endmodule

And as you might noticed the value of 16\'h1 never fed to the inputs (x & y). Could you please tell me where I\'m doing wrong?
 

Welcome to EDABoard.com

Sponsor

Back
Top