A
abou ALsari
Guest
I am trying to design a matrix multiplier in Verilog using systolic array architecture. If I test the Processing Elements they work but when trying to connect them to build the array they get the inputs but don\'t generate the output.
Here is my code:
`timescale 1 ns/ 1 ps
module sys_array_tb;
reg reset, clk;
localparam M = 2;
localparam N = 2;
localparam DW = 16;
reg start;
reg signed [DW-1:0] W [M*N-1:0];
reg signed [DW-1:0] X [M*N-1:0];
wire signed [DW*2:0] Y [M*N-1:0];
wire done;
systolic_array #(.M(M),.N(N), .DW(DW)) uut(.clk(clk), .reset(reset),
.start(start), .X(X),.W(W),.done(done), .Y(Y));
initial begin
reset <= 1;
clk <= 0;
#3
reset <= 0;
#3
W = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
X = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
start = 1;
#10
wait(done);
#3
W = {16\'sh1, 16\'sh2, 16\'sh3, 16\'sh4};
X = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
wait(done);
#3
W = {-16\'sh1, -16\'sh2, -16\'sh3, -16\'sh4};
X = {-16\'sh1, -16\'sh2, -16\'sh3, -16\'sh4};
wait(done);
#3
W = {-16\'sh1, 16\'h2, 16\'h3, -16\'sh4};
X = {16\'h1, -16\'sh2, -16\'sh3, 16\'h4};
wait(done);
end
initial begin
repeat(100)
#5 clk <= ~clk;
end
/*
always @(posedge clk) begin
$display(\"output is = %h \" uut.pe.mac);
end
*/
initial begin
$dumpfile(\"wave.vcd\");
$dumpvars(0, sys_array_tb);
end
endmodule
module systolic_array #(
parameter M = 4,
parameter N = 4,
parameter DW = 16
)
(
input clk,
input reset,
input start,
output reg done,
input signed [DW-1:0] W [M*N-1:0],
input signed [DW-1:0] X [M*N-1:0],
output signed [2*DW:0] Y [M*N-1:0]
);
reg calc_done;
reg signed [DW-1:0] A_reg [M-1:0][N-1:0];
reg signed [DW-1:0] B_reg [M-1:0][N-1:0];
reg signed [2*DW:0] C_reg [M-1:0][N-1:0];
wire [DW-1:0] a_ins [M-1:0][N:0];
wire [DW-1:0] b_ins [M:0][N-1:0];
wire [2*DW:0] c_outs [M-1:0][N-1:0];
integer m,n;
always @(posedge clk) begin
if(reset) begin
m = 0;
n = 0 ;
calc_done = 0;
done = 0;
// count <= 0;
for(m=0;m<=M;m=m+1) begin
for(n=0;n<=N;n=n+1) begin
A_reg[m][n] = 16\'d0;
B_reg[m][n] = 16\'d0;
C_reg[m][n] = 33\'d0;
end
end
end
else begin
if (start) begin
for(m=0;m<=M;m=m+1) begin
for(n=0;n<=N;n=n+1) begin
A_reg[m][n] = W[(2*m)+n];
B_reg[m][n] = X[(2*m)+n];
C_reg[m][n] = 33\'d0;
end
end
if(!calc_done) begin
C_reg[m][n] = c_outs[m][n];
if(n == N) begin
n = 0;
if (m == M) begin
m = 0;
calc_done = 1;
end
else m = m + 1;
end
else n = n + 1;
end else if(calc_done) begin
for(m=0;m<=M;m=m+1) begin //run through the rows
for(n=0;n<=N;n=n+1) begin //run through the columns
Y[(2*m)+n] = C_reg[m][n];
//Y[m][n] = C_reg[m][n];
end
end
done = 1;
end
end
end
end
genvar i,j;
generate
for (i = 0; i < M; i = i+1) begin : Rows
for (j = 0; j < N; j = j+1) begin : Columns
//if(i == M-1 || j == N-1) begin
//end
PE #(.DW(DW)) pe (
.clk(clk),
.reset(reset),
.x_i(a_ins[j]),
.y_i(b_ins[j]),
.x_o(a_ins[j+1]),
.y_o(b_ins[i+1][j]),
.mac(c_outs[j])
);
end
end
for (i = 0; i < M; i = i +1) begin : Left
assign a_ins[0] = A_reg[0];
end
for (j = 0; j < N; j = j +1) begin : Top
assign b_ins[0][j] = B_reg[0][j];
end
endgenerate
endmodule
module PE #(
parameter DW = 16
)
(
input clk,
input reset,
input signed[DW-1:0] x_i,
input signed[DW-1:0] y_i,
output reg signed[DW-1:0] x_o,
output reg signed[DW-1:0] y_o,
output reg signed[2*DW:0] mac
);
reg signed[DW-1:0] x_reg,y_reg;
reg signed[2*DW:0] mac_reg;
wire signed [2*DW:0] multiply;
assign multiply = x_i * y_i;
always @(posedge clk or posedge reset) begin
if (reset) begin
x_reg <= 0;
y_reg <= 0;
mac_reg <= 0;
end
else begin
x_reg <= x_i;
y_reg <= y_i;
mac_reg <= mac_reg + multiply;
$display (\"time: %t, x = %d, y = %d ,mac is %d\", $time ,x_i,y_i,mac);
end
end
assign x_o = x_reg;
assign y_o = y_reg;
assign mac = mac_reg;
endmodule
And as you might noticed the value of 16\'h1 never fed to the inputs (x & y). Could you please tell me where I\'m doing wrong?
Here is my code:
`timescale 1 ns/ 1 ps
module sys_array_tb;
reg reset, clk;
localparam M = 2;
localparam N = 2;
localparam DW = 16;
reg start;
reg signed [DW-1:0] W [M*N-1:0];
reg signed [DW-1:0] X [M*N-1:0];
wire signed [DW*2:0] Y [M*N-1:0];
wire done;
systolic_array #(.M(M),.N(N), .DW(DW)) uut(.clk(clk), .reset(reset),
.start(start), .X(X),.W(W),.done(done), .Y(Y));
initial begin
reset <= 1;
clk <= 0;
#3
reset <= 0;
#3
W = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
X = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
start = 1;
#10
wait(done);
#3
W = {16\'sh1, 16\'sh2, 16\'sh3, 16\'sh4};
X = {16\'h1, 16\'h2, 16\'h3, 16\'h4};
wait(done);
#3
W = {-16\'sh1, -16\'sh2, -16\'sh3, -16\'sh4};
X = {-16\'sh1, -16\'sh2, -16\'sh3, -16\'sh4};
wait(done);
#3
W = {-16\'sh1, 16\'h2, 16\'h3, -16\'sh4};
X = {16\'h1, -16\'sh2, -16\'sh3, 16\'h4};
wait(done);
end
initial begin
repeat(100)
#5 clk <= ~clk;
end
/*
always @(posedge clk) begin
$display(\"output is = %h \" uut.pe.mac);
end
*/
initial begin
$dumpfile(\"wave.vcd\");
$dumpvars(0, sys_array_tb);
end
endmodule
module systolic_array #(
parameter M = 4,
parameter N = 4,
parameter DW = 16
)
(
input clk,
input reset,
input start,
output reg done,
input signed [DW-1:0] W [M*N-1:0],
input signed [DW-1:0] X [M*N-1:0],
output signed [2*DW:0] Y [M*N-1:0]
);
reg calc_done;
reg signed [DW-1:0] A_reg [M-1:0][N-1:0];
reg signed [DW-1:0] B_reg [M-1:0][N-1:0];
reg signed [2*DW:0] C_reg [M-1:0][N-1:0];
wire [DW-1:0] a_ins [M-1:0][N:0];
wire [DW-1:0] b_ins [M:0][N-1:0];
wire [2*DW:0] c_outs [M-1:0][N-1:0];
integer m,n;
always @(posedge clk) begin
if(reset) begin
m = 0;
n = 0 ;
calc_done = 0;
done = 0;
// count <= 0;
for(m=0;m<=M;m=m+1) begin
for(n=0;n<=N;n=n+1) begin
A_reg[m][n] = 16\'d0;
B_reg[m][n] = 16\'d0;
C_reg[m][n] = 33\'d0;
end
end
end
else begin
if (start) begin
for(m=0;m<=M;m=m+1) begin
for(n=0;n<=N;n=n+1) begin
A_reg[m][n] = W[(2*m)+n];
B_reg[m][n] = X[(2*m)+n];
C_reg[m][n] = 33\'d0;
end
end
if(!calc_done) begin
C_reg[m][n] = c_outs[m][n];
if(n == N) begin
n = 0;
if (m == M) begin
m = 0;
calc_done = 1;
end
else m = m + 1;
end
else n = n + 1;
end else if(calc_done) begin
for(m=0;m<=M;m=m+1) begin //run through the rows
for(n=0;n<=N;n=n+1) begin //run through the columns
Y[(2*m)+n] = C_reg[m][n];
//Y[m][n] = C_reg[m][n];
end
end
done = 1;
end
end
end
end
genvar i,j;
generate
for (i = 0; i < M; i = i+1) begin : Rows
for (j = 0; j < N; j = j+1) begin : Columns
//if(i == M-1 || j == N-1) begin
//end
PE #(.DW(DW)) pe (
.clk(clk),
.reset(reset),
.x_i(a_ins[j]),
.y_i(b_ins[j]),
.x_o(a_ins[j+1]),
.y_o(b_ins[i+1][j]),
.mac(c_outs[j])
);
end
end
for (i = 0; i < M; i = i +1) begin : Left
assign a_ins[0] = A_reg[0];
end
for (j = 0; j < N; j = j +1) begin : Top
assign b_ins[0][j] = B_reg[0][j];
end
endgenerate
endmodule
module PE #(
parameter DW = 16
)
(
input clk,
input reset,
input signed[DW-1:0] x_i,
input signed[DW-1:0] y_i,
output reg signed[DW-1:0] x_o,
output reg signed[DW-1:0] y_o,
output reg signed[2*DW:0] mac
);
reg signed[DW-1:0] x_reg,y_reg;
reg signed[2*DW:0] mac_reg;
wire signed [2*DW:0] multiply;
assign multiply = x_i * y_i;
always @(posedge clk or posedge reset) begin
if (reset) begin
x_reg <= 0;
y_reg <= 0;
mac_reg <= 0;
end
else begin
x_reg <= x_i;
y_reg <= y_i;
mac_reg <= mac_reg + multiply;
$display (\"time: %t, x = %d, y = %d ,mac is %d\", $time ,x_i,y_i,mac);
end
end
assign x_o = x_reg;
assign y_o = y_reg;
assign mac = mac_reg;
endmodule
And as you might noticed the value of 16\'h1 never fed to the inputs (x & y). Could you please tell me where I\'m doing wrong?