矩阵扫描

本文最后更新于:2024年12月20日 下午

实现一个Zigzag扫描算法,将8x8矩阵按“之”字形的顺序输出为一维数组。

(和上一个排序算法一样,都是霍夫曼编码之前的预处理操作,怀疑老师出题的时候手上正好在做霍夫曼编码?)

一眼扫过去,看到题干中要求使用SRAM缓存输入数据,那应该就需要例化一个SRAM的IP了,做了这么多次作业,也确实可以考虑用一下IP核了,有现成的轮子不用,这不是自找麻烦吗。

算法实现

输入数据是从左往右、从上到下依次按顺序排列,这也符合C语言中矩阵存储时的地址构造。

看起来像是一个数学中的找规律问题,就是计算出从左上角一直输出到右下角的数据地址。对于8x8的输入矩阵,直接给出每个输出元素的地址如下,用verilog写一个查找表就可以了,不用想太多。

按照题目要求,先在vivado中例化一个单端口RAM的IP,可以在IP Catalog中搜block memory找到。

在代码中按照每个时钟周期一个数据的顺序往RAM中写入输入数据,接着再从查找表中也按照每个时钟周期一个数据的顺序从BRAM中读取数据输出即可,过程并不复杂,但需要认真规划好数据读写的时序,比如RAM写数据需要将数据地址与输入数据在同一时钟给出;读数据时根据你对IP核的配置,可能是给出读数据地址的下一个周期读出数据,也可能是下两个周期读出数据。

代码+仿真

代码不难,分为两个模块,一个是读写ram模块ram_rw,一个是顶层模块mat_scan。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// ram_rw
// Wraps the blk_mem_gen_0 block-RAM IP (only port A is connected) and
// generates data_ready, a read-out window that is asserted for 64 clock
// cycles after 64 consecutive write-enable cycles have been observed.
//
// Ports
//   clk        : clock for the RAM and all counters
//   rst_n      : asynchronous reset, active low
//   din        : write data, forwarded unchanged to the RAM
//   addr       : address shared by writes and reads
//   vld_in     : RAM write enable; also counted to detect a full frame
//   dout       : RAM read data
//   data_ready : high for 64 cycles once a full 64-sample frame was written
module ram_rw
(
    input  wire       clk,
    input  wire       rst_n,
    input  wire [9:0] din,
    input  wire [5:0] addr,
    input  wire       vld_in,

    output wire [9:0] dout,
    output reg        data_ready
);

    localparam [5:0] LAST_IDX = 6'd63;  // index of the 64th element

    reg [5:0] input_cnt;  // number of consecutive writes seen so far
    reg [5:0] ready_cnt;  // counts the cycles of the data_ready window
    reg       cnt_flag;   // set while the data_ready window is open

    // The 64th consecutive write is happening in this cycle.
    // vld_in is part of the condition so that a burst that stops after
    // 63 samples does not open the read-out window.
    wire frame_done = vld_in && (input_cnt == LAST_IDX);

    blk_mem_gen_0 blk_mem_gen_0_inst (
        .clka  (clk),
        .ena   (1'b1),
        .wea   (vld_in),
        .addra (addr),
        .dina  (din),
        .douta (dout)
    );

    // NOTE: in every block below the asynchronous reset is tested on its own
    // and all other conditions are evaluated synchronously. Combining a
    // counter comparison with !rst_n in the first "if" simulates as a
    // synchronous clear, but does not describe a flip-flop with a clean
    // asynchronous reset for synthesis.

    // Consecutive-write counter: restarts on any gap and after a full frame.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            input_cnt <= 6'd0;
        else if (vld_in && !frame_done)
            input_cnt <= input_cnt + 1'b1;
        else
            input_cnt <= 6'd0;
    end

    // Window flag: opened by a complete frame, closed when ready_cnt expires.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            cnt_flag <= 1'b0;
        else if (frame_done)
            cnt_flag <= 1'b1;
        else if (ready_cnt == LAST_IDX)
            cnt_flag <= 1'b0;
    end

    // Window length counter: advances while the window is open, wraps at 63.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            ready_cnt <= 6'd0;
        else if (ready_cnt == LAST_IDX)
            ready_cnt <= 6'd0;
        else if (cnt_flag || frame_done)
            ready_cnt <= ready_cnt + 1'b1;
    end

    // data_ready is the registered window; it stays high for 64 clocks
    // so that the parent module can read out all 64 elements.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            data_ready <= 1'b0;
        else
            data_ready <= cnt_flag || frame_done;
    end

endmodule

VERILOG
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// mat_scan
// Zigzag scan of an 8x8 matrix. 64 samples are written into a RAM in
// row-major order (one per clock while vld_in is high); afterwards they are
// read back in zigzag order through a 64-entry address lookup table.
//
// Ports
//   clk     : clock
//   rst_n   : asynchronous reset, active low
//   vld_in  : high for 64 consecutive cycles while din carries matrix data
//   din     : input sample, valid in the same cycle as vld_in
//   dout    : RAM read data (zigzag-ordered during the output window)
//   vld_out : data_ready delayed by four clocks
//             NOTE(review): this lines up with dout only when the RAM IP
//             returns data one clock after the address - confirm against the
//             blk_mem_gen_0 configuration (output register disabled).
module mat_scan
(
    input  wire       clk,
    input  wire       rst_n,
    input  wire       vld_in,
    input  wire [9:0] din,

    output wire [9:0] dout,
    output reg        vld_out
);

    localparam [5:0] LAST_IDX = 6'd63;  // index of the 64th element

    wire data_ready;

    reg [5:0] input_addr;   // write address (row-major position of din)
    reg [5:0] output_addr;  // read address produced by the zigzag table
    reg [5:0] addr;         // address actually presented to the RAM
    reg [5:0] addr_cnt;     // position in the zigzag output sequence

    reg [2:0] ready_dly;    // data_ready delayed by 1, 2 and 3 clocks
    reg       vld_in_temp;  // vld_in delayed by one clock (RAM write enable)
    reg [9:0] din_temp;     // din delayed by one clock (RAM write data)

    // Row-major address of the n-th element in zigzag order (8x8 matrix).
    function [5:0] zigzag_addr;
        input [5:0] n;
        begin
            case (n)
                6'd0:  zigzag_addr = 6'd0;
                6'd1:  zigzag_addr = 6'd1;
                6'd2:  zigzag_addr = 6'd8;
                6'd3:  zigzag_addr = 6'd16;
                6'd4:  zigzag_addr = 6'd9;
                6'd5:  zigzag_addr = 6'd2;
                6'd6:  zigzag_addr = 6'd3;
                6'd7:  zigzag_addr = 6'd10;
                6'd8:  zigzag_addr = 6'd17;
                6'd9:  zigzag_addr = 6'd24;
                6'd10: zigzag_addr = 6'd32;
                6'd11: zigzag_addr = 6'd25;
                6'd12: zigzag_addr = 6'd18;
                6'd13: zigzag_addr = 6'd11;
                6'd14: zigzag_addr = 6'd4;
                6'd15: zigzag_addr = 6'd5;
                6'd16: zigzag_addr = 6'd12;
                6'd17: zigzag_addr = 6'd19;
                6'd18: zigzag_addr = 6'd26;
                6'd19: zigzag_addr = 6'd33;
                6'd20: zigzag_addr = 6'd40;
                6'd21: zigzag_addr = 6'd48;
                6'd22: zigzag_addr = 6'd41;
                6'd23: zigzag_addr = 6'd34;
                6'd24: zigzag_addr = 6'd27;
                6'd25: zigzag_addr = 6'd20;
                6'd26: zigzag_addr = 6'd13;
                6'd27: zigzag_addr = 6'd6;
                6'd28: zigzag_addr = 6'd7;
                6'd29: zigzag_addr = 6'd14;
                6'd30: zigzag_addr = 6'd21;
                6'd31: zigzag_addr = 6'd28;
                6'd32: zigzag_addr = 6'd35;
                6'd33: zigzag_addr = 6'd42;
                6'd34: zigzag_addr = 6'd49;
                6'd35: zigzag_addr = 6'd56;
                6'd36: zigzag_addr = 6'd57;
                6'd37: zigzag_addr = 6'd50;
                6'd38: zigzag_addr = 6'd43;
                6'd39: zigzag_addr = 6'd36;
                6'd40: zigzag_addr = 6'd29;
                6'd41: zigzag_addr = 6'd22;
                6'd42: zigzag_addr = 6'd15;
                6'd43: zigzag_addr = 6'd23;
                6'd44: zigzag_addr = 6'd30;
                6'd45: zigzag_addr = 6'd37;
                6'd46: zigzag_addr = 6'd44;
                6'd47: zigzag_addr = 6'd51;
                6'd48: zigzag_addr = 6'd58;
                6'd49: zigzag_addr = 6'd59;
                6'd50: zigzag_addr = 6'd52;
                6'd51: zigzag_addr = 6'd45;
                6'd52: zigzag_addr = 6'd38;
                6'd53: zigzag_addr = 6'd31;
                6'd54: zigzag_addr = 6'd39;
                6'd55: zigzag_addr = 6'd46;
                6'd56: zigzag_addr = 6'd53;
                6'd57: zigzag_addr = 6'd60;
                6'd58: zigzag_addr = 6'd61;
                6'd59: zigzag_addr = 6'd54;
                6'd60: zigzag_addr = 6'd47;
                6'd61: zigzag_addr = 6'd55;
                6'd62: zigzag_addr = 6'd62;
                6'd63: zigzag_addr = 6'd63;
            endcase
        end
    endfunction

    ram_rw ram_rw_inst (
        .clk        (clk),
        .rst_n      (rst_n),
        .din        (din_temp),
        .addr       (addr),
        .vld_in     (vld_in_temp),
        .dout       (dout),
        .data_ready (data_ready)
    );

    // Write-side pipeline. addr is a register, so the write enable AND the
    // write data are both delayed by one clock to reach the RAM in the same
    // cycle as the address. Passing din through undelayed would drop the
    // sample that accompanies the first vld_in cycle and store every later
    // sample one address too low.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            vld_in_temp <= 1'b0;
            din_temp    <= 10'd0;
        end
        else begin
            vld_in_temp <= vld_in;
            din_temp    <= din;
        end
    end

    // NOTE: the asynchronous reset is tested on its own in the counters
    // below; the wrap-at-63 comparison is evaluated synchronously. Folding
    // the comparison into the reset "if" simulates as a synchronous clear
    // but does not describe a clean asynchronous-reset flip-flop.

    // Write address: one step per valid input sample, wraps after 63.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            input_addr <= 6'd0;
        else if (input_addr == LAST_IDX)
            input_addr <= 6'd0;
        else if (vld_in)
            input_addr <= input_addr + 1'b1;
    end

    // Output sequence counter: runs while the delayed ready window is open.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            addr_cnt <= 6'd0;
        else if (addr_cnt == LAST_IDX)
            addr_cnt <= 6'd0;
        else if (ready_dly[0])
            addr_cnt <= addr_cnt + 1'b1;
    end

    // Registered lookup: sequence position -> RAM address.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            output_addr <= 6'd0;
        else
            output_addr <= zigzag_addr(addr_cnt);
    end

    // Delay chain for data_ready: ready_dly[0..2] are 1..3 clocks late,
    // vld_out is 4 clocks late.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            ready_dly <= 3'b000;
            vld_out   <= 1'b0;
        end
        else begin
            {vld_out, ready_dly} <= {ready_dly, data_ready};
        end
    end

    // RAM address mux: writes take priority, then zigzag reads, else idle at 0.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            addr <= 6'd0;
        else if (vld_in)
            addr <= input_addr;
        else if (ready_dly[1])
            addr <= output_addr;
        else
            addr <= 6'd0;
    end

endmodule

VERILOG

testbench如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
`timescale 1ns/1ns
// mat_scan_tb
// Stimulus-only testbench for mat_scan. After reset it holds vld_in high for
// 64 clock periods while din steps through 0, 1, 2, ... once per period; din
// becomes 64 at the same instant vld_in is released. The simulation is then
// left running long enough for the 64-sample read-out and paused with $stop.
module mat_scan_tb();

    // Parameters
    localparam CLK_HALF   = 10;            // half clock period in ns
    localparam CLK_PERIOD = 2 * CLK_HALF;  // 20 ns clock
    localparam N_SAMPLES  = 64;            // elements in one 8x8 matrix
    localparam DRAIN_CLKS = N_SAMPLES + 8; // read-out window plus pipeline margin

    //Ports
    reg        clk;
    reg        rst_n;
    reg        vld_in;
    reg  [9:0] din;
    wire [9:0] dout;
    wire       vld_out;

    integer i;  // stimulus loop index

    mat_scan mat_scan_inst (
        .clk     (clk),
        .rst_n   (rst_n),
        .vld_in  (vld_in),
        .din     (din),
        .dout    (dout),
        .vld_out (vld_out)
    );

    // Free-running clock.
    always #CLK_HALF clk = ! clk ;

    initial begin
        clk    <= 1'b0;
        rst_n  <= 1'b0;
        vld_in <= 1'b0;
        din    <= 10'd0;

        // Release reset and start the input burst together; din is still 0.
        #110
        rst_n  <= 1'b1;
        vld_in <= 1'b1;

        // One new value per clock period: 1, 2, ..., 64.
        for (i = 1; i <= N_SAMPLES; i = i + 1) begin
            #CLK_PERIOD din <= i[9:0];
        end
        // Dropped in the same time step in which din becomes 64.
        vld_in <= 1'b0;

        // Let the design stream out all 64 results, then pause the simulator
        // instead of letting the clock run forever.
        #(CLK_PERIOD * DRAIN_CLKS)
        $stop;
    end

endmodule
VERILOG

modelsim仿真结果如下:

需要说明的一点是,Vivado和ModelSim联合仿真的时候,每次修改了代码之后,需要在modelsim里面右击testbench文件,先recompile再simulate来重新仿真。在没用IP的时候,这样是能够重新仿真的;但是我用了IP之后再点simulate,它会显示IP模块无法载入,也就是找不到IP核对应的仿真库。

于是我研究了一会,发现每次从vivado调用modelsim进行仿真的时候,都会执行一个名为mat_scan_tb_simulate.do的脚本文件,它位于工程文件夹的work3.sim/sim_1目录下(通常在其中的behav/modelsim子目录里),里面是这样的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
######################################################################
#
# File name : mat_scan_tb_simulate.do
# Created on: Fri Dec 20 16:06:54 +0800 2024
#
# Auto generated by Vivado for 'behavioral' simulation
#
######################################################################
# Start the simulation of mat_scan_tb (plus glbl) from xil_defaultlib.
# Each -L option names a library to search; blk_mem_gen_v8_4_4 is the
# library of the block memory IP instantiated by the design.
vsim -voptargs="+acc" -L xil_defaultlib -L blk_mem_gen_v8_4_4 -L unisims_ver -L unimacro_ver -L secureip -L xpm -lib xil_defaultlib xil_defaultlib.mat_scan_tb xil_defaultlib.glbl

# Simulator variables set by the generated script.
set NumericStdNoWarnings 1
set StdArithNoWarnings 1

# Run the companion wave-setup script.
do {mat_scan_tb_wave.do}

# Open the wave, structure and signals views.
view wave
view structure
view signals

# Run the companion .udo script.
do {mat_scan_tb.udo}

# Simulate for 1000 ns.
# NOTE(review): the testbench in this article keeps driving inputs until
# 1390 ns, so this run length does not cover a full frame - run longer.
run 1000ns

BASH

具体执行仿真操作的就是vsim那一句指令,然后当我们回顾上面点击simulate时执行的指令就会发现里面很明显少了-L blk_mem_gen_v8_4_4这个选项,我怀疑问题就出在这里。

我测试了一下,果不其然,点击simulate选项执行的指令默认不带ip核的库,也就是少了-L blk_mem_gen_v8_4_4这个选项,当我加上这个选项,也就是执行下面这句:

1
# Restart the testbench simulation with the block memory IP library on the search path.
vsim -voptargs=+acc -L blk_mem_gen_v8_4_4 xil_defaultlib.mat_scan_tb
BASH

是能够正常仿真出波形的,没有报错信息。目前我还没找到在哪里可以设置,让点击simulate时自动带上IP库。我的建议是:每次跑仿真时,都打开mat_scan_tb_simulate.do这个脚本,把里面vsim的那一句copy下来;之后需要在modelsim中重新跑仿真时,直接在命令行粘贴执行,就不要再右键tb文件点击simulate了。

不过如果有大佬知道如何设置,麻烦在评论区告诉我,谢谢♪(・ω・)ノ。


矩阵扫描
http://example.com/2024/12/20/矩阵扫描/
作者
叶逸昇
发布于
2024年12月20日
许可协议