FPGA滤波器实施概述

我协助我的一位研究生进行了一些有关FPGA滤波器实现的定向研究。她的任务是了解并实现FPGA硬件上的几种类型的过滤器。设计的所有滤波器均为15阶滤波器,并使用16位定点数学运算。然后检查了过滤器的性能和资源使用情况。她关于研究的最终演讲可以在下面查看:

FPGA滤波器实现

研究项目期间创建的Verilog源文件如下。

FIR滤波器

FIR滤波器是四个滤波器中最简单,最快的。它利用了预加器的对称性。而且,使用加法器树来最小化组合路径延迟。

FIR_Filter.v
`define FILT_LENGTH 16
 
module FIR_Filter(
    input  clk,
    input  en,
    input  [15:0]din,
    output [15:0]dout
    );
 
    reg signed [15:0]coeff[`FILT_LENGTH/2-1:0]; //Filter coefficients.
    reg signed [15:0]delay_line[`FILT_LENGTH-1:0]; //Input delay line.
    wire [31:0]accum; //Accumulator for output filter calculation.    
    integer i; //Initialization integer.
    genvar c;  //Delay line generation variable.
 
    reg signed [16:0]preadd_regs[7:0]; //Save calc after preadd.
    reg signed [31:0]mult_regs[7:0];   //Save calc after multiplication.
    reg signed [31:0]tree1_regs[3:0];  //Save calc after first layer of adder tree.
    reg signed [31:0]tree2_regs[1:0];  //Save calc after first layer of adder tree.
    reg signed [31:0]treeout_reg;      //Save calc after complete.
 
    //assign dout = treeout_reg[31:16];
 
    //Calculate value in the accumulator.
    assign accum = (delay_line[0] + delay_line[15]) * coeff[0] +
                   (delay_line[1] + delay_line[14]) * coeff[1] +
                   (delay_line[2] + delay_line[13]) * coeff[2] +
                   (delay_line[3] + delay_line[12]) * coeff[3] +
                   (delay_line[4] + delay_line[11]) * coeff[4] +
                   (delay_line[5] + delay_line[10]) * coeff[5] +
                   (delay_line[6] + delay_line[9])  * coeff[6] +
                   (delay_line[7] + delay_line[8])  * coeff[7];
 
    //Assign upper 16-bits to output.
    assign dout = accum[31:16];
 
    initial begin   
        //Load the filter coefficients.
        coeff[0] = 16'd2320;
        coeff[1] = 16'd4143;
        coeff[2] = 16'd4592;
        coeff[3] = 16'd7278;
        coeff[4] = 16'd8423;
        coeff[5] = 16'd10389;
        coeff[6] = 16'd11269;
        coeff[7] = 16'd12000;
 
        //Initialize delay line.
        for(i = 0; i < `FILT_LENGTH; i = i+1'b1) begin
            delay_line[i] = 16'd0;
        end
 
        //Initialize the preadder regs.
        for(i = 0; i < 8; i = i+1'b1) begin
            preadd_regs[i] = 17'd0;
        end
 
        //Initialize the multiplier regs.
        for(i = 0; i < 8; i = i+1'b1) begin
            mult_regs[i] = 32'd0;
        end
 
        //Initialize the first layer of the adder tree regs.
        for(i = 0; i < 4; i = i+1'b1) begin
            tree1_regs[i] = 32'd0;
        end
 
        //Initialize the second layer of the adder tree regs.
        for(i = 0; i < 2; i = i+1'b1) begin
            tree2_regs[i] = 32'd0;
        end        
 
        //Initialize the adder tree output reg.
        treeout_reg = 32'd0;
    end
 
    //Advance the data through the delay line every clock cycle.
    generate
        for (c = `FILT_LENGTH-1; c > 0; c = c - 1) begin: inc_delay
            always @(posedge clk) begin
                if(en) delay_line[c] <= delay_line[c-1];
            end
        end
    endgenerate
 
    //Update with input data.
    always @(posedge clk) begin
        if(en) delay_line[0] <= din;
    end
endmodule


filter_top.v
module filter_top(
    input clk,
    input [7:0]sw,
    output [7:0]JA
    );
 
    wire [15:0]tout; //The output of the tone lookup table.
    wire [15:0]fout; //The output of the FIR filter.
    wire [15:0]mout; //Selected output.
    wire en;         //Clock enble.
    wire clk_out;    //264MHz System clock.
 
    assign en = 1'b1;
 
    clk_wiz_0 cw (.clk_in1(clk), .clk_out1(clk_out));
    //clk_en ce(.clk(clk_out), .en(en));
    ToneGen tg(.clk(clk_out), .en(en), .dout(tout));
    FIR_Filter ff(.clk(clk_out), .en(en), .din(tout), .dout(fout));
 
    //Choose filtered or unfiltered output based on sw0.
    assign mout = sw[0] ? fout : tout;
 
    //Take only the upper 8 bits and make it unsigned.
    //assign JA = mout[15:8] + 8'd128;
    assign {JA[3:0], JA[7:4]} = mout[15:8] + 8'd128;
 
endmodule


ToneGen.v
`timescale 1ns/1ps
 
module ToneGen(
    input clk,
    input en,
    output [15:0]dout
    );
 
    //Tone generation data.  At a 44KHz sampling rate, the lookup table
    //generates a 440Hz and 4.4KHz tone mixed together.
    reg [15:0]tonetbl[99:0];
 
    //Pointer into the tone table.
    reg [6:0]toneptr = 7'h00;
 
    initial begin
        tonetbl[0] = 10970;
        tonetbl[1] = 18151;
        tonetbl[2] = 19197;
        tonetbl[3] = 14105;
        tonetbl[4] = 5211;
        tonetbl[5] = -3704;
        tonetbl[6] = -8858;
        tonetbl[7] = -7914;
        tonetbl[8] = -876;
        tonetbl[9] = 9912;
        tonetbl[10] = 20660;
        tonetbl[11] = 27581;
        tonetbl[12] = 28330;
        tonetbl[13] = 22905;
        tonetbl[14] = 13642;
        tonetbl[15] = 4326;
        tonetbl[16] = -1260;
        tonetbl[17] = -780;
        tonetbl[18] = 5767;
        tonetbl[19] = 16037;
        tonetbl[20] = 26244;
        tonetbl[21] = 32601;
        tonetbl[22] = 32767;
        tonetbl[23] = 26741;
        tonetbl[24] = 16863;
        tonetbl[25] = 6918;
        tonetbl[26] = 692;
        tonetbl[27] = 527;
        tonetbl[28] = 6421;
        tonetbl[29] = 16037;
        tonetbl[30] = 25590;
        tonetbl[31] = 31295;
        tonetbl[32] = 30814;
        tonetbl[33] = 24149;
        tonetbl[34] = 13642;
        tonetbl[35] = 3081;
        tonetbl[36] = -3745;
        tonetbl[37] = -4494;
        tonetbl[38] = 837;
        tonetbl[39] = 9912;
        tonetbl[40] = 18947;
        tonetbl[41] = 24161;
        tonetbl[42] = 23217;
        tonetbl[43] = 16119;
        tonetbl[44] = 5211;
        tonetbl[45] = -5718;
        tonetbl[46] = -12878;
        tonetbl[47] = -13924;
        tonetbl[48] = -8853;
        tonetbl[49] = 0;
        tonetbl[50] = 8853;
        tonetbl[51] = 13924;
        tonetbl[52] = 12878;
        tonetbl[53] = 5718;
        tonetbl[54] = -5211;
        tonetbl[55] = -16119;
        tonetbl[56] = -23217;
        tonetbl[57] = -24161;
        tonetbl[58] = -18947;
        tonetbl[59] = -9912;
        tonetbl[60] = -837;
        tonetbl[61] = 4494;
        tonetbl[62] = 3745;
        tonetbl[63] = -3081;
        tonetbl[64] = -13642;
        tonetbl[65] = -24149;
        tonetbl[66] = -30814;
        tonetbl[67] = -31295;
        tonetbl[68] = -25590;
        tonetbl[69] = -16037;
        tonetbl[70] = -6421;
        tonetbl[71] = -527;
        tonetbl[72] = -692;
        tonetbl[73] = -6918;
        tonetbl[74] = -16863;
        tonetbl[75] = -26741;
        tonetbl[76] = -32767;
        tonetbl[77] = -32601;
        tonetbl[78] = -26244;
        tonetbl[79] = -16037;
        tonetbl[80] = -5767;
        tonetbl[81] = 780;
        tonetbl[82] = 1260;
        tonetbl[83] = -4326;
        tonetbl[84] = -13642;
        tonetbl[85] = -22905;
        tonetbl[86] = -28330;
        tonetbl[87] = -27581;
        tonetbl[88] = -20660;
        tonetbl[89] = -9912;
        tonetbl[90] = 876;
        tonetbl[91] = 7914;
        tonetbl[92] = 8858;
        tonetbl[93] = 3704;
        tonetbl[94] = -5211;
        tonetbl[95] = -14105;
        tonetbl[96] = -19197;
        tonetbl[97] = -18151;
        tonetbl[98] = -10970;
        tonetbl[99] = 0;
    end
 
    //Assign the output to the value in the lookup table
    //pointed to by toneptr.
    assign dout = tonetbl[toneptr];
 
    //Increment through the tone lookup table and wrap back
    //to zero when at the end.
    always @(posedge clk) begin
        if(en) begin
            if(toneptr == 7'd99) begin
                toneptr <= 7'd0;
            end
            else begin
                toneptr <= toneptr + 1'b1;
            end
        end
    end
endmodule


clk_en.v
module clk_en(
    input clk,
    output reg en = 1'b0
    );
 
    reg [15:0]en_counter = 16'h0;
 
    always @(posedge clk) begin
        if(en_counter == 16'd5999) begin
            en_counter <= 16'h0;
            en <= 1'b1;
        end
        else begin
            en_counter <= en_counter + 1'b1;
            en <= 1'b0;
        end
    end
 
endmodule


TB_ToneGen.v
module TB_ToneGen;
 
    reg clk = 1'b0;
    reg sw0 = 1'b1;
    wire [7:0]JA;
 
    wire en;    
    //wire [15:0]cout;
 
    filter_top uut(.clk(clk), .sw({15'h00,sw0}), .JA({JA[3:0], JA[7:4]}));
 
    assign en = uut.en;
    //assign cout = uut.ce.en_counter;
 
    //initial begin
    //     #600000 sw0 = 1'b1;
    //end
 
    //100MHz clock
    always #5 clk = ~clk;
endmodule



LMS自适应滤波器

LMS自适应滤波器在单个时钟周期内完成其输出计算和权重更新。由于其单周期运行,因此存在大量的组合延迟。该滤波器只能以FIR滤波器速度的一小部分运行。

LMS_Adapt.v
`define ADAPT_FILT_LENGTH 16
module LMS_Adapt(
    input  clk,
    input  en,
    input  [15:0]din,
    input  signed [15:0]desired,
    output signed [15:0]dout,
    output signed [15:0]err
    );
 
    parameter MU = 16'd32767;
 
    reg  signed [15:0]mu = MU;
    reg  signed [15:0]coeff[`ADAPT_FILT_LENGTH-1:0]; //Filter coefficients.
    reg  signed [15:0]delay_line[`ADAPT_FILT_LENGTH-1:0]; //Input delay line.
    wire signed [31:0]accum; //Accumulator for output filter calculation.
    wire signed [31:0]stepped; //Error after step sized applied.    
    integer i; //Initialization integer.
    genvar c;  //Delay line generation variable.
 
    //Calculate the error and multiply it by the step size.
    assign err = desired - dout;   
    //assign stepped = err * mu;
    assign stepped = err << 15;
 
    //Calculate value in the accumulator.
    assign accum = delay_line[0]*coeff[0] +
                   delay_line[1]*coeff[1] +
                   delay_line[2]*coeff[2] +
                   delay_line[3]*coeff[3] +
                   delay_line[4]*coeff[4] +
                   delay_line[5]*coeff[5] +
                   delay_line[6]*coeff[6] +
                   delay_line[7]*coeff[7] +
                   delay_line[8]*coeff[8] +
                   delay_line[9]*coeff[9] +
                   delay_line[10]*coeff[10] +
                   delay_line[11]*coeff[11] +
                   delay_line[12]*coeff[12] +
                   delay_line[13]*coeff[13] +
                   delay_line[14]*coeff[14] +
                   delay_line[15]*coeff[15];
 
    //Assign upper 16-bits to output.
    assign dout = accum[31:16];
 
    initial begin   
        //Load the filter coefficients.
        coeff[0]  = 20'd0;
        coeff[1]  = 20'd0;
        coeff[2]  = 20'd0;
        coeff[3]  = 20'd0;
        coeff[4]  = 20'd0;
        coeff[5]  = 20'd0;
        coeff[6]  = 20'd0;
        coeff[7]  = 20'd0;
        coeff[8]  = 20'd0;
        coeff[9]  = 20'd0;
        coeff[10] = 20'd0;
        coeff[11] = 20'd0;
        coeff[12] = 20'd0;
        coeff[13] = 20'd0;
        coeff[14] = 20'd0;
        coeff[15] = 20'd0;
 
        //Initialize delay line.
        for(i = 0; i < `ADAPT_FILT_LENGTH; i = i+1'b1) begin
            delay_line[i] = 16'd0;
        end
    end
 
    //Advance the data through the delay line every clock cycle.
    generate
        for (c = `ADAPT_FILT_LENGTH-1; c > 0; c = c - 1) begin: inc_delay
            always @(posedge clk) begin
                if(en) delay_line[c] <= delay_line[c-1];
            end
        end
    endgenerate
 
    //Update with input data.
    always @(posedge clk) begin
        if(en) delay_line[0] <= din;
    end
 
    wire signed [31:0]num2[`ADAPT_FILT_LENGTH-1:0];
    wire signed [15:0]n1[`ADAPT_FILT_LENGTH-1:0];
    wire signed [15:0]n2[`ADAPT_FILT_LENGTH-1:0];
 
    //Update the coefficients with the LMS algorithm.
    generate
        for (c = `ADAPT_FILT_LENGTH; c > 0; c = c - 1) begin: adapt_update
            assign n1[c-1] = stepped[31:16];
 
            assign num2[c-1] = n1[c-1]*delay_line[c-1];
            assign n2[c-1] = num2[c-1][31:16];
 
            always @(posedge clk) begin
                if(en) coeff[c-1] <= coeff[c-1] + n2[c-1];
            end
        end
    endgenerate
endmodule


filter_top.v
module filter_top(
    input clk,
    input [7:0]sw,
    output [7:0]JA
    );
 
    wire signed [15:0]tout;           //Output of the tone lookup table.
    wire [15:0]fout;                  //Output of the FIR filter.
    wire [15:0]aout;                  //Output of the adaptive filter.
    wire [15:0]err;                   //Adaptive filter error.
    wire [15:0]lfsr_out;              //16-bit Random noise variable.
    wire en;                          //Clock enble.
    wire [9:0]rnd;                    //10-bit noise.
    wire signed [15:0]tone_and_noise; //Combination of tone and noise.
    wire signed [15:0]delay_out;      //Output of the delay line.    
    wire [15:0]mout;                  //Selected desired output.
    wire clk19_8MHz;                  //19.8MHz system clock.
 
    assign rnd = lfsr_out[15:6];
    assign tone_and_noise = tout + rnd;
    //assign en = 1'b1;
 
    clk_wiz_0 cw(.clk_in1(clk), .clk_out1(clk19_8MHz), .reset(1'b0));
    ToneGen tg(.clk(clk19_8MHz), .en(en), .dout(tout));
    FIR_Filter ff(.clk(clk19_8MHz), .en(en), .din(tone_and_noise), .dout(fout));
    LMS_Adapt af(.clk(clk19_8MHz), .en(en), .din(tone_and_noise), .desired(mout), .dout(aout), .err(err));
    lfsr sr(.clk(clk19_8MHz), .en(en), .lfsr_out(lfsr_out));
    clk_en ce(.clk(clk19_8MHz), .en(en));
 
    //Choose filtered or unfiltered output based on sw0.
    assign mout = sw[0] ? fout : tone_and_noise;//delay_out;
 
    //Take only the upper 8 bits and make it unsigned.
    assign {JA[3:0], JA[7:4]} = aout[15:8] + 8'd128;
    //assign JA[7:0] = aout[15:8] + 8'd128;
 
    //assign {JA[3:0], JA[7:4]} = sw[7:0];   
endmodule


ToneGen.v
`timescale 1ns/1ps
 
module ToneGen(
    input clk,
    input en,
    output [15:0]dout
    );
 
    //Tone generation data.  At a 44KHz sampling rate, the lookup table
    //generates a 440Hz and 4.4KHz tone mixed together.
    reg [15:0]tonetbl[99:0];
 
    //Pointer into the tone table.
    reg [6:0]toneptr = 7'h00;
 
    initial begin
        tonetbl[0] = 10970;
        tonetbl[1] = 18151;
        tonetbl[2] = 19197;
        tonetbl[3] = 14105;
        tonetbl[4] = 5211;
        tonetbl[5] = -3704;
        tonetbl[6] = -8858;
        tonetbl[7] = -7914;
        tonetbl[8] = -876;
        tonetbl[9] = 9912;
        tonetbl[10] = 20660;
        tonetbl[11] = 27581;
        tonetbl[12] = 28330;
        tonetbl[13] = 22905;
        tonetbl[14] = 13642;
        tonetbl[15] = 4326;
        tonetbl[16] = -1260;
        tonetbl[17] = -780;
        tonetbl[18] = 5767;
        tonetbl[19] = 16037;
        tonetbl[20] = 26244;
        tonetbl[21] = 32601;
        tonetbl[22] = 32767;
        tonetbl[23] = 26741;
        tonetbl[24] = 16863;
        tonetbl[25] = 6918;
        tonetbl[26] = 692;
        tonetbl[27] = 527;
        tonetbl[28] = 6421;
        tonetbl[29] = 16037;
        tonetbl[30] = 25590;
        tonetbl[31] = 31295;
        tonetbl[32] = 30814;
        tonetbl[33] = 24149;
        tonetbl[34] = 13642;
        tonetbl[35] = 3081;
        tonetbl[36] = -3745;
        tonetbl[37] = -4494;
        tonetbl[38] = 837;
        tonetbl[39] = 9912;
        tonetbl[40] = 18947;
        tonetbl[41] = 24161;
        tonetbl[42] = 23217;
        tonetbl[43] = 16119;
        tonetbl[44] = 5211;
        tonetbl[45] = -5718;
        tonetbl[46] = -12878;
        tonetbl[47] = -13924;
        tonetbl[48] = -8853;
        tonetbl[49] = 0;
        tonetbl[50] = 8853;
        tonetbl[51] = 13924;
        tonetbl[52] = 12878;
        tonetbl[53] = 5718;
        tonetbl[54] = -5211;
        tonetbl[55] = -16119;
        tonetbl[56] = -23217;
        tonetbl[57] = -24161;
        tonetbl[58] = -18947;
        tonetbl[59] = -9912;
        tonetbl[60] = -837;
        tonetbl[61] = 4494;
        tonetbl[62] = 3745;
        tonetbl[63] = -3081;
        tonetbl[64] = -13642;
        tonetbl[65] = -24149;
        tonetbl[66] = -30814;
        tonetbl[67] = -31295;
        tonetbl[68] = -25590;
        tonetbl[69] = -16037;
        tonetbl[70] = -6421;
        tonetbl[71] = -527;
        tonetbl[72] = -692;
        tonetbl[73] = -6918;
        tonetbl[74] = -16863;
        tonetbl[75] = -26741;
        tonetbl[76] = -32767;
        tonetbl[77] = -32601;
        tonetbl[78] = -26244;
        tonetbl[79] = -16037;
        tonetbl[80] = -5767;
        tonetbl[81] = 780;
        tonetbl[82] = 1260;
        tonetbl[83] = -4326;
        tonetbl[84] = -13642;
        tonetbl[85] = -22905;
        tonetbl[86] = -28330;
        tonetbl[87] = -27581;
        tonetbl[88] = -20660;
        tonetbl[89] = -9912;
        tonetbl[90] = 876;
        tonetbl[91] = 7914;
        tonetbl[92] = 8858;
        tonetbl[93] = 3704;
        tonetbl[94] = -5211;
        tonetbl[95] = -14105;
        tonetbl[96] = -19197;
        tonetbl[97] = -18151;
        tonetbl[98] = -10970;
        tonetbl[99] = 0;
    end
 
    //Assign the output to the value in the lookup table
    //pointed to by toneptr.
    assign dout = tonetbl[toneptr];
 
    //Increment through the tone lookup table and wrap back
    //to zero when at the end.
    always @(posedge clk) begin
        if(en) begin
            if(toneptr == 7'd99) begin
                toneptr <= 7'd0;
            end
            else begin
                toneptr <= toneptr + 1'b1;
            end
        end
    end
endmodule


FIR_Filter.v
`define FILT_LENGTH 16
 
module FIR_Filter(
    input  clk,
    input  en,
    input  [15:0]din,
    output [15:0]dout
    );
 
    reg signed [15:0]coeff[`FILT_LENGTH/2-1:0]; //Filter coefficients.
    reg signed [15:0]delay_line[`FILT_LENGTH-1:0]; //Input delay line.
    wire [31:0]accum; //Accumulator for output filter calculation.    
    integer i; //Initialization integer.
    genvar c;  //Delay line generation variable.
 
    //Calculate value in the accumulator.
    assign accum = (delay_line[0] + delay_line[15]) * coeff[0] +
                   (delay_line[1] + delay_line[14]) * coeff[1] +
                   (delay_line[2] + delay_line[13]) * coeff[2] +
                   (delay_line[3] + delay_line[12]) * coeff[3] +
                   (delay_line[4] + delay_line[11]) * coeff[4] +
                   (delay_line[5] + delay_line[10]) * coeff[5] +
                   (delay_line[6] + delay_line[9])  * coeff[6] +
                   (delay_line[7] + delay_line[8])  * coeff[7];
 
    //Assign upper 16-bits to output.
    assign dout = accum[31:16];
 
    initial begin   
        //Load the filter coefficients.
        coeff[0] = 16'd2552;
        coeff[1] = 16'd4557;
        coeff[2] = 16'd5051;
        coeff[3] = 16'd8006;
        coeff[4] = 16'd9265;
        coeff[5] = 16'd11427;
        coeff[6] = 16'd12396;
        coeff[7] = 16'd13200;
 
        //Initialize delay line.
        for(i = 0; i < `FILT_LENGTH; i = i+1'b1) begin
            delay_line[i] = 16'd0;
        end
    end
 
    //Advance the data through the delay line every clock cycle.
    generate
        for (c = `FILT_LENGTH-1; c > 0; c = c - 1) begin: inc_delay
            always @(posedge clk) begin
                if(en) delay_line[c] <= delay_line[c-1];
            end
        end
    endgenerate
 
    //Update with input data.
    always @(posedge clk) begin
        if(en) delay_line[0] <= din;
    end
endmodule


clk_en.v
module clk_en(
    input clk,
    output reg en = 1'b0
    );
 
    reg [15:0]en_counter = 16'h0;
 
    always @(posedge clk) begin
        if(en_counter == 16'd449) begin
            en_counter <= 16'h0;
            en <= 1'b1;
        end
        else begin
            en_counter <= en_counter + 1'b1;
            en <= 1'b0;
        end
    end
 
endmodule


lfsr.v
`timescale 1ns / 1ps
 
module lfsr(
    input clk,
    input en,
    input rst,
    output [15:0]lfsr_out
    );
 
    //Create a 16-bit linear feedback shift register with
    //maximal polynomial x^16 + x^14 + x^13 + x^11 + 1.
    reg [15:0]lfsr = 16'd1;
    wire feedback;
 
    assign feedback = ((lfsr[15] ^ lfsr[13]) ^ lfsr[12]) ^ lfsr[10];
    assign lfsr_out = lfsr;
 
    //Update the linear feedback shift register.
    always @(posedge clk) begin
        if(rst) lfsr <= 16'd1;
        else if(en) lfsr <= {lfsr[14:0], feedback};
    end
endmodule


TB_LMS_Adapt.v
module TB_LMS_Adapt;
 
    reg  clk = 1'b0;
    reg  [7:0]sw = 1'b1;
    wire [7:0]JA;
 
    wire [15:0]d;
    wire [15:0]y;
    wire [15:0]e;
    wire [15:0]n1;
 
    filter_top uut(.clk(clk), .sw(sw), .JA(JA));
 
    assign d = uut.fout;
    assign y = uut.aout;
    assign e = uut.err;
    assign n1 = uut.af.n1[0];
 
    always #5 clk = ~clk;
 
    initial begin
        //#20000000 sw[0] = 1'b1;
        //#40000 sw[0] = 1'b1;
    end
endmodule