2021年3月8日星期一

High Level Synthesis by using Xilinx Vitis

// Analyze the Xilinx Vitis example
 
// C General include header file
#include <ap_cint.h>

// C typedefs for top-level input and output int / fixed-point formats
// [u]int<precision>: arbitrary-precision integer types, 1 to 1024 bits wide
// Signed 7-bit type for the top-level input.
typedef int7 in_data_t;
// Unsigned 7-bit type for the top-level output. It needs a name of its own:
// declaring in_data_t a second time with a different type does not compile.
typedef uint7 out_data_t;

// C++ General include header file
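// ap_[u]int<W>: arbitrary-precision integers, W bits wide (up to 1024 bits by default)
// ap_[u]fixed<W,I,Q,O,N>: fixed-point; W = total width, I = integer bits,
// Q = quantization mode, O = overflow mode, N = saturation bits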
#include <ap_int.h>
#include <ap_fixed.h>
#include <hls_stream.h>

// C++ type aliases for top-level input and output int / fixed-point formats
using in_data_t = ap_ufixed<IN_BW, IN_IW>;
using out_data_t = ap_ufixed<OUT_BW, OUT_IW>;
using data_t = ap_uint<IN_BW>;

// Plain arbitrary-precision integer outputs.
using dout1_t = ap_int<18>;
using dout2_t = ap_uint<13>;

// Fixed-point formats; din1_t also names its rounding and saturation modes.
using din1_t = ap_ufixed<10, 8, AP_RND, AP_SAT>;
using dout_t = ap_fixed<36, 30>;

hls::stream<uint8_t> &bytes_in,

// Support array
typedef ap_int<7> din_t;
typedef ap_int<10> dout_t;
// Illustrates that an array (din_t mem[N]) may be used as a function argument.
// The body is left empty in this snippet, so no dout_t value is returned here.
dout_t mem_bottleneck_resolved(din_t mem[N]) {
}

// Support pointer
// Demonstrates pointer arguments used as outputs. Each result is computed
// from the by-value inputs and stored through one output pointer:
//   *out1 = inA * inB      *out2 = inB + inA
//   *out3 = inC / inA      *out4 = inD % inA
// No check is made for inA == 0 before the division and the modulo.
void cpp_ap_int_arith(dinA_t inA, dinB_t inB, dinC_t inC, dinD_t inD,
                      dout1_t *out1, dout2_t *out2, dout3_t *out3, dout4_t *out4)
{
    // Each value is converted to its output type, then written, in the same
    // order as the operations are listed above.
    const dout1_t product = inA * inB;
    *out1 = product;

    const dout2_t sum = inB + inA;
    *out2 = sum;

    const dout3_t quotient = inC / inA;
    *out3 = quotient;

    const dout4_t remainder = inD % inA;
    *out4 = remainder;
}

// Support double pointer inside callee function
// Callee of the double-pointer example: receives a 10-element array argument,
// a size, and a pointer-to-pointer (flagPtr). Returns a data_t.
data_t sub(data_t ptr[10], data_t size, data_t**flagPtr)
{
//.... (body elided in this snippet)
}

// Caller side of the double-pointer example: passes the address of a pointer
// (&ptrFlag) to sub() and returns whatever sub() returns.
// NOTE(review): `array` and `ptrFlag` are not declared in the visible lines;
// presumably they are set up in the elided part of the body - confirm.
data_t pointer_double(data_t pos, data_t x, data_t* flag)
{
//.... (declarations elided in this snippet)
return sub(array, 10, &ptrFlag);
}

// Support pass by reference
out_data_t fxp_sqrt_top(in_data_t& in_val);

// Support template keyword
// Function template parameterised on the ap_ufixed formats of its two
// arguments: W2/IW2 are the template arguments of `result`, W1/IW1 those of
// `in_val`. Both arguments are taken by non-const reference.
template <int W2, int IW2, int W1, int IW1>
void fxp_sqrt(ap_ufixed<W2,IW2>& result, ap_ufixed<W1,IW1>& in_val)
{
//.... (body elided in this snippet)
}

// Function template with a type parameter T (the element type behind the four
// pointer arguments) and an integer parameter K. Declared static.
// src, hcoeff and vcoeff point to const data; dst is writable.
// NOTE(review): K does not appear in the visible signature; presumably it is
// used by the elided body - confirm.
template<typename T, int K>
static void convolution_orig(
int width, int height,
const T *src, T *dst,
const T *hcoeff, const T *vcoeff)
{
//.... (body elided in this snippet)
}

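// Support preprocessor macro __SYNTHESIS__ (defined only while the HLS tool
// synthesizes the code; use it to guard test-bench-only or non-synthesizable code)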
#ifndef __SYNTHESIS__
#else
#endif

// Support operator overload
friend ostream&
operator<<(ostream& o, const CFir<coef_TT, data_TT, acc_TT> &f);

// Test bench support C / C++ standard library
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cassert>
#include <stdint.h>
#include <fstream>
#include <iomanip>
#include <cmath>

using namespace std;

// Support hls::vector
using float4 = hls::vector<float, 4>;

// Support hls::stream
hls::stream<int32_t> &strm_out,

// Support cout
cout << "*** Test passes ***" << endl << endl;

// Support printf
printf("!!! TEST PASSED !!!\n");

// Support save the results to a file by using ofstream (TESTBENCH ONLY)
ofstream FILE;
FILE.open ("result.dat");
FILE << Y << endl;
FILE.close();

// Support fprintf (TESTBENCH ONLY)
FILE *fp;
fp=fopen("result.dat","w");
fprintf(fp, "%d*%d=%d; %d+%d=%d; %d/%d=%d; %d mod %d=%d;\n", inA.to_int(), inB.to_int(), out1.to_int(), inB.to_int(), inA.to_int(), out2.to_int(), inC.to_int(), inA.to_int(), out3.to_int(), inD.to_int(), inA.to_int(),out4.to_int());
fclose(fp);

// Support fscanf (TESTBENCH ONLY)
int tmp;
fp=fopen("tb_data/outC.golden.dat","r");
fscanf(fp, "%d", &tmp);
fclose(fp);

// Support system (TESTBENCH ONLY)
// Runs the diff command through the host command processor and stores the
// status reported by system() in retval.
// NOTE(review): a non-zero retval presumably means result.dat differs from
// result.golden.dat - confirm against the diff exit codes.
retval = system("diff --brief -w result.dat result.golden.dat");

// Support Template metaprogramming
// Tail recursive call
// Primary template: every instantiation forwards to fibon_s<N-1>, shifting
// the pair (a, b) to (b, a+b). The recursion is resolved at compile time.
template<data_t N>
struct fibon_s {
    template<typename T>
    static T fibon_f(T a, T b) {
        return fibon_s<N-1>::fibon_f(b, (a+b));
    }
};

// Termination condition: without this specialization the chain
// fibon_s<N-1>, fibon_s<N-2>, ... never ends and instantiation fails.
template<>
struct fibon_s<1> {
    template<typename T>
    static T fibon_f(T a, T b) {
        // `a` is intentionally unused: the last step only yields b.
        return b;
    }
};

// Test bench support new and delete keyword (TESTBENCH ONLY)
// Allocate the source and reference buffers on the heap, each holding
// TEST_IMG_ROWS*TEST_IMG_COLS elements of data_t. The pointers themselves are
// const; the elements they point to are not.
data_t * const src_img = new data_t[TEST_IMG_ROWS*TEST_IMG_COLS];
data_t * const ref_img = new data_t[TEST_IMG_ROWS*TEST_IMG_COLS];
// The array form of delete matches the new[] allocations above.
delete [] src_img;
delete [] ref_img;

// pragma keyword
// Entry point of the streaming example: takes a width, a height, and an input
// and an output hls::stream of data_t. Only the directives are shown; the
// processing code is not part of this snippet.
void filter11x11_strm(int width, int height,
hls::stream<data_t> &src, hls::stream<data_t> &dst)
{
// Request the axis interface mode for both stream arguments.
#pragma HLS INTERFACE axis port=&src
#pragma HLS INTERFACE axis port=&dst

// Apply the DATAFLOW and INLINE directives to this function.
#pragma HLS DATAFLOW
#pragma HLS INLINE // bring loops in sub-functions to this DATAFLOW region
}

// Entry point showing interface and pipeline directives; the computation
// itself is not part of this snippet.
void filter(data_t &x, coef_t coef[TAP], sum_t &y)
{
// All three arguments (x, y and coef) request the ap_fifo interface mode.
#pragma HLS INTERFACE ap_fifo port=x
#pragma HLS INTERFACE ap_fifo port=y
#pragma HLS INTERFACE ap_fifo port=coef
// Pipeline the function with a requested initiation interval (II) of 4.
#pragma HLS PIPELINE II=4
}

#pragma HLS ARRAY_PARTITION variable = h complete dim = 1
#pragma HLS inline // Always inline this function

#pragma HLS pipeline rewind
#pragma HLS unroll factor = 2

// The dataflow directive will enable the concurrent
// execution of the two accumulators described below
#pragma HLS DATAFLOW

#pragma HLS STREAM variable = mux_in depth = 16

#pragma HLS INLINE off
#pragma HLS pipeline

#pragma HLS loop_tripcount max=93 // max strm_in_len / decimation rate

#pragma HLS ARRAY_RESHAPE variable=res1 dim=1 factor=2 block
#pragma HLS BIND_STORAGE variable=res1 type=ram_s2p impl=uram_ecc

#pragma HLS DEPENDENCE variable=buffer inter WAR false
#pragma HLS BIND_STORAGE variable=buffer type=ram_2p impl=uram
 

沒有留言:

發佈留言