#
# Makefile in directory src
#
# resource usage:
#
# To compile a dynamic module
# (1) nvcc cannot accept -fPIC, so compile .cu to .cu.cpp first
# nvcc -arch=sm_50 -cuda ../src/PFAC_kernel.cu
#
# (2) then use g++ to compile PFAC_notex_shared_reorder.cu.cpp
# g++ -fPIC -c PFAC_kernel.cu.cpp
#
# (3) finally combine two object files to a .so library
# g++ -shared -o libpfac.so $(LIBS) PFAC_kernel.cu.o ...
#
# $(LIBS) is necessary when compiling PFAC library on 32-bit machine
#
# Shared settings come from the parent directory; CXX, NVCC, CXXFLAGS and LIBS
# are used below but not defined in this file, so they are presumably set there.
include ../common.mk

# Directory layout, relative to this src/ directory.
INC_DIR = ../include
LIB_DIR = ../lib
OBJ_DIR = ../obj

# Header search path handed to every compile step.
INCPATH += -I$(INC_DIR)/

# CUDA kernel sources (translated to .cu.cpp by nvcc, then compiled by the host compiler).
CU_SRC = PFAC_kernel.cu
CU_SRC += PFAC_reduce_kernel.cu
CU_SRC += PFAC_reduce_inplace_kernel.cu
CU_SRC += PFAC_kernel_spaceDriven.cu

# Host-side C++ sources.
CPP_SRC = PFAC_reorder_Table.cpp
CPP_SRC += PFAC_CPU.cpp
CPP_SRC += PFAC_CPU_OMP.cpp
CPP_SRC += PFAC.cpp

# Headers that every host object is rebuilt against.
inc_files = $(INC_DIR)/PFAC_P.h $(INC_DIR)/PFAC.h

# Bare file names derived from the source lists.
CU_OBJ = $(CU_SRC:.cu=.o)
CU_CPP = $(CU_SRC:.cu=.cu.cpp)
CPP_OBJ = $(CPP_SRC:.cpp=.o)

# Host objects under $(OBJ_DIR): plain (static archive) and -fPIC (shared library).
cppobj_loc = $(addprefix $(OBJ_DIR)/,$(CPP_OBJ))
cppobj_fpic_loc = $(patsubst %.o,$(OBJ_DIR)/%_fpic.o,$(CPP_OBJ))

# sm_50 build products: nvcc-generated ../obj/sm50_<name>.cu.cpp and the
# matching ../obj/sm50_<name>.cu.cpp.o object files.
cu_cpp_sm50_loc = $(addprefix $(OBJ_DIR)/sm50_,$(CU_CPP))
cu_cpp_obj_sm50_loc = $(addsuffix .o,$(cu_cpp_sm50_loc))
# These targets are command names, not files. Declaring them phony keeps a
# stray file with the same name from silently disabling the build.
.PHONY: all mk_libso_no50 mk_liba mk_lib_fpic

# Default goal: the sm_50 kernel library plus the position-independent host library.
all: mk_libso_no50 mk_lib_fpic

# Shared library holding the sm_50 kernels.
# The prerequisites are the object files that are actually linked, so make can
# schedule (and parallelise) their compilation itself. $(LIBS) follows the
# objects, matching the libpfac.so link line below.
mk_libso_no50: $(cu_cpp_obj_sm50_loc) | $(LIB_DIR)
	$(CXX) -shared -o $(LIB_DIR)/libpfac_sm50.so $(cu_cpp_obj_sm50_loc) $(LIBS)

# Static archive of the host-side objects (not part of `all`).
mk_liba: $(cppobj_loc) | $(LIB_DIR)
	$(AR) cru $(LIB_DIR)/libpfac.a $(cppobj_loc)
	ranlib $(LIB_DIR)/libpfac.a

# Shared library of the host-side objects, built from the -fPIC variants.
mk_lib_fpic: $(cppobj_fpic_loc) | $(LIB_DIR)
	$(CXX) -shared -o $(LIB_DIR)/libpfac.so $(cppobj_fpic_loc) $(LIBS)

# Output directories are order-only prerequisites: they must exist, but their
# timestamps never trigger a rebuild.
$(OBJ_DIR) $(LIB_DIR):
	mkdir -p $@

# Position-independent host objects.
$(OBJ_DIR)/%_fpic.o: %.cpp $(inc_files) | $(OBJ_DIR)
	$(CXX) -fPIC -c $(CXXFLAGS) $(INCPATH) -o $@ $<

# Explicit rules for PFAC_CPU_OMP_reorder; the recipes are the same as the
# pattern rules around them.
$(OBJ_DIR)/PFAC_CPU_OMP_reorder_fpic.o: PFAC_CPU_OMP_reorder.cpp $(inc_files) | $(OBJ_DIR)
	$(CXX) -fPIC -c $(CXXFLAGS) $(INCPATH) -o $@ $<

$(OBJ_DIR)/PFAC_CPU_OMP_reorder.o: PFAC_CPU_OMP_reorder.cpp $(inc_files) | $(OBJ_DIR)
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o $@ $<

# Plain host objects.
$(OBJ_DIR)/%.o: %.cpp $(inc_files) | $(OBJ_DIR)
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o $@ $<

# Step 1 of the CUDA build: nvcc translates each kernel to host C++ (-cuda),
# because nvcc itself is not given -fPIC (see the header comment of this file).
$(cu_cpp_sm50_loc): $(OBJ_DIR)/sm50_%.cu.cpp: %.cu $(inc_files) | $(OBJ_DIR)
	$(NVCC) -arch=sm_50 -cuda $(INCPATH) -o $@ $<

# Step 2: the host compiler turns the generated C++ into a position-independent
# object. This is its own rule so every recipe writes exactly its target.
$(cu_cpp_obj_sm50_loc): $(OBJ_DIR)/%.cu.cpp.o: $(OBJ_DIR)/%.cu.cpp
	$(CXX) -fPIC -O2 -c -o $@ $<
#clean :
# rm -f *.linkinfo
# rm -f $(OBJ_DIR)/*
# rm -f $(EXE_DIR)/*
####### Implicit rules
# Generic source -> object compile rules, written as pattern rules.
# The suffix list is kept registered exactly as before.
.SUFFIXES: .o .c .cpp .cc .cxx .C

# C++ sources: one rule per recognised file extension, all using $(CXX).
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"

%.o: %.cc
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"

%.o: %.cxx
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"

%.o: %.C
	$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"

# C sources use the C compiler and its own flags.
%.o: %.c
	$(CC) -c $(CFLAGS) $(INCPATH) -o "$@" "$<"
####### Build rules
simple_example.cpp:64: int main(int, char**): Assertion `PFAC_STATUS_SUCCESS == PFAC_status' failed.
// Fragment: choose the kernel module by compute capability.
// device_no encodes the capability as 10*major + minor (e.g. 3.0 -> 30).
// Only 3.0, 2.1, 2.0, 1.3, 1.2 and 1.1 have an entry; every other value,
// including anything newer than 3.0, falls through to PFAC_STATUS_ARCH_MISMATCH.
int device_no = 10*deviceProp.major + deviceProp.minor ;
if ( 30 == device_no ){
strcpy (modulepath, "libpfac_sm30.so");
}else if ( 21 == device_no ){
strcpy (modulepath, "libpfac_sm21.so");
}else if ( 20 == device_no ){
strcpy (modulepath, "libpfac_sm20.so");
}else if ( 13 == device_no ){
strcpy (modulepath, "libpfac_sm13.so");
}else if ( 12 == device_no ){
strcpy (modulepath, "libpfac_sm12.so");
}else if ( 11 == device_no ){
strcpy (modulepath, "libpfac_sm11.so");
}else{
return PFAC_STATUS_ARCH_MISMATCH ;
}
// Fragment: same selection, but the module name is formatted directly from
// device_no, so any capability of 1.1 or higher yields "libpfac_sm<NN>.so".
int device_no = 10*deviceProp.major + deviceProp.minor ;
// Only devices below compute capability 1.1 are rejected here.
if ( 11 > device_no )
return PFAC_STATUS_ARCH_MISMATCH ;
// NOTE(review): sprintf is unbounded; the declaration of modulepath is not
// visible here, so confirm it is large enough for the formatted name.
sprintf(modulepath, "libpfac_sm%d.so", device_no);
Error: fails to PFAC_matchFromHost, PFAC_STATUS_CUDA_ALLOC_FAILED: allocation fails on device memory.
#define PFAC_PRINTF( ... ) printf( __VA_ARGS__ )
//#define PFAC_PRINTF(...)
Error: cannot bind texture, 11264 bytes invalid texture reference
Error: fails to PFAC_matchFromHost, PFAC_STATUS_CUDA_ALLOC_FAILED: allocation fails on device memory
PFAC_setTextureMode(handle, PFAC_TEXTURE_OFF ) ;
At position 0, match pattern 1
At position 1, match pattern 3
At position 2, match pattern 4
At position 4, match pattern 4
At position 6, match pattern 2
// Form 1: the texture is passed as a pointer cast to `const struct textureReference*`,
// together with an explicit channel format descriptor.
cuda_status = cudaBindTexture( &offset, (const struct textureReference*) texRefTable,
(const void*) handle->d_PFAC_table,
(const struct cudaChannelFormatDesc*) &channelDesc,
handle->sizeOfTableInBytes ) ;
// Form 2: tex_PFAC_table is passed directly and no channel descriptor is given.
// Both forms bind handle->d_PFAC_table with a size of handle->sizeOfTableInBytes.
cuda_status = cudaBindTexture( &offset, tex_PFAC_table,
(const void*) handle->d_PFAC_table, handle->sizeOfTableInBytes ) ;
PFAC_setTextureMode(handle, PFAC_TEXTURE_ON ) ;
dd if=main.cvd of=main.tar.gz bs=512 skip=1
tar xzvf main.tar.gz
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <chrono>
#include <PFAC.h>
int main(int argc, char **argv)
{
if(argc < 2){
printf("args input file, input pattern\n" );
return 0;
}
char dumpTableFile[] = "table.txt" ;
char *inputFile = argv[1]; //"../test/data/example_input" ;
char *patternFile = argv[2];//"../test/pattern/example_pattern" ;
PFAC_handle_t handle ;
PFAC_status_t PFAC_status ;
int input_size ;
char *h_inputString = NULL ;
int *h_matched_result = NULL ;
// step 1: create PFAC handle
PFAC_status = PFAC_create( &handle ) ;
PFAC_status = PFAC_setTextureMode(handle, PFAC_TEXTURE_OFF);
printf("%d\n", PFAC_status);
assert( PFAC_STATUS_SUCCESS == PFAC_status );
// step 2: read patterns and dump transition table
PFAC_status = PFAC_readPatternFromFile( handle, patternFile) ;
if ( PFAC_STATUS_SUCCESS != PFAC_status ){
printf("Error: fails to read pattern from file, %s\n", PFAC_getErrorString(PFAC_status) );
exit(1) ;
}
// dump transition table
FILE *table_fp = fopen( dumpTableFile, "w") ;
assert( NULL != table_fp ) ;
PFAC_status = PFAC_dumpTransitionTable( handle, table_fp );
fclose( table_fp ) ;
if ( PFAC_STATUS_SUCCESS != PFAC_status ){
printf("Error: fails to dump transition table, %s\n", PFAC_getErrorString(PFAC_status) );
exit(1) ;
}
//step 3: prepare input stream
FILE* fpin = fopen( inputFile, "rb");
assert ( NULL != fpin ) ;
// obtain file size
fseek (fpin , 0 , SEEK_END);
input_size = ftell (fpin);
rewind (fpin);
// allocate memory to contain the whole file
h_inputString = (char *) malloc (sizeof(char)*input_size);
assert( NULL != h_inputString );
h_matched_result = (int *) malloc (sizeof(int)*input_size);
assert( NULL != h_matched_result );
memset( h_matched_result, 0, sizeof(int)*input_size ) ;
// copy the file into the buffer
input_size = fread (h_inputString, 1, input_size, fpin);
fclose(fpin);
auto started = std::chrono::high_resolution_clock::now();
// step 4: run PFAC on GPU
PFAC_status = PFAC_matchFromHost( handle, h_inputString, input_size, h_matched_result ) ;
if ( PFAC_STATUS_SUCCESS != PFAC_status ){
printf("Error: fails to PFAC_matchFromHost, %s\n", PFAC_getErrorString(PFAC_status) );
exit(1) ;
}
auto done = std::chrono::high_resolution_clock::now();
std::cout << "gpu_time: " << std::chrono::duration_cast<std::chrono::milliseconds>(done-started).count()<< std::endl;
memset( h_matched_result, 0, sizeof(int)*input_size ) ;
PFAC_setPlatform(handle, PFAC_PLATFORM_CPU);
started = std::chrono::high_resolution_clock::now();
// step 4: run PFAC on CPU
PFAC_status = PFAC_matchFromHost( handle, h_inputString, input_size, h_matched_result ) ;
if ( PFAC_STATUS_SUCCESS != PFAC_status ){
printf("Error: fails to PFAC_matchFromHost, %s\n", PFAC_getErrorString(PFAC_status) );
exit(1) ;
}
done = std::chrono::high_resolution_clock::now();
std::cout << "cpu_time: " << std::chrono::duration_cast<std::chrono::milliseconds>(done-started).count()<< std::endl;
memset( h_matched_result, 0, sizeof(int)*input_size ) ;
PFAC_setPlatform(handle, PFAC_PLATFORM_CPU_OMP);
started = std::chrono::high_resolution_clock::now();
// step 4: run PFAC on OMP
PFAC_status = PFAC_matchFromHost( handle, h_inputString, input_size, h_matched_result ) ;
if ( PFAC_STATUS_SUCCESS != PFAC_status ){
printf("Error: fails to PFAC_matchFromHost, %s\n", PFAC_getErrorString(PFAC_status) );
exit(1) ;
}
done = std::chrono::high_resolution_clock::now();
std::cout << "omp_time: " << std::chrono::duration_cast<std::chrono::milliseconds>(done-started).count() << std::endl;
PFAC_status = PFAC_destroy( handle ) ;
assert( PFAC_STATUS_SUCCESS == PFAC_status );
free(h_inputString);
free(h_matched_result);
return 0;
}
export OMP_NUM_THREADS=4
К сожалению, не доступен сервер mySQL