Skip to content

Repairs and clean-ups regarding tuning and profiling #581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ tuning_list
etc/tune
2kprime.1
drprimes.txt
etc/multiplying*
etc/squaring*

# ignore stuff generated by "make manual" and "make poster"
*.aux
Expand Down Expand Up @@ -134,3 +136,20 @@ build*/
# kdevelop section
.kdev4/
*.kdev4

# ignore cmake files
CMakeFiles
Makefile
cmake_install.cmake












11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ include(sources.cmake)
# Options
#-----------------------------------------------------------------------------
option(BUILD_SHARED_LIBS "Build shared library and only the shared library if \"ON\", default is static" OFF)

option(BUILD_TUNING "Run a tuning program for the fast multiplication/squaring algorithms if \"ON\"" OFF)
option(BUILD_GRAPHS "Run a benchmark of the fast multiplication/squaring algorithms and make graphics if \"ON\"" OFF)
#-----------------------------------------------------------------------------
# Compose CFLAGS
#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -137,6 +138,14 @@ if(BUILD_TESTING)
add_subdirectory(demo)
endif()

#-----------------------------------------------------------------------------
# tuning and benchmark targets
#-----------------------------------------------------------------------------

if(BUILD_TUNING OR BUILD_GRAPHS)
add_subdirectory(etc ${CMAKE_CURRENT_SOURCE_DIR}/etc)
endif()

#-----------------------------------------------------------------------------
# Install/export targets and files
#-----------------------------------------------------------------------------
Expand Down
72 changes: 41 additions & 31 deletions demo/timing.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,42 +55,35 @@ static unsigned int lbit(void)
}
}

/* RDTSC from Scott Duplichan */
static uint64_t TIMFUNC(void)
{
#if defined __GNUC__
#if defined(__i386__) || defined(__x86_64__)
/* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html
* the old code always got a warning issued by gcc, clang did not complain...
*/
unsigned hi, lo;
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo)|(((uint64_t)hi)<<32);
#else /* gcc-IA64 version */
unsigned long result;
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");

while (__builtin_expect((int) result == -1, 0))
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");

return result;
#endif

/* Microsoft and Intel Windows compilers */
#elif defined _M_IX86
__asm rdtsc
#elif defined _M_AMD64
return __rdtsc();
#elif defined _M_IA64
#if defined __INTEL_COMPILER
#include <ia64intrin.h>
#if defined(_WIN32)
# include <windows.h>
#endif
return __getReg(3116);

/*
 * Read the tick counter that is used to time the benchmarks.
 *
 * The unit of the returned value depends on the backend that gets compiled in:
 *   - _WIN32:                       ticks of QueryPerformanceCounter()
 *   - _POSIX_C_SOURCE >= 199309L:   nanoseconds of CLOCK_MONOTONIC
 *   - everything else:              ticks of clock()
 *
 * _WIN32 is tested first, in the same order main() uses to compute CLK_PER_SEC,
 * so that both always agree on the unit even if a Windows toolchain defines
 * _POSIX_C_SOURCE, too.
 *
 * Returns 0 if clock_gettime() or clock() report an error.
 */
static uint64_t TIMFUNC(void)
{
#if defined(_WIN32)
   LARGE_INTEGER ticks;

   QueryPerformanceCounter(&ticks);
   return (uint64_t)ticks.QuadPart;
#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 199309L)
/* Stays defined after this function, main() uses it for CLK_PER_SEC */
#define LTM_BILLION 1000000000
   struct timespec ts;

   /* Sets errno in case of error (not used here); ts is not valid then */
   if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
      return (uint64_t)(0);
   }
   return (((uint64_t)ts.tv_sec) * LTM_BILLION + (uint64_t)ts.tv_nsec);
#else
   /* Portable fallback, ISO C only */
   clock_t t;

   t = clock();
   /* clock() returns (clock_t)(-1) on failure, clock_t may be an unsigned type */
   if ((t == (clock_t)(-1)) || (t < (clock_t)(0))) {
      return (uint64_t)(0);
   }
   return (uint64_t)(t);
#endif
}


/*
 * Helpers that evaluate the expression x several times in a row.
 * DO2 evaluates x twice, stores the mp_err result in a local and discards it.
 * DO4 expands to two DO2 and DO8 to two DO4, i.e. 4 and 8 evaluations of x.
 * Only DO2 is wrapped in do { } while(0); DO4 and DO8 expand to more than one
 * statement and x is pasted without parentheses.
 */
#define DO2(x) do { mp_err err = x; err = x; (void)err; }while(0)
#define DO4(x) DO2(x); DO2(x)
#define DO8(x) DO4(x); DO4(x)
Expand Down Expand Up @@ -141,6 +134,12 @@ int main(int argc, char **argv)
int n, cnt, ix, old_kara_m, old_kara_s, old_toom_m, old_toom_s;
unsigned rr;

#ifdef _WIN32
LARGE_INTEGER Frequency;
#else
struct timespec ts;
#endif

CHECK_OK(mp_init(&a));
CHECK_OK(mp_init(&b));
CHECK_OK(mp_init(&c));
Expand All @@ -150,10 +149,21 @@ int main(int argc, char **argv)

srand(LTM_TIMING_RAND_SEED);


#ifdef _WIN32
QueryPerformanceFrequency(&Frequency);
CLK_PER_SEC = (uint64) Frequency;
#elif _POSIX_C_SOURCE >= 199309L
/* returns -1 for an error and 0 for okay, sets errno (not used here) */
if (clock_getres(CLOCK_MONOTONIC, &ts)) {
fprintf(stderr, "%d, clock_getres failed\n", __LINE__);
exit(EXIT_FAILURE);
}
CLK_PER_SEC = LTM_BILLION / ts.tv_nsec;
#else
CLK_PER_SEC = TIMFUNC();
sleep(1);
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
#endif

printf("CLK_PER_SEC == %" PRIu64 "\n", CLK_PER_SEC);

Expand Down
76 changes: 73 additions & 3 deletions doc/bn.tex
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ \section{License}

\section{Building LibTomMath}


LibTomMath is meant to be very ``GCC friendly'' as it comes with a makefile well suited for GCC.
However, the library will also build in MSVC, Borland C out of the box. For any other ISO C
compiler a makefile will have to be made by the end
Expand Down Expand Up @@ -270,6 +271,53 @@ \subsection{Testing}
test was invoked. If an error is detected the program will exit with a dump of the relevant
numbers it was working with.

\subsection{CMake}
Some of the options above are also available with CMake.

\subsubsection{Shared Library}
The default is a static library. To produce a shared library use the CMake option
\begin{alltt}
-DBUILD_SHARED_LIBS=ON
\end{alltt}

\subsubsection{Testing}
To run the testsuite use option
\begin{alltt}
-DBUILD_TESTING=ON
\end{alltt}

\subsubsection{Tuning}
To run the tuning itself use option
\begin{alltt}
-DBUILD_TUNING=ON
\end{alltt}

To run a benchmark with the tuned library and print plots of the benchmark tables use option
\begin{alltt}
-DBUILD_GRAPHS=ON
\end{alltt}

To compile with LTO (Link Time Optimization) use option
\begin{alltt}
-DCOMPILE_LTO=ON
\end{alltt}

There are several build types available:
\begin{description}
\item[Debug] Build a library with debugging symbols (\texttt{-g3}) and no extra optimization
\item[Release] Build the normal release version (\texttt{-O3 -funroll-loops -fomit-frame-pointer}) (default)
\item[RelWithDebInfo] Build a library with debugging symbols (\texttt{-g3 -O2}) and a bit of optimization
\item[MinSizeRel] Build a small sized library (\texttt{-Os})
\end{description}
The build types are case-sensitive!

Choose one with:
\begin{alltt}
-DCMAKE_BUILD_TYPE=buildtype
\end{alltt}



\section{Build Configuration}
LibTomMath can be configured at build time in two phases we shall call ``depends'' and
``trims''. Each phase changes how the library is built and they are applied one after another
Expand Down Expand Up @@ -1600,13 +1648,35 @@ \section{Tuning Polynomial Basis Routines}
make tune
\end{alltt}

This will run a benchmark, computes the medians, rewrites \texttt{bncore.c}, and recompiles
\texttt{bncore.c} and relinks the library.
With CMake
\begin{alltt}
cd /path/to/build/dir
cmake -DBUILD_TUNING=ON /path/to/source/dir
cmake --build .
\end{alltt}


This will run a benchmark, compute the medians, rewrite \texttt{tommath\_cutoffs.h}, recompile
\texttt{cutoffs.c}, and relink the library.

The benchmark itself can be fine--tuned in the file \texttt{etc/tune\_it.sh}.

The program \texttt{etc/tune} is also able to print a list of values for printing curves with e.g.:
\texttt{gnuplot}. type \texttt{./etc/tune -h} to get a list of all available options.
\texttt{gnuplot}. Type \texttt{./etc/tune -h} to get a list of all the available options. There
are a lot.

To get some nice plots in \texttt{etc} try

\begin{alltt}
make graphs
\end{alltt}

With CMake
\begin{alltt}
cd /path/to/build/dir
cmake -DBUILD_GRAPHS=ON /path/to/source/dir
cmake --build .
\end{alltt}

This will run a benchmark, compute the medians, rewrite \texttt{tommath\_cutoffs.h}, recompile
\texttt{cutoffs.c}, relink the library and run gnuplot to print plots in the PNG format. The size
of the images is fixed in the file \texttt{etc/plot\_graphs.gp} and has to be changed manually.

\chapter{Modular Reduction}

Expand Down
51 changes: 51 additions & 0 deletions etc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# SPDX-License-Identifier: Unlicense
#
# LibTomMath, a free open source portable number theoretic multiple-precision
# integer (MPI) library written entirely in C.
#
# Builds the program "tune" from tune.c and runs tune_it.sh in this directory
# every time "tune" has been built.

cmake_minimum_required(VERSION 3.10)

set(LTM_TUNE tune-ltm)

# This file can be included from the top level or used stand-alone
if(PROJECT_NAME)
    set(LIBRARY_NAME ${PROJECT_NAME})
else()
    # Define an independent project and all the necessary stuff around
    project(${LTM_TUNE}
        LANGUAGES C)
    set(LIBRARY_NAME libtommath)
    # Stop at configure time if the library cannot be found, "tune" could not
    # be linked without it anyway
    find_package(${LIBRARY_NAME} REQUIRED)
    if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
        set(CMAKE_BUILD_TYPE "Release")
    endif()
endif()

add_executable(tune
    ${CMAKE_CURRENT_SOURCE_DIR}/tune.c
)

target_include_directories(tune PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/..
)

target_link_libraries(tune PRIVATE
    ${LIBRARY_NAME}
)

target_compile_options(tune PRIVATE
    ${LTM_C_FLAGS}
)

# target_link_options() exists since CMake 3.13 only, but 3.10 is the declared
# minimum. Older versions accept linker flags as items of target_link_libraries().
if(CMAKE_VERSION VERSION_LESS 3.13)
    target_link_libraries(tune PRIVATE
        ${LTM_LD_FLAGS}
    )
else()
    target_link_options(tune BEFORE PUBLIC
        ${LTM_LD_FLAGS}
    )
endif()

# tune_it.sh runs in the source directory after every build of "tune"; with
# BUILD_GRAPHS it gets the argument 1000.
if(BUILD_GRAPHS)
    # used in tune_it.sh
    find_program(GNUPLOT gnuplot)
    if(NOT GNUPLOT)
        message(WARNING "gnuplot was not found, BUILD_GRAPHS needs it to make the plots")
    endif()
    add_custom_command(TARGET tune POST_BUILD COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tune_it.sh 1000 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} VERBATIM)
else()
    add_custom_command(TARGET tune POST_BUILD COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tune_it.sh WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} VERBATIM)
endif()
9 changes: 7 additions & 2 deletions etc/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ LTM_TUNE_CFLAGS = $(CFLAGS) $(LTM_CFLAGS) -Wall -W -Wextra -Wshadow -O3 -I../
# libname when you can't install the lib with install
LIBNAME=../libtommath.a

all: pprime tune test_standalone mersenne drprime 2kprime mont
# NOTE(review): the plot target added to this makefile is named "graphs"; no rule
# for "graph" is visible. Confirm that a rule for "getlimbsize" exists as well.
all: pprime tune test_standalone mersenne drprime 2kprime mont getlimbsize graphs

#provable primes
pprime: pprime.o
Expand Down Expand Up @@ -36,10 +36,15 @@ drprime: drprime.o
mont: mont.o
$(CC) $(LTM_TUNE_CFLAGS) mont.o $(LIBNAME) -o mont

# Make pretty pictures (1000 is the maximum number of limbs to print for mul/sqr)
# "tune" runs twice because it also runs automatically when it is built.
graphs: tune
./tune_it.sh 1000

clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat \
tuning_list multiplying squaring test *.da *.dyn *.dpi *~
tuning_list get_limbsize out *.da *.dyn *.dpi *~ cmake_install.cmake Makefile
rm -rf .libs
rm -rf CMakeFiles

.PHONY: tune
20 changes: 5 additions & 15 deletions etc/makefile.icc
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,10 @@ tune: tune.o
$(CC) $(CFLAGS) tune.o $(LIBNAME) -o tune
./tune_it.sh

# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
tune86: tune.c
nasm -f coff timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86

# for cygwin
tune86c: tune.c
nasm -f gnuwin32 timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86

#make tune86 for linux or any ELF format
tune86l: tune.c
nasm -f elf -DUSE_ELF timer.asm
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
# Make pretty pictures (1000 is the maximum number of limbs to print for mul/sqr)
# "tune" runs twice because it also runs automatically when it is built.
graphs: tune
./tune_it.sh 1000

# spits out mersenne primes
mersenne: mersenne.o
Expand All @@ -64,4 +54,4 @@ mont: mont.o


clean:
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il tuning_list
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat get_limbsize *.il tuning_list
19 changes: 19 additions & 0 deletions etc/plot_graphs.gp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Plots the tables "multiplying<ARG1>" and "squaring<ARG1>" into PNG images of
# the same name plus ".png". Column 1 is used as x, column 2 as the "slow" and
# column 3 as the "fast" curve. ARG1 is the limb size in bits as shown in the title.
set terminal pngcairo size 720,540
# The "podo" sequence stays distinguishable with most forms of color blindness
set colorsequence podo

set key top left

set xlabel "Operand size (limbs)"
set ylabel "Time"

do for [op in "multiplying squaring"] {
    infile = op.ARG1
    set output infile.".png"
    set title "Comparing fast and slow ".op." [".ARG1." bits limbsize]"
    plot infile using 1:2 with lines title "slow", \
         infile using 1:3 with lines title "fast"
}



Loading
Loading