Skip to content

Commit e2fadbe

Browse files
committed
More reasonable settings for "make graphs"
Put it in all other makefiles where applicable
1 parent 6be8e10 commit e2fadbe

File tree

10 files changed

+119
-56
lines changed

10 files changed

+119
-56
lines changed

etc/makefile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,20 @@ drprime: drprime.o
3636
mont: mont.o
3737
$(CC) $(LTM_TUNE_CFLAGS) mont.o $(LIBNAME) -o mont
3838

39+
# Reads MP_DIGIT_BIT from tommath.h, so take care that there is the correct MP_xxBIT
40+
# in CFLAGS and/or LTM_CFLAGS when compiling from this directory ("libtommath/etc").
3941
getlimbsize: get_limbsize.o
4042
$(CC) $(LTM_TUNE_CFLAGS) get_limbsize.o $(LIBNAME) -o get_limbsize
4143

42-
# Make pretty pictures (500 is the maximum number of limbs)
44+
# Make pretty pictures (2000 is the maximum number of limbs to print for mul/sqr)
45+
# "tune" runs twice because it also runs automatically when it is built.
4346
graphs: tune get_limbsize
44-
./tune -p -M 500
47+
./tune_it.sh 2000
4548
gnuplot -c plot_graphs.gp `./get_limbsize`
4649

4750
clean:
4851
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat \
49-
tuning_list multiplying squaring readradix writeradix test get_limbsize *png *.da *.dyn *.dpi *~
52+
tuning_list multiplying squaring readradix writeradix get_limbsize *png *.da *.dyn *.dpi *~
5053
rm -rf .libs
5154

5255
.PHONY: tune

etc/makefile.icc

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ tune: tune.o
3232
$(CC) $(CFLAGS) tune.o $(LIBNAME) -o tune
3333
./tune_it.sh
3434

35+
# Reads MP_DIGIT_BIT from tommath.h, so take care that there is the correct MP_xxBIT
36+
# in CFLAGS and/or LTM_CFLAGS when compiling from this directory ("libtommath/etc").
37+
getlimbsize: get_limbsize.o
38+
$(CC) $(LTM_TUNE_CFLAGS) get_limbsize.o $(LIBNAME) -o get_limbsize
39+
40+
# Make pretty pictures (2000 is the maximum number of limbs to print for mul/sqr)
41+
# "tune" runs twice because it also runs automatically when it is built.
42+
graphs: tune get_limbsize
43+
./tune_it.sh 2000
44+
gnuplot -c plot_graphs.gp `./get_limbsize`
45+
46+
3547
# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
3648
tune86: tune.c
3749
nasm -f coff timer.asm
@@ -64,4 +76,4 @@ mont: mont.o
6476

6577

6678
clean:
67-
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il tuning_list
79+
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat multiplying squaring readradix writeradix test get_limbsize *png *.il tuning_list

etc/tune.c

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,14 @@ int main(int argc, char **argv)
702702
}
703703

704704
}
705+
if (printpreset == 1) {
706+
updated.MUL_KARATSUBA = MP_MUL_KARATSUBA_CUTOFF;
707+
updated.SQR_KARATSUBA = MP_SQR_KARATSUBA_CUTOFF;
708+
updated.MUL_TOOM = MP_MUL_TOOM_CUTOFF;
709+
updated.SQR_TOOM = MP_SQR_TOOM_CUTOFF;
710+
updated.RADIX_READ = MP_RADIX_READ_CUTOFF;
711+
updated.RADIX_WRITE = MP_RADIX_WRITE_CUTOFF;
712+
}
705713
if (args.terse == 1) {
706714
printf("%d %d %d %d %d %d\n",
707715
updated.MUL_KARATSUBA,
@@ -747,7 +755,8 @@ int main(int argc, char **argv)
747755
}
748756

749757

750-
for (x = 8; x < args.upper_limit_print; x += args.increment_print) {
758+
for (x = 1; x < args.upper_limit_print; x += args.increment_print) {
759+
/* Progress indicator: overwrite the same terminal line with the distance left to the print limit. */
printf("\r%d", (args.upper_limit_print - x));
751760
set_cutoffs(&max_cutoffs);
752761
t1 = s_time_mul(x);
753762
set_cutoffs(&orig);
@@ -768,29 +777,32 @@ int main(int argc, char **argv)
768777
printf("SQR %d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x, t1, t2, (int64_t)t2 - (int64_t)t1);
769778
fflush(stdout);
770779
}
771-
772-
773-
set_cutoffs(&max_cutoffs);
774-
t1 = s_time_radix_conversion_read(x);
775-
set_cutoffs(&orig);
776-
t2 = s_time_radix_conversion_read(x);
777-
fprintf(readradix,"%d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2, (int64_t)t2 - (int64_t)t1);
778-
fflush(readradix);
779-
if (args.verbose == 1) {
780-
printf("RCR %d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2, (int64_t)t2 - (int64_t)t1);
781-
fflush(stdout);
780+
/* The cutoffs are so low, we would see nothing interesting in the graphs with the default args.upper_limit_print */
781+
if ((x * MP_DIGIT_BIT) < (3 * updated.RADIX_READ)) {
782+
set_cutoffs(&max_cutoffs);
783+
t1 = s_time_radix_conversion_read(x);
784+
set_cutoffs(&orig);
785+
t2 = s_time_radix_conversion_read(x);
786+
fprintf(readradix,"%d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2, (int64_t)t2 - (int64_t)t1);
787+
fflush(readradix);
788+
if (args.verbose == 1) {
789+
printf("RCR %d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2, (int64_t)t2 - (int64_t)t1);
790+
fflush(stdout);
791+
}
782792
}
783793

784-
set_cutoffs(&max_cutoffs);
785-
t1 = s_time_radix_conversion_write(x);
786-
set_cutoffs(&orig);
787-
t2 = s_time_radix_conversion_write(x);
788-
fprintf(writeradix,"%d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2,
789-
(int64_t)t2 - (int64_t)t1);
790-
fflush(writeradix);
791-
if (args.verbose == 1) {
792-
printf("RCW %d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2, (int64_t)t2 - (int64_t)t1);
793-
fflush(stdout);
794+
if ((x * MP_DIGIT_BIT) < (5 * updated.RADIX_WRITE)) {
795+
set_cutoffs(&max_cutoffs);
796+
t1 = s_time_radix_conversion_write(x);
797+
set_cutoffs(&orig);
798+
t2 = s_time_radix_conversion_write(x);
799+
fprintf(writeradix,"%d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2,
800+
(int64_t)t2 - (int64_t)t1);
801+
fflush(writeradix);
802+
if (args.verbose == 1) {
803+
printf("RCW %d: %9" PRIu64 " %9" PRIu64 ", %9" PRIi64 "\n", x * MP_DIGIT_BIT, t1, t2, (int64_t)t2 - (int64_t)t1);
804+
fflush(stdout);
805+
}
794806
}
795807

796808
}

etc/tune_it.sh

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -92,27 +92,35 @@ END_OF_INPUT
9292
i=$(tail -n +2 $FILE_NAME | wc -l)
9393
# our median point will be at $i entries
9494
i=$(( (i / 2) + 1 ))
95-
TMP=$(median $FILE_NAME 1 $i)
96-
echo "#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF $TMP"
97-
echo "#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(km) Appending to $TOMMATH_CUTOFFS_H" $?
98-
TMP=$(median $FILE_NAME 2 $i)
99-
echo "#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF $TMP"
100-
echo "#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(ks) Appending to $TOMMATH_CUTOFFS_H" $?
101-
TMP=$(median $FILE_NAME 3 $i)
102-
echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP"
103-
echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3m) Appending to $TOMMATH_CUTOFFS_H" $?
104-
TMP=$(median $FILE_NAME 4 $i)
105-
echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP"
106-
echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $?
107-
108-
TMP=$(median $FILE_NAME 5 $i)
109-
echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP"
110-
echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcr) Appending to $TOMMATH_CUTOFFS_H" $?
111-
TMP=$(median $FILE_NAME 6 $i)
112-
echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP"
113-
echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcw) Appending to $TOMMATH_CUTOFFS_H" $?
114-
115-
95+
TMP0=$(median $FILE_NAME 1 $i)
96+
echo "#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF $TMP0"
97+
echo "#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF $TMP0" >> $TOMMATH_CUTOFFS_H || die "(km) Appending to $TOMMATH_CUTOFFS_H" $?
98+
TMP1=$(median $FILE_NAME 2 $i)
99+
echo "#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF $TMP1"
100+
echo "#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF $TMP1" >> $TOMMATH_CUTOFFS_H || die "(ks) Appending to $TOMMATH_CUTOFFS_H" $?
101+
TMP2=$(median $FILE_NAME 3 $i)
102+
echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP2"
103+
echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP2" >> $TOMMATH_CUTOFFS_H || die "(tc3m) Appending to $TOMMATH_CUTOFFS_H" $?
104+
TMP3=$(median $FILE_NAME 4 $i)
105+
echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP3"
106+
echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP3" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $?
107+
108+
TMP4=$(median $FILE_NAME 5 $i)
109+
echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP4"
110+
echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP4" >> $TOMMATH_CUTOFFS_H || die "(rcr) Appending to $TOMMATH_CUTOFFS_H" $?
111+
TMP5=$(median $FILE_NAME 6 $i)
112+
echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP5"
113+
echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP5" >> $TOMMATH_CUTOFFS_H || die "(rcw) Appending to $TOMMATH_CUTOFFS_H" $?
114+
115+
# Print the tables for the graphs. Please do not change.
116+
if [ $# -eq 1 ]; then
117+
OPTION0=$1
118+
# Do not forget to raise if there are more fast algorithms with higher cutoffs.
119+
if [ $OPTION0 -lt 500 ]; then
120+
OPTION0=500
121+
fi
122+
# Pass the clamped OPTION0 (never below 500) rather than the raw $1, so the minimum set above takes effect.
"$MPWD"/tune -p -r $RLOOPS -L $LAG -S "$RNUM" -o $OFFSET -M $OPTION0 -s $TMP0,$TMP1,$TMP2,$TMP3,$TMP4,$TMP5
123+
fi
116124

117125

118126

makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ tune: $(LIBNAME)
109109
$(MAKE) -C etc tune CFLAGS="$(LTM_CFLAGS) -I../"
110110
$(MAKE)
111111

112+
graphs: $(LIBNAME)
113+
$(MAKE) -C etc graphs CFLAGS="$(LTM_CFLAGS) -I../"
114+
$(MAKE)
115+
112116
etc-all: $(LIBNAME)
113117
$(MAKE) -C etc all CFLAGS="$(LTM_CFLAGS) -I../"
114118
$(MAKE)

makefile.mingw

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,11 @@ tune: $(LIBNAME_S)
8787
$(MAKE) -C etc tune
8888
$(MAKE)
8989

90+
graphs: $(LIBNAME_S)
91+
$(MAKE) -C etc graphs
92+
$(MAKE)
9093
clean:
91-
@-cmd /c del /Q /S *.o *.a *.exe *.dll 2>nul
94+
@-cmd /c del /Q /S *.o *.a *.exe *.dll multiplying squaring readradix writeradix get_limbsize *png 2>nul
9295

9396
#Install the library + headers
9497
install: $(LIBMAIN_S) $(LIBMAIN_I) $(LIBMAIN_D)

makefile.msvc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,15 @@ tune: $(LIBMAIN_S)
8787
$(MAKE) -C etc tune
8888
$(MAKE)
8989

90+
graphs: $(LIBMAIN_S)
91+
$(MAKE) -C etc graphs
92+
$(MAKE)
93+
9094
clean-obj:
9195
@-cmd /c del /Q /S *.OBJ 2>nul
9296

9397
clean: clean-obj
94-
@-cmd /c del /Q /S *.LIB *.EXE *.DLL 2>nul
98+
@-cmd /c del /Q /S *.LIB *.EXE *.DLL multiplying squaring readradix writeradix *png 2>nul
9599

96100
#Install the library + headers
97101
install: $(LIBMAIN_S) $(LIBMAIN_I) $(LIBMAIN_D)

makefile.shared

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,17 @@ tune: $(LIBNAME)
9898
$(LTLINK) $(LTM_LDFLAGS) -o etc/tune etc/tune.o $(LIBNAME)
9999
cd etc/; /bin/sh tune_it.sh; cd ..
100100
$(MAKE) -f makefile.shared
101+
102+
103+
graphs: $(LIBNAME)
104+
$(LTCOMPILE) $(LTM_CFLAGS) -c etc/tune.c -o etc/tune.o
105+
$(LTLINK) $(LTM_LDFLAGS) -o etc/tune etc/tune.o $(LIBNAME)
106+
$(LTCOMPILE) $(LTM_CFLAGS) -c etc/get_limbsize.c -o etc/get_limbsize.o
107+
$(LTLINK) $(LTM_LDFLAGS) -o etc/get_limbsize etc/get_limbsize.o $(LIBNAME)
108+
# Each recipe line runs in its own shell, so the cd is chained with the commands that must run inside etc/.
cd etc/ && /bin/sh tune_it.sh 2000 && gnuplot -c plot_graphs.gp `./get_limbsize`
112+
$(MAKE) -f makefile.shared
113+
114+

makefile.unix

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ tune: $(LIBMAIN_S)
8686
$(MAKE) -C etc tune
8787
$(MAKE)
8888

89+
graphs: $(LIBMAIN_S)
90+
$(MAKE) -C etc graphs
91+
$(MAKE)
92+
8993
#NOTE: this makefile works also on cygwin, thus we need to delete *.exe
9094
clean:
9195
-@rm -f $(OBJECTS) $(LIBMAIN_S)

tommath_cutoffs.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@
66
be aware that it may take a long time. It took 2:30 minutes
77
on the aforementioned machine for example.
88
*/
9-
10-
#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF 80
11-
#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF 120
12-
#define MP_DEFAULT_MUL_TOOM_CUTOFF 350
13-
#define MP_DEFAULT_SQR_TOOM_CUTOFF 400
14-
#define MP_DEFAULT_RADIX_READ_CUTOFF 600
15-
#define MP_DEFAULT_RADIX_WRITE_CUTOFF 600
9+
#define MP_DEFAULT_MUL_KARATSUBA_CUTOFF 116
10+
#define MP_DEFAULT_SQR_KARATSUBA_CUTOFF 160
11+
#define MP_DEFAULT_MUL_TOOM_CUTOFF 139
12+
#define MP_DEFAULT_SQR_TOOM_CUTOFF 193
13+
#define MP_DEFAULT_RADIX_READ_CUTOFF 3000
14+
#define MP_DEFAULT_RADIX_WRITE_CUTOFF 480

0 commit comments

Comments
 (0)