Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CXXFLAGS = -Wall -O3
CXXFLAGS = -Wall -O3 -std=c++11 -msse4.2

all: split1 split2 split6 split7 split8 split9 splitc1 splitc2 splitc3 split_subparser
all: split1 split2 split6 split7 split8 split9 splitc1 splitc2 splitc3 split_subparser splithb1 splithb2

split7: split7.cpp | deps/strtk
$(CXX) $(LDFLAGS) -Ideps/strtk/ $(CXXFLAGS) split7.cpp -o split7
Expand Down
3 changes: 2 additions & 1 deletion run_all.bash
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ g++ --version | head -n1
python --version
echo "=== End System info"
echo

# warming up file
wc test_data
for i in `find . -maxdepth 1 -executable -iname 'split*.py' | sort ; find . -maxdepth 1 -executable \! -iname '*.py' -iname 'split*' | sort`
do
printf "%-18s " $i
Expand Down
54 changes: 54 additions & 0 deletions splithb1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Contributed by Henrique Bucher (http://www.vitorian.com/)
// Requires SSE4.2
#include <iostream>
#include <iomanip>
#include <string>
#include <array>
#include <unistd.h>

/// Scalar benchmark: reads stdin in 1 MiB chunks via fread and tallies
/// lines, words and remaining characters, then prints the totals, the elapsed
/// wall time and the lines-per-second rate on stderr.
///
/// Counting rules: every '\n' is one line and one word, every ' ' is one
/// word, and any other byte is one char.
///
/// @return 0 on success, 1 if the read buffer could not be allocated.
int main()
{
    long count = 0;
    timespec start;
    clock_gettime(CLOCK_MONOTONIC, &start);

    std::ios_base::sync_with_stdio(false); // disable synchronous IO

    std::size_t numWords = 0;
    std::size_t numChars = 0;

    const std::size_t bufsize = 1024 * 1024;
    char* const buf = static_cast<char*>(aligned_alloc(16, bufsize));
    if (buf == nullptr) {
        // Without this check fread would write through a null pointer.
        std::cerr << "C++ : failed to allocate " << bufsize << " byte read buffer" << std::endl;
        return 1;
    }

    while (true) {
        // fread returns size_t; 0 means end of input (or a read error).
        const std::size_t nb = ::fread(buf, 1, bufsize, stdin);
        if (nb == 0) break;

        for (std::size_t j = 0; j < nb; ++j) {
            const char c = buf[j];
            if (c == '\n') {
                ++numWords;
                ++count;
            } else if (c == ' ') {
                ++numWords;
            } else {
                ++numChars;
            }
        }
    }

    // Release the buffer before timing stops, so it is not leaked.
    ::free(buf);

    timespec end;
    clock_gettime(CLOCK_MONOTONIC, &end);
    const double sec = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
    std::cerr << "C++ : Saw " << count << " lines (" << numWords << " words/" << numChars << " chars) in " << std::fixed << std::setprecision(1) << sec << " seconds." ;

    if (sec > 0)
    {
        const double lps = count / sec;
        std::cerr << " Crunch speed: " << std::fixed << std::setprecision(1) << lps << std::endl;
    }
    else
        std::cerr << std::endl;

    return 0;
}
67 changes: 67 additions & 0 deletions splithb2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Contributed by Henrique Bucher (http://www.vitorian.com/)
// Requires SSE4.2
#include <iostream>
#include <iomanip>
#include <string>
#include <array>
#include <unistd.h>
#include <smmintrin.h>

/// Returns a bit mask with bit i set when byte i of `chunk` (for i < len)
/// equals the first byte of `needle`. Only the first `len` bytes of `chunk`
/// are treated as valid. The mask is read with _mm_cvtsi128_si32 instead of
/// punning through a union, which is undefined behaviour in ISO C++.
static inline unsigned matchMask( __m128i needle, __m128i chunk, int len )
{
    const __m128i res = _mm_cmpestrm( needle, 1, chunk, len,
        (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK) );
    // With _SIDD_BIT_MASK the result occupies the low 16 bits of the register.
    return static_cast<unsigned>( _mm_cvtsi128_si32( res ) ) & 0xFFFFu;
}

/// SSE4.2 benchmark: reads stdin in 1 MiB chunks via fread and, 16 bytes at
/// a time, tallies lines, words and remaining characters using PCMPESTRM and
/// popcount. Prints the totals, the elapsed wall time and the
/// lines-per-second rate on stderr.
///
/// Counting rules: every '\n' is one line and one word, every ' ' is one
/// word, and any other byte is one char.
///
/// @return 0 on success, 1 if the read buffer could not be allocated.
int main()
{
    long count = 0;
    timespec start;
    clock_gettime(CLOCK_MONOTONIC, &start);

    std::size_t numWords = 0;
    std::size_t numChars = 0;

    const __m128i LF = _mm_set1_epi8( '\n' );
    const __m128i SPC = _mm_set1_epi8( ' ' );

    constexpr std::size_t bufsize = 1024 * 1024;
    // pos below only ever stops on a multiple of 16 or on nb itself, so with a
    // buffer size divisible by 16 the 16-byte load never leaves the allocation.
    static_assert( bufsize % 16 == 0, "buffer size must be a multiple of the vector width" );

    char* const buf = static_cast<char*>( aligned_alloc( 16, bufsize ) );
    if ( buf == nullptr ) {
        // Without this check fread would write through a null pointer.
        std::cerr << "C++ : failed to allocate " << bufsize << " byte read buffer" << std::endl;
        return 1;
    }

    while ( true ) {
        // fread returns size_t; 0 means end of input (or a read error).
        const std::size_t nb = ::fread( buf, 1, bufsize, stdin );
        if ( nb == 0 ) break;

        std::size_t pos = 0;
        while ( pos < nb ) {
            const __m128i u = _mm_loadu_si128( reinterpret_cast<const __m128i*>( &buf[pos] ) );

            // Number of valid bytes in this chunk: 16, or fewer for the tail.
            const std::size_t remaining = nb - pos;
            const unsigned sz = remaining > 16 ? 16u : static_cast<unsigned>( remaining );

            const unsigned numspc = __builtin_popcount( matchMask( SPC, u, static_cast<int>( sz ) ) );
            const unsigned numlf = __builtin_popcount( matchMask( LF, u, static_cast<int>( sz ) ) );

            numWords += numspc + numlf;
            count += numlf;
            numChars += sz - (numspc + numlf);

            pos += sz;
        }
    }

    // Release the buffer before timing stops, so it is not leaked.
    ::free( buf );

    timespec end;
    clock_gettime(CLOCK_MONOTONIC, &end);
    const double sec = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
    std::cerr << "C++ : Saw " << count << " lines (" << numWords << " words/" << numChars << " chars) in " << std::fixed << std::setprecision(1) << sec << " seconds." ;

    if (sec > 0)
    {
        const double lps = count / sec;
        std::cerr << " Crunch speed: " << std::fixed << std::setprecision(1) << lps << std::endl;
    }
    else
        std::cerr << std::endl;

    return 0;
}