diff --git a/src/ent.c b/src/ent.c index bf981bc..47e6971 100644 --- a/src/ent.c +++ b/src/ent.c @@ -59,6 +59,7 @@ static void help(void) printf("\n -c Print occurrence counts"); printf("\n -f Fold upper to lower case letters"); printf("\n -t Terse output in CSV format"); + printf("\n -p Include Chi-square p-value in terse output (as decimal)"); printf("\n -u Print this message\n"); printf("\nVersion " VERSION); printf("\nBy John Walker"); @@ -107,9 +108,10 @@ int main(int argc, char *argv[]) int counts = FALSE, /* Print character counts */ fold = FALSE, /* Fold upper to lower */ binary = FALSE, /* Treat input as a bitstream */ - terse = FALSE; /* Terse (CSV format) output */ + terse = FALSE, /* Terse (CSV format) output */ + csp = FALSE; /* Terse includes Chi^2 p-value */ - while ((opt = getopt(argc, argv, "bcftuv?BCFTUV")) != -1) { + while ((opt = getopt(argc, argv, "bcfptuv?BCFPTUV")) != -1) { switch (toISOlower(opt)) { case 'b': binary = TRUE; @@ -123,6 +125,10 @@ int main(int argc, char *argv[]) fold = TRUE; break; + case 'p': + csp = TRUE; + break; + case 't': terse = TRUE; break; @@ -200,22 +206,31 @@ int main(int argc, char *argv[]) } fclose(fp); - /* Complete calculation and return sequence metrics */ + /* Complete calculation */ rt_end(&ent, &chisq, &mean, &montepi, &scc); - if (terse) { - printf("0,File-%ss,Entropy,Chi-square,Mean,Monte-Carlo-Pi,Serial-Correlation\n", - binary ? "bit" : "byte"); - printf("1,%ld,%f,%f,%f,%f,%f\n", - totalc, ent, chisq, mean, montepi, scc); - } - /* Calculate probability of observed distribution occurring from the results of the Chi-Square test */ chip = pochisq(chisq, (binary ? 1 : 255)); + /* Return sequence metrics */ + + if (terse) { + if (csp) { + printf("0,File-%ss,Entropy,Chi-square,Chi-square-p-val,Mean,Monte-Carlo-Pi,Serial-Correlation\n", + binary ? 
"bit" : "byte"); + printf("1,%ld,%f,%f,%f,%f,%f,%f\n", + totalc, ent, chisq, chip, mean, montepi, scc); + } else { + printf("0,File-%ss,Entropy,Chi-square,Mean,Monte-Carlo-Pi,Serial-Correlation\n", + binary ? "bit" : "byte"); + printf("1,%ld,%f,%f,%f,%f,%f\n", + totalc, ent, chisq, mean, montepi, scc); + } + } + /* Print bin counts if requested */ if (counts) { diff --git a/src/ent.html b/src/ent.html index 77e2e59..f153758 100644 --- a/src/ent.html +++ b/src/ent.html @@ -127,7 +127,7 @@

NAME

SYNOPSIS

- ent [ -b -c -f -t -u ] [ infile ] + ent [ -b -c -f -p -t -u ] [ infile ]

DESCRIPTION

@@ -304,6 +304,12 @@

OPTIONS

Terse Mode Output Format below for additional details. +
-p
Used in conjunction with -t to + include the Chi-square p-value in the terse + output as a decimal fraction (not a percentage). Has no effect + unless -t is also specified. See + Terse Mode Output Format + below for additional details.
+
-u
Print how-to-call information.
@@ -340,7 +346,20 @@

TERSE MODE OUTPUT FORMAT

column title record. If the -b option is specified, the second field of the type 0 record will be “File-bits”, and the file_length field in type 1 record will be given -in bits instead of bytes. If the -c option is specified, +in bits instead of bytes. +

+ +

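+Specifying -p in conjunction with -t adds the Chi-square p-value to the CSV output as an extra field immediately after the Chi-square value. Note that the p-value is given as a decimal fraction, not as a percentage. When both options are specified, the type 0 and type 1 records become: +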

+ +
+0,File-bytes,Entropy,Chi-square,Chi-square-p-val,Mean,Monte-Carlo-Pi,Serial-Correlation
+1,file_length,entropy,chi_square,chi_square_p_val,mean,Pi_value,correlation
+
+ +

+If the -c option is specified, additional records are appended to the terse mode output which contain the character counts: