diff --git a/src/ent.c b/src/ent.c index bf981bc..47e6971 100644 --- a/src/ent.c +++ b/src/ent.c @@ -59,6 +59,7 @@ static void help(void) printf("\n -c Print occurrence counts"); printf("\n -f Fold upper to lower case letters"); printf("\n -t Terse output in CSV format"); + printf("\n -p Include Chi-square p-value in terse output (as decimal)"); printf("\n -u Print this message\n"); printf("\nVersion " VERSION); printf("\nBy John Walker"); @@ -107,9 +108,10 @@ int main(int argc, char *argv[]) int counts = FALSE, /* Print character counts */ fold = FALSE, /* Fold upper to lower */ binary = FALSE, /* Treat input as a bitstream */ - terse = FALSE; /* Terse (CSV format) output */ + terse = FALSE, /* Terse (CSV format) output */ + csp = FALSE; /* Terse includes Chi^2 p-value */ - while ((opt = getopt(argc, argv, "bcftuv?BCFTUV")) != -1) { + while ((opt = getopt(argc, argv, "bcfptuv?BCFPTUV")) != -1) { switch (toISOlower(opt)) { case 'b': binary = TRUE; @@ -123,6 +125,10 @@ int main(int argc, char *argv[]) fold = TRUE; break; + case 'p': + csp = TRUE; + break; + case 't': terse = TRUE; break; @@ -200,22 +206,31 @@ int main(int argc, char *argv[]) } fclose(fp); - /* Complete calculation and return sequence metrics */ + /* Complete calculation */ rt_end(&ent, &chisq, &mean, &montepi, &scc); - if (terse) { - printf("0,File-%ss,Entropy,Chi-square,Mean,Monte-Carlo-Pi,Serial-Correlation\n", - binary ? "bit" : "byte"); - printf("1,%ld,%f,%f,%f,%f,%f\n", - totalc, ent, chisq, mean, montepi, scc); - } - /* Calculate probability of observed distribution occurring from the results of the Chi-Square test */ chip = pochisq(chisq, (binary ? 1 : 255)); + /* Return sequence metrics */ + + if (terse) { + if (csp) { + printf("0,File-%ss,Entropy,Chi-square,Chi-square-p-val,Mean,Monte-Carlo-Pi,Serial-Correlation\n", + binary ? 
"bit" : "byte"); + printf("1,%ld,%f,%f,%f,%f,%f,%f\n", + totalc, ent, chisq, chip, mean, montepi, scc); + } else { + printf("0,File-%ss,Entropy,Chi-square,Mean,Monte-Carlo-Pi,Serial-Correlation\n", + binary ? "bit" : "byte"); + printf("1,%ld,%f,%f,%f,%f,%f\n", + totalc, ent, chisq, mean, montepi, scc); + } + } + /* Print bin counts if requested */ if (counts) { diff --git a/src/ent.html b/src/ent.html index 77e2e59..f153758 100644 --- a/src/ent.html +++ b/src/ent.html @@ -127,7 +127,7 @@
+Specifying -p in conjunction with -t includes the Chi-square p-value in the CSV output. Note that the p-value is given as a decimal fraction, not as a percentage. When both options are specified, the output becomes: +
+ ++0,File-bytes,Entropy,Chi-square,Chi-square-p-val,Mean,Monte-Carlo-Pi,Serial-Correlation +1,file_length,entropy,chi_square,chi_square_p_val,mean,Pi_value,correlation ++ +
+If the -c option is specified, additional records are appended to the terse mode output which contain the character counts: