99#include "ccx_encoders_helpers.h"
1010#include "ccx_encoders_spupng.h"
1111#include "ocr.h"
12- #undef OCR_DEBUG
1312
1413struct ocrCtx
1514{
@@ -686,7 +685,6 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
686685 TessResultIteratorDelete (ri );
687686 }
688687 // End Color Detection
689- freep (& text_out );
690688 boxDestroy (& crop_points );
691689
692690 pixDestroy (& pix );
@@ -698,47 +696,31 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
698696 return text_out ;
699697}
700698
701- void erode (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color )
699+ void erode (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color , int background_index )
702700{
703- int background_index ;
704- for (background_index = 0 ; background_index < nb_color ; background_index ++ )
705- {
706- if (alpha [background_index ])
707- {
708- break ;
709- }
710- }
711701 // we will use a 2*2 kernel for the erosion
712702 for (int row = 0 ; row < h - 1 ; row ++ )
713703 {
714704 for (int col = 0 ; col < w - 1 ; col ++ )
715705 {
716- if (alpha [ bitmap [row * w + col ]] || alpha [ bitmap [(row + 1 ) * w + col ]] ||
717- alpha [ bitmap [row * w + (col + 1 )]] || alpha [ bitmap [(row + 1 ) * w + (col + 1 )]] )
706+ if (bitmap [row * w + col ] == background_index || bitmap [(row + 1 ) * w + col ] == background_index ||
707+ bitmap [row * w + (col + 1 )] == background_index || bitmap [(row + 1 ) * w + (col + 1 )] == background_index )
718708 {
719709 bitmap [row * w + col ] = background_index ;
720710 }
721711 }
722712 }
723713}
724714
725- void dilate (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color )
715+ void dilate (png_color * palette , png_byte * alpha , uint8_t * bitmap , int w , int h , int nb_color , int foreground_index )
726716{
727- int foreground_index ;
728- for (foreground_index = 0 ; foreground_index < nb_color ; foreground_index ++ )
729- {
730- if (!alpha [foreground_index ])
731- {
732- break ;
733- }
734- }
735717 // we will use a 2*2 kernel for the dilation
736718 for (int row = 0 ; row < h - 1 ; row ++ )
737719 {
738720 for (int col = 0 ; col < w - 1 ; col ++ )
739721 {
740- if (!( alpha [ bitmap [row * w + col ]] && alpha [ bitmap [(row + 1 ) * w + col ]] &&
741- alpha [ bitmap [row * w + (col + 1 )]] && alpha [ bitmap [(row + 1 ) * w + (col + 1 )]] ))
722+ if (( bitmap [row * w + col ] == foreground_index && bitmap [(row + 1 ) * w + col ] == foreground_index &&
723+ bitmap [row * w + (col + 1 )] == foreground_index && bitmap [(row + 1 ) * w + (col + 1 )] == foreground_index ))
742724 {
743725 bitmap [row * w + col ] = foreground_index ;
744726 }
@@ -769,6 +751,7 @@ static int quantize_map(png_byte *alpha, png_color *palette,
769751 */
770752 uint32_t * mcit = NULL ;
771753 struct transIntensity ti = {alpha , palette };
754+ int text_color , text_bg_color ;
772755
773756 int ret = 0 ;
774757
@@ -835,6 +818,14 @@ static int quantize_map(png_byte *alpha, png_color *palette,
835818 max_ind = j ;
836819 }
837820 }
821+
822+ // Assume second most frequent color to be text background (first is alpha channel)
823+ if (i == 1 )
824+ text_bg_color = iot [max_ind ];
825+ // Assume third most frequent color to be text color
826+ if (i == 2 )
827+ text_color = iot [max_ind ];
828+
838829 for (j = i ; j > 0 && max_ind < mcit [j - 1 ]; j -- )
839830 {
840831 mcit [j ] = mcit [j - 1 ];
@@ -878,8 +869,8 @@ static int quantize_map(png_byte *alpha, png_color *palette,
878869 palette [iot [i ]].green = palette [index ].green ;
879870 }
880871 }
881- erode (palette , alpha , bitmap , w , h , nb_color );
882- dilate (palette , alpha , bitmap , w , h , nb_color );
872+ erode (palette , alpha , bitmap , w , h , nb_color , text_bg_color );
873+ dilate (palette , alpha , bitmap , w , h , nb_color , text_color );
883874#ifdef OCR_DEBUG
884875 ccx_common_logging .log_ftn ("Colors present in quantized Image\n" );
885876 for (int i = 0 ; i < nb_color ; i ++ )
@@ -1062,7 +1053,13 @@ char *paraof_ocrtext(struct cc_subtitle *sub, struct encoder_ctx *context)
10621053 len += strlen (rect -> ocr_text );
10631054 }
10641055 if (len <= 0 )
1056+ {
1057+ for (i = 0 , rect = sub -> data ; i < sub -> nb_data ; i ++ , rect ++ )
1058+ {
1059+ freep (& rect -> ocr_text );
1060+ }
10651061 return NULL ;
1062+ }
10661063 else
10671064 {
10681065 str = malloc (len + 1 + 10 ); // Extra space for possible trailing '\n's at the end of tesseract UTF8 text
@@ -1076,7 +1073,7 @@ char *paraof_ocrtext(struct cc_subtitle *sub, struct encoder_ctx *context)
10761073 if (!rect -> ocr_text )
10771074 continue ;
10781075 add_ocrtext2str (str , rect -> ocr_text , context -> encoded_crlf , context -> encoded_crlf_length );
1079- free ( rect -> ocr_text );
1076+ freep ( & rect -> ocr_text );
10801077 }
10811078 return str ;
10821079}
0 commit comments