diff --git a/README.md b/README.md index a535e4f..de37c78 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,16 @@ # ocr-space-api + Allow to access ORC.SPACE api to send images and get the result More Details: https://ocr.space/ocrapi -**IMPORTANT** The OCR is provided by ocr space. I don't have anything with them, I just want to help sharing this library. +**IMPORTANT** The OCR is provided by ocr space. I am not affiliated with them; I just want to help by sharing this library. ## Instalation ### First - Register and Get your API key -Get you API key at https://ocr.space/ocrapi ( Direct link : http://eepurl.com/bOLOcf ). Just, follow their steps. +Get your API key at https://ocr.space/ocrapi ( Direct link : http://eepurl.com/bOLOcf ). Just follow their steps. ### Second - Install npm package @@ -24,7 +25,7 @@ You can see and example at the folder `example` ```javascript const ocrSpaceApi = require('ocr-space-api'); -var options = { +var options = { apikey: '', language: 'por', // Português imageFormat: 'image/png', // Image Type (Only png ou gif is acceptable at the moment i wrote this) @@ -43,20 +44,34 @@ ocrSpaceApi.parseImageFromLocalFile(imageFilePath, options) console.log('ERROR:', err); }); + +// To process raw image data, use the parseImageFromBuffer call. +// For example, in a Puppeteer script you could do the following with an image response: + +{ + const buffer = await response.buffer(); + //console.log('data:image/png;base64,' + buffer.toString('base64')); + try { + let parsedResult = await ocrSpaceApi.parseImageFromBuffer(buffer, options); + let captcha = parsedResult.parsedText.replace(/\s/g, '').trim(); + } catch (error) { + console.log('OCR ERROR:', error); + } +} + ``` ### Options ##### Language - * Portuguese = `por` - * English = `eng` - * German = `ger` - * Italian = `ita` - * and mode details go to: https://ocr.space/ocrapi#PostParameters + +- Portuguese = `por` +- English = `eng` +- German = `ger` +- Italian = `ita` +- for more details go to: 
https://ocr.space/ocrapi#PostParameters ##### isOverlayRequired `Default = False` Allows you to specify if the image/pdf text overlay is required. Overlay could be used to show the text over the image - - diff --git a/lib/ocrSpaceApi.js b/lib/ocrSpaceApi.js index 46724f5..178fb0a 100644 --- a/lib/ocrSpaceApi.js +++ b/lib/ocrSpaceApi.js @@ -1,16 +1,18 @@ // Load Modules const fs = require('fs'); -const path = require('path'); const request = require('request'); const util = require('util'); +//const Q = require('q'); + // Set default data const _defaultOcrSpaceUrl = 'https://api.ocr.space/parse/image'; const _base64ImagePattern = 'data:%s;base64,%s'; const _defaultImageType = 'image/gif'; -const _defaultLanguade = 'eng'; -const _isOverlayRequired = 'false'; +const _defaultLanguade = 'por'; +const _isOverlayRequired = false; + /** * Run the request to OCR.SPACE and return the result. @@ -18,61 +20,125 @@ const _isOverlayRequired = 'false'; * * Object {options} * - * { - * apikey: '', - * language: 'por', - * isOverlayRequired: true, - * url: 'https://api.ocr.space/parse/image' , - * imageFormat: 'image/gif' - * } + * { + * apikey: '', + * language: 'por', + * isOverlayRequired: true, + * url: 'https://api.ocr.space/parse/image' , + * imageFormat: 'image/gif' + * } * * @param {string} localFile path to local image file * @param {string} url url to image * @param {object} options object with the options * @throws {string} error */ -var _sendRequestToOcrSpace = function(localFile, url, options) { - return new Promise(function(resolve, reject) { - if (!options.apikey) - reject("API key required"); - if (localFile && !fs.existsSync(localFile)) - reject("File not found: " + localFile); - - let req = request.post(_defaultOcrSpaceUrl, (error, response) => { - if (error) - reject(error); - - let data = response.toJSON(); - if (data.statusCode === 200) - resolve(data.body); - else - reject({error: { statusCode: data.statusCode, name: "Error", message: data.body}}); - }); +var 
_sendRequestToOcrSpace = function(localFile, url, buffer, options) { + return new Promise(function(resolve, reject) { + try { + // Fail fast on invalid input: `return` stops the executor once the promise is rejected. + if (!options || !options.apikey) { + return reject("API key required"); + } + + // Initialize options, to avoid errors. + if (!options) { + options = {}; + } + + const formOptions = { + language: options.language ? options.language : _defaultLanguade, + apikey: options.apikey, + isOverlayRequired: options.isOverlayRequired ? options.isOverlayRequired : false + }; + + // make string base64 from a local file + if (localFile) { + + if (!fs.existsSync(localFile)) { + return reject("File not exists: " + localFile); + } + + var bitmap = fs.readFileSync(localFile); + var stringBase64File = bitmap.toString('base64'); + formOptions.Base64Image = util.format(_base64ImagePattern, (options.imageFormat) ? options.imageFormat : _defaultImageType, stringBase64File); + + } else if (url) { + formOptions.url = url; + } else if (buffer) { + var stringBase64File = buffer.toString('base64'); + formOptions.Base64Image = util.format(_base64ImagePattern, (options.imageFormat) ? options.imageFormat : _defaultImageType, stringBase64File); + + } else { + return reject("URL image or File image is required."); + } + + const uri = { + method: 'post', + url: options.url ? 
options.url : _defaultOcrSpaceUrl, + form: formOptions, + headers: { + "content-type": "application/json", + }, + json: true, + }; + + request(uri, function (error, response, ocrParsedResult) { + if (error) { + reject(error); + } else { - let form = req.form(); - form.append('language', options.language || _defaultLanguade); - form.append('isOverlayRequired', options.isOverlayRequired.toString() || 'false'); - form.append('apikey', options.apikey); - if (url) - form.append('url', url); - else { - switch (localFile.split('.').pop()) { - case 'pdf': - form.append('file', fs.createReadStream(localFile)); - break; - case 'png': - case 'jpg': - case 'jpeg': - let bitmap = fs.readFileSync(localFile); - let stringBase64File = new Buffer(bitmap).toString('base64'); - form.append('Base64Image', util.format(_base64ImagePattern, (options.imageFormat) ? options.imageFormat : _defaultImageType, stringBase64File)); - break; - default: - reject('Filetype not supported.'); - break; - } - } - }); + //Get the parsed results, exit code and error message and details + var parsedResults = ocrParsedResult && ocrParsedResult["ParsedResults"]; + //var ocrExitCode = ocrParsedResult["OCRExitCode"]; + //var isErroredOnProcessing = ocrParsedResult["IsErroredOnProcessing"]; + //var errorMessage = ocrParsedResult["ErrorMessage"]; + //var errorDetails = ocrParsedResult["ErrorDetails"]; + //var processingTimeInMilliseconds = ocrParsedResult["ProcessingTimeInMilliseconds"]; + + if (parsedResults) { + var pageText = ''; + + parsedResults.forEach(function(value) { + var exitCode = value["FileParseExitCode"]; + var parsedText = value["ParsedText"]; + var errorMessage = value["ErrorMessage"]; + //var errorDetails = responseBody["ErrorDetails"]; + + //var textOverlay = responseBody["TextOverlay"]; + + switch (+exitCode) { + case 1: + pageText += parsedText; + break; + case 0: + case -10: + case -20: + case -30: + case -99: + default: + pageText += "Error: " + errorMessage; + break; + } + + }, 
this); + + // Build the result object: concatenated page text plus the raw API response + const result = { + parsedText: pageText, + ocrParsedResult: ocrParsedResult + }; + + resolve(result); + } else { + reject(ocrParsedResult); + } + } + }); + } catch (error) { + reject(error); + } + }); } /** @@ -81,20 +147,20 @@ var _sendRequestToOcrSpace = function(localFile, url, options) { * * Object {options} * - * { - * apikey: '', - * language: 'por', - * isOverlayRequired: true, - * url: 'https://api.ocr.space/parse/image' , - * imageFormat: 'image/gif' - * } + * { + * apikey: '', + * language: 'por', + * isOverlayRequired: true, + * url: 'https://api.ocr.space/parse/image' , + * imageFormat: 'image/gif' + * } * * @param {string} localFile path to local image file * @param {object} options object with the options * @throws {string} error */ -exports.parseFromLocalFile = function(localFile, options) { - return _sendRequestToOcrSpace(localFile, undefined, options); +exports.parseImageFromLocalFile = function(localFile, options) { + return _sendRequestToOcrSpace(localFile, undefined, undefined, options); } /** @@ -103,19 +169,26 @@ exports.parseFromLocalFile = function(localFile, options) { * * Object {options} * - * { - * apikey: '', - * language: 'por', - * isOverlayRequired: true, - * url: 'https://api.ocr.space/parse/image' , - * imageFormat: 'image/gif' - * } + * { + * apikey: '', + * language: 'por', + * isOverlayRequired: true, + * url: 'https://api.ocr.space/parse/image' , + * imageFormat: 'image/gif' + * } * * @param {string} imageUrl url to a image file * @param {object} options object with the options * @throws {string} error */ exports.parseImageFromUrl = function(imageUrl, options) { - return _sendRequestToOcrSpace(undefined, imageUrl, options); + return _sendRequestToOcrSpace(undefined, imageUrl, undefined, options); +} + +/** + * Send an in-memory image buffer to the OCR.SPACE API and parse it to text + */ +exports.parseImageFromBuffer = function(buffer, options) 
{ + return _sendRequestToOcrSpace(undefined, undefined, 
buffer, options); }