Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 25 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# ocr-space-api

Allows access to the OCR.SPACE API to send images and get the recognized text back.

More Details: https://ocr.space/ocrapi

**IMPORTANT** The OCR is provided by OCR.space. I am not affiliated with them; I just want to help by sharing this library.
**IMPORTANT** The OCR is provided by OCR.space. I am not affiliated with them; I just want to help by sharing this library.

## Installation

### First - Register and Get your API key

Get your API key at https://ocr.space/ocrapi ( Direct link : http://eepurl.com/bOLOcf ). Just follow their steps.
Get your API key at https://ocr.space/ocrapi ( Direct link : http://eepurl.com/bOLOcf ). Just follow their steps.

### Second - Install npm package

Expand All @@ -24,7 +25,7 @@ You can see and example at the folder `example`
```javascript
const ocrSpaceApi = require('ocr-space-api');

var options = {
var options = {
apikey: '<your_api_key_here>',
language: 'por', // Português
imageFormat: 'image/png', // Image Type (Only png or gif is acceptable at the moment I wrote this)
Expand All @@ -43,20 +44,34 @@ ocrSpaceApi.parseImageFromLocalFile(imageFilePath, options)
console.log('ERROR:', err);
});


// To process RAW data image use the parseImageFromBuffer call
// for example in puppeteer scripts you could use the following for img

{
const buffer = await response.buffer();
//console.log('data:image/png;base64,' + buffer.toString('base64'));
try {
let parsedResult = await ocrSpaceApi.parseImageFromBuffer(buffer, options);
let captcha = parsedResult.parsedText.replace(/\s/g, '').trim();
} catch (error) {
console.log('OCR ERROR:', error);
}
}

```

### Options

##### Language
* Portuguese = `por`
* English = `eng`
* German = `ger`
* Italian = `ita`
* For more details, see: https://ocr.space/ocrapi#PostParameters

- Portuguese = `por`
- English = `eng`
- German = `ger`
- Italian = `ita`
- For more details, see: https://ocr.space/ocrapi#PostParameters

##### isOverlayRequired

`Default = False`
Allows you to specify if the image/pdf text overlay is required. Overlay could be used to show the text over the image


209 changes: 141 additions & 68 deletions lib/ocrSpaceApi.js
Original file line number Diff line number Diff line change
@@ -1,78 +1,144 @@

// Load Modules
const fs = require('fs');
const path = require('path');
const request = require('request');
const util = require('util');
//const Q = require('q');


// Default values used when the caller's options omit a setting.
// NOTE(review): `_defaultLanguade` and `_isOverlayRequired` each appear twice below; this looks like
// both sides of a diff (old value first, new value second) — confirm which pair is current.

// Endpoint the OCR request is sent to unless options.url overrides it.
const _defaultOcrSpaceUrl = 'https://api.ocr.space/parse/image';
// util.format pattern for the Base64Image field: first %s is the image format, second the base64 data.
const _base64ImagePattern = 'data:%s;base64,%s';
// Image format used when options.imageFormat is not provided.
const _defaultImageType = 'image/gif';
// Language code used when options.language is not provided.
const _defaultLanguade = 'eng';
// NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
const _isOverlayRequired = 'false';
const _defaultLanguade = 'por';
const _isOverlayRequired = false;


/**
 * Run the request to OCR.SPACE and return the result.
 *
 * Exactly one image source is used, checked in this order: `localFile`,
 * then `url`, then `buffer`. Local files and buffers are sent as a base64
 * data URI; a URL is forwarded as-is.
 *
 * @example
 *
 * Object {options}
 *
 *      {
 *        apikey: '<YOUR_API_KEY_HERE>',
 *        language: 'por',
 *        isOverlayRequired: true,
 *        url: 'https://api.ocr.space/parse/image' ,
 *        imageFormat: 'image/gif'
 *      }
 *
 * @param {string} localFile path to local image file
 * @param {string} url url to image
 * @param {Buffer} buffer raw image data
 * @param {object} options object with the options (`apikey` is mandatory)
 * @returns {Promise<{parsedText: string, ocrParsedResult: object}>} resolves with the
 *          concatenated text of all parsed pages plus the untouched service response;
 *          rejects with a message string, the request error, or the service response
 *          when it carries no `ParsedResults`.
 * @throws {string} error
 */
var _sendRequestToOcrSpace = function(localFile, url, buffer, options) {
  return new Promise(function(resolve, reject) {
    try {
      // Validate everything up front and stop at the first failure, so no
      // request is ever sent with incomplete data.
      if (!options || !options.apikey) {
        reject("API key required");
        return;
      }
      if (!localFile && !url && !buffer) {
        reject("URL image or File image is required.");
        return;
      }
      if (localFile && !fs.existsSync(localFile)) {
        reject("File not exists: " + localFile);
        return;
      }

      const formOptions = {
        language: options.language ? options.language : _defaultLanguade,
        apikey: options.apikey,
        isOverlayRequired: options.isOverlayRequired ? options.isOverlayRequired : false
      };

      const imageFormat = options.imageFormat ? options.imageFormat : _defaultImageType;

      if (localFile) {
        // readFileSync already returns a Buffer, so no extra Buffer wrapping is needed.
        const fileBase64 = fs.readFileSync(localFile).toString('base64');
        formOptions.Base64Image = util.format(_base64ImagePattern, imageFormat, fileBase64);
      } else if (url) {
        formOptions.url = url;
      } else {
        // Accept plain typed arrays as well: only a real Buffer understands 'base64'.
        const rawImage = Buffer.isBuffer(buffer) ? buffer : Buffer.from(buffer);
        formOptions.Base64Image = util.format(_base64ImagePattern, imageFormat, rawImage.toString('base64'));
      }

      const requestOptions = {
        method: 'post',
        url: options.url ? options.url : _defaultOcrSpaceUrl,
        form: formOptions,
        headers: {
          "content-type": "application/json",
        },
        json: true,
      };

      request(requestOptions, function(error, response, ocrParsedResult) {
        if (error) {
          reject(error);
          return;
        }

        // Guard against an empty or non-JSON body: this callback runs outside the
        // surrounding try/catch, so an exception here would never reach reject().
        const parsedResults = ocrParsedResult ? ocrParsedResult["ParsedResults"] : undefined;
        if (!Array.isArray(parsedResults)) {
          reject(ocrParsedResult);
          return;
        }

        let pageText = '';
        parsedResults.forEach(function(page) {
          const exitCode = page["FileParseExitCode"];

          // Exit code 1 marks a successfully parsed page; every other code is
          // reported inline as an error.
          if (+exitCode === 1) {
            pageText += page["ParsedText"];
          } else {
            const pageError = page["ErrorMessage"] || ocrParsedResult["ErrorMessage"] || ("FileParseExitCode " + exitCode);
            pageText += "Error: " + pageError;
          }
        });

        // Build the object with the results.
        resolve({
          parsedText: pageText,
          ocrParsedResult: ocrParsedResult
        });
      });
    } catch (error) {
      reject(error);
    }
  });
}

/**
Expand All @@ -81,20 +147,20 @@ var _sendRequestToOcrSpace = function(localFile, url, options) {
*
* Object {options}
*
* {
* apikey: '<YOUR_API_KEY_HERE>',
* language: 'por',
* isOverlayRequired: true,
* url: 'https://api.ocr.space/parse/image' ,
* imageFormat: 'image/gif'
* }
* {
* apikey: '<YOUR_API_KEY_HERE>',
* language: 'por',
* isOverlayRequired: true,
* url: 'https://api.ocr.space/parse/image' ,
* imageFormat: 'image/gif'
* }
*
* @param {string} localFile path to local image file
* @param {object} options object with the options
* @throws {string} error
*/
exports.parseFromLocalFile = function(localFile, options) {
return _sendRequestToOcrSpace(localFile, undefined, options);
exports.parseImageFromLocalFile = function(localFile, options) {
return _sendRequestToOcrSpace(localFile, undefined, undefined, options);
}

/**
Expand All @@ -103,19 +169,26 @@ exports.parseFromLocalFile = function(localFile, options) {
*
* Object {options}
*
* {
* apikey: '<YOUR_API_KEY_HERE>',
* language: 'por',
* isOverlayRequired: true,
* url: 'https://api.ocr.space/parse/image' ,
* imageFormat: 'image/gif'
* }
* {
* apikey: '<YOUR_API_KEY_HERE>',
* language: 'por',
* isOverlayRequired: true,
* url: 'https://api.ocr.space/parse/image' ,
* imageFormat: 'image/gif'
* }
*
* @param {string} imageUrl url to a image file
* @param {object} options object with the options
* @throws {string} error
*/
exports.parseImageFromUrl = function(imageUrl, options) {
return _sendRequestToOcrSpace(undefined, imageUrl, options);
return _sendRequestToOcrSpace(undefined, imageUrl, undefined, options);
}

/**
 * Send a buffer to parse to TEXT with the OCR.SPACE.API
 *
 * Forwards to the shared request helper with no file path and no URL, so the
 * buffer is the only image source.
 *
 * @param {Buffer} buffer raw image data to be recognized
 * @param {object} options object with the options
 * @returns whatever the shared request helper returns for this buffer
 */
exports.parseImageFromBuffer = (buffer, options) =>
  _sendRequestToOcrSpace(undefined, undefined, buffer, options);