From 5cb7c1dba78b025ff333a202b27f04a2230c9da9 Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Fri, 16 Nov 2018 21:12:32 +0000
Subject: [PATCH] recognition is a bit more robust....
---
aoiOcr.py | 2
sid_process.py | 56 ++++++++++++++++-----------
Ocr.py | 4 +
3 files changed, 37 insertions(+), 25 deletions(-)
diff --git a/Ocr.py b/Ocr.py
index f680669..662cb0b 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -220,7 +220,7 @@
return "x"
if self.settings is not None:
sid_mask=self.settings.get("sid_mask", None)
- es = getSID(
+ es,err,warn = getSID(
self.img[
int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
@@ -228,6 +228,8 @@
self.sid_classifier,
sid_mask
)
+ [self.errors.append(e) for e in err]
+ [self.warnings.append(w) for w in warn]
return es
diff --git a/aoiOcr.py b/aoiOcr.py
index 02eef68..41b45f0 100644
--- a/aoiOcr.py
+++ b/aoiOcr.py
@@ -2,7 +2,7 @@
from sklearn.externals import joblib
-settings = {"sid_mask": "11xx0xxx", "answer_treshold": 0.25}
+settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
classifier = joblib.load("filename.joblib")
#p = Paper(filename="testpage300dpi_scan1.png")
diff --git a/sid_process.py b/sid_process.py
index 48326c0..14beb68 100644
--- a/sid_process.py
+++ b/sid_process.py
@@ -79,23 +79,32 @@
return sid_no
-def segment_by_sid_len(image,sid_len, classifier):
- sid_no=""
- #find biggest block of pixels
+def segment_by_sid_len(image, sid_mask, classifier):
+ sid_no = ""
+ sid_len = len(sid_mask)
+ if sid_mask[0] == "1":
+ move_left = 45
+ elif sid_mask[0] == "x":
+ move_left = 55
+ else:
+ move_left = 0
+ # find biggest block of pixels
- image1=cv2.morphologyEx(image,cv2.MORPH_DILATE, kernel(5,25), iterations=3)
- cv2.imwrite("sidblock1.png",image1)
+ image1 = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=3)
+ cv2.imwrite("sidblock1.png", image1)
im2, ctrs, hier = cv2.findContours(
image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
- sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.contourArea(ctr)) #get bigges contour
+ sorted_ctrs = sorted(
+ ctrs, key=lambda ctr: cv2.contourArea(ctr)
+ ) # get biggest contour
x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
- image=image[y:y+h,x+25:x+w-25]
- cv2.imwrite("sidblock2.png",image)
+ image = image[y : y + h, x + 25 - move_left : x + w - 25]
+ cv2.imwrite("sidblock2.png", image)
imgHeight, imgWidth = image.shape[0:2]
- numWidth=int(imgWidth/(sid_len))
- for i in range(0,sid_len):
- num=image[:,i*numWidth:(i+1)*numWidth]
+ numWidth = int(imgWidth / (sid_len))
+ for i in range(0, sid_len):
+ num = image[:, i * numWidth : (i + 1) * numWidth]
num = img_as_ubyte(num < 128)
num = cv2.resize(num, (32, 32))
@@ -106,6 +115,7 @@
def getSID(image, classifier, sid_mask):
+ sid_warn = []
image = 255 - image
image = img_as_ubyte(image > 100)
cv2.imwrite("enSID0.png", image)
@@ -115,7 +125,8 @@
image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
# Again noise removal after closing
- #image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
+ # image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(8, 8), iterations=1)
+ # don't do too much noise removal.
image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)
# Skeletonization
@@ -129,21 +140,20 @@
# Thining again
image = img_as_ubyte(morphology.skeletonize(image > 0.5))
image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
- cv2.imwrite("enhancedSID.png",image)
+ cv2.imwrite("enhancedSID.png", image)
im2, ctrs, hier = cv2.findContours(
image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
sid_no = ""
- #sid_len = len(sid_mask)
- #sid_no = segment_by_sid_len(image, sid_len, classifier)
- #if sid_mask is not None:
- print(len(sid_mask),len(sorted_ctrs))
- #if len(sid_mask)==len(sorted_ctrs):
- sid_no=segment_by_contours(image,sorted_ctrs[1:],classifier)
+ print(len(sid_mask), len(sorted_ctrs))
+ sid_no = segment_by_contours(
+ image, sorted_ctrs[1:], classifier
+ ) # skip the first (leftmost) contour, which is assumed to be the one surrounding the whole image
print(sid_no)
- if(len(sid_no)!=len(sid_mask)):
- print("Ooops have to find another way")
- sid_no=segment_by_sid_len(image,len(sid_mask),classifier)
- return sid_no
+ if len(sid_no) != len(sid_mask):
+ #print("Ooops have to find another way")
+ sid_warn.append("Trying second SID algorithm.")
+ sid_no = segment_by_sid_len(image, sid_mask, classifier)
+ return (sid_no, [], sid_warn)
--
Gitblit v1.8.0