From d5c694ac711ca3b434bf16bd920b90d1a7e758c4 Mon Sep 17 00:00:00 2001
From: Samo Penic <samo.penic@gmail.com>
Date: Sat, 17 Nov 2018 09:57:31 +0000
Subject: [PATCH] Improving the robustness of all three algorithms.
---
aoiOcr.py | 33 ++++++++++++----
sid_process.py | 82 ++++++++++++++++++++++++++++++++++++-----
Ocr.py | 2
template-8.png | 0
4 files changed, 97 insertions(+), 20 deletions(-)
diff --git a/Ocr.py b/Ocr.py
index 662cb0b..f4447e5 100644
--- a/Ocr.py
+++ b/Ocr.py
@@ -222,7 +222,7 @@
sid_mask=self.settings.get("sid_mask", None)
es,err,warn = getSID(
self.img[
- int(0.045 * self.imgHeight) : int(0.085 * self.imgHeight),
+ int(0.04 * self.imgHeight) : int(0.095 * self.imgHeight),
int(0.7 * self.imgWidth) : int(0.99 * self.imgWidth),
],
self.sid_classifier,
diff --git a/aoiOcr.py b/aoiOcr.py
index 0d59684..bb74503 100644
--- a/aoiOcr.py
+++ b/aoiOcr.py
@@ -3,14 +3,24 @@
from glob import glob
-settings = {"sid_mask": "61xx0xxx", "answer_treshold": 0.25}
+settings = {"sid_mask": "64xx0xxx", "answer_treshold": 0.25}
classifier = joblib.load("filename.joblib")
-#p = Paper(filename="testpage300dpi_scan1.png")
-#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings=settings)
+# p = Paper(filename="testpage300dpi_scan1.png")
+#p=Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_treshold": 0.25})
#p=Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings)
-#p=Paper(filename='processed_scans/20151111080408825_0001.tif', sid_classifier=classifier, settings=settings)
-p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)
+#p = Paper(filename="processed_scans/20151111080408825_0001.tif",sid_classifier=classifier,settings=settings,)
+#p=Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)
+pa = [
+ "processed_scans/20141016095134535_0006.tif",
+ "processed_scans/20141016095134535_0028.tif",
+ "processed_scans/20141016095134535_0028.tif",
+ "processed_scans/20141016095134535_0037.tif",
+ "processed_scans/20141021095744144_0005.tif",
+ "processed_scans/20141021095744144_0009.tif",
+ "processed_scans/20141028095553745_0018.tif",
+]
+p=Paper(filename=pa[6], sid_classifier=classifier, settings=settings)
# print(p.QRData)
# print(p.errors)
@@ -23,8 +33,13 @@
print(p.get_paper_ocr_data())
-exit(0)
-filelist = glob("processed_scans/*.tif")
-for f in filelist:
- print(f,Paper(filename=f, sid_classifier=classifier, settings=settings).get_paper_ocr_data())
+filelist = glob("processed_scans/*.tif")
+for f in sorted(filelist):
+ print("processing: {}".format(f))
+ print(
+ f,
+ Paper(
+ filename=f, sid_classifier=classifier, settings=settings
+ ).get_paper_ocr_data(),
+ )
diff --git a/sid_process.py b/sid_process.py
index 4674c0e..1f93d3c 100644
--- a/sid_process.py
+++ b/sid_process.py
@@ -57,6 +57,13 @@
def kernel(x, y):
return np.ones((x, y), np.uint8)
+def sid_compare(sid_no, sid_mask):
+    """Return True when ``sid_no`` fits ``sid_mask``.
+
+    An ``x`` in the mask accepts any character; every other mask character
+    must equal the character at the same position in ``sid_no``.  The two
+    strings must also have the same length, because ``zip`` alone stops at
+    the shorter one and would accept an empty or truncated SID.
+    """
+    if len(sid_no) != len(sid_mask):
+        return False
+    return all(m == "x" or m == d for m, d in zip(sid_mask, sid_no))
+
+
def segment_by_contours(image, sorted_ctrs, classifier):
sid_no = ""
@@ -77,7 +84,7 @@
return sid_no
-def segment_by_sid_len(image, sid_mask, classifier):
+def segment_by_sid_len(image, original_image, sid_mask, classifier):
sid_no = ""
sid_len = len(sid_mask)
if sid_mask[0] == "1":
@@ -86,9 +93,11 @@
move_left = 55
else:
move_left = 0
+ # Remove noise
+ image2 = cv2.morphologyEx(original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=7)
# find biggest block of pixels
-
- image1 = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
+ image1 = cv2.morphologyEx(image2, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
+ image1=img_as_ubyte(image1>50)
cv2.imwrite("sidblock1.png", image1)
im2, ctrs, hier = cv2.findContours(
image1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
@@ -97,7 +106,7 @@
ctrs, key=lambda ctr: cv2.contourArea(ctr)
) # get bigges contour
x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])
- image = image[y : y + h, x + 25 - move_left : x + w - 25]
+ image = image[y : y + h, x + 25 - move_left : x + w - 40] #+25,-25
cv2.imwrite("sidblock2.png", image)
imgHeight, imgWidth = image.shape[0:2]
numWidth = int(imgWidth / (sid_len))
@@ -111,14 +120,60 @@
sid_no = sid_no + str(classifier.predict(num.reshape(1, -1) / 255.0)[0])
return sid_no
+def segment_by_7segments(image, original_image, sid_mask, classifier):
+    """Read the SID by template-matching each digit position.
+
+    ``original_image`` is morphologically closed and thresholded (pixels
+    below 50 become foreground), then searched for ``template-8.png``.
+    Every match with a normalised correlation of at least 0.75 marks one
+    digit; the digit is cut out of ``image``, resized to 32x32 and passed to
+    ``classifier.predict``.  Debug images are written to ``sid_3rd1.png``,
+    ``sid_3rd2.png`` and ``sid_3no_<i>.png``.
+
+    ``sid_mask`` is not used; it is accepted so the signature matches
+    ``segment_by_sid_len``.
+
+    Returns the predicted digits joined into a string, or ``""`` when the
+    template cannot be loaded, nothing matches, or a digit crop cannot be
+    resized.
+    """
+    block_image = cv2.morphologyEx(
+        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
+    )
+    block_image = img_as_ubyte(block_image < 50)
+    cv2.imwrite("sid_3rd1.png", block_image)
+    template = cv2.imread("template-8.png", 0)
+    if template is None:
+        # cv2.imread reports a missing/unreadable file by returning None
+        # instead of raising; bail out so the caller can try another method.
+        return ""
+    w, h = template.shape[::-1]
+    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
+    match_y, match_x = np.where(res >= 0.75)
+    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+    if match_x.size == 0:
+        return ""
+    # Every hit is shifted 10 px up and left (presumably to leave a margin
+    # around the digit -- TODO confirm against template-8.png).
+    xs = match_x - 10
+    ys = match_y - 10
+    # Sort by x first, then y, i.e. left to right across the SID field.
+    order = np.lexsort((ys, xs))
+    xs, ys = xs[order], ys[order]
+    # Consecutive hits that are at most half a template width apart are
+    # collapsed into one; only the last hit of each such run is kept.
+    keep = np.append(np.diff(xs) > int(w / 2), True)
+    # Plain ints: cv2 drawing functions do not reliably accept NumPy scalars.
+    corners = [(int(x), int(y)) for x, y in zip(xs[keep], ys[keep])]
+    for x, y in corners:
+        cv2.rectangle(cimg, (x, y), (x + w, y + h), (0, 255, 255), 2)
+    cv2.imwrite("sid_3rd2.png", cimg)
+
+    digits = []
+    for i, (x, y) in enumerate(corners):
+        # Clamp the start to 0: a negative start index would wrap around to
+        # the opposite edge of the image and yield the wrong (or no) pixels.
+        num = image[max(y, 0) : y + h, max(x, 0) : x + w]
+        num = img_as_ubyte(num < 128)
+        try:
+            num = cv2.resize(num, (32, 32))
+        except cv2.error:
+            # An empty crop cannot be resized; give up on this method.
+            return ""
+        cv2.imwrite("sid_3no_{}.png".format(i), num)
+        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))
+
+    return "".join(digits)
def getSID(image, classifier, sid_mask):
sid_warn = []
+ sid_err=[]
image = 255 - image
+ image_original=image.copy()
image = img_as_ubyte(image > 100)
cv2.imwrite("enSID0.png", image)
# Remove noise
- image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=1)
+ image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)
# Closing. Connect non connected parts
image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)
# Again noise removal after closing
@@ -144,14 +199,21 @@
)
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
- sid_no = ""
print(len(sid_mask), len(sorted_ctrs))
sid_no = segment_by_contours(
image, sorted_ctrs[1:], classifier
) # we remove largest contour that surrounds whole image
print(sid_no)
- if len(sid_no) != len(sid_mask):
- #print("Ooops have to find another way")
+ if len(sid_no) != len(sid_mask) or not sid_compare(sid_no,sid_mask):
sid_warn.append("Trying second SID algorithm.")
- sid_no = segment_by_sid_len(image, sid_mask, classifier)
- return (sid_no, [], sid_warn)
+ sid_no = segment_by_7segments(image, image_original, sid_mask, classifier)
+ print(sid_no)
+ if(len(sid_no))!=len(sid_mask):
+ sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier)
+ sid_warn.append("Trying third SID algorithm.")
+
+
+ if not sid_compare(sid_no, sid_mask):
+ sid_err=['Wrong SID!']
+
+ return (sid_no, sid_err, sid_warn)
diff --git a/template-8.png b/template-8.png
new file mode 100644
index 0000000..cb2063b
--- /dev/null
+++ b/template-8.png
Binary files differ
--
Gitblit v1.8.0