| commit | author | age | ||
| 9efc18 | 1 | import cv2 |
| SP | 2 | import numpy as np |
| 762a5e | 3 | from skimage import morphology, img_as_ubyte |
| 02e0f7 | 4 | |
| 0d97e9 | 5 | import pkg_resources |
| SP | 6 | |
# Resource names given to pkg_resources are '/'-separated and relative to the
# module's directory. A leading slash makes the name look like an absolute
# path, which newer setuptools releases flag as deprecated; the relative name
# resolves to the same file.
templatefile = "template-8.png"
# Filesystem path of the bundled template image read by segment_by_7segments.
template8 = pkg_resources.resource_filename(__name__, templatefile)
| 9 | ||
| 9efc18 | 10 | |
def kernel(x, y):
    """
    Create a rectangular all-ones kernel for the OpenCV morphology calls.

    :param x: number of rows of the kernel
    :param y: number of columns of the kernel
    :return: ``x`` by ``y`` array filled with ones, dtype ``uint8``
    """
    shape = (x, y)
    return np.ones(shape, dtype=np.uint8)
| SP | 16 | |
| 6fde5f | 17 | |
def find_biggest_blob(image, original_image, sid_mask):
    """
    Crop ``image`` to the largest blob of pixels located in ``original_image``.

    ``original_image`` is opened (noise removal) and heavily dilated so that
    neighbouring marks merge into blocks; the bounding box of the block with
    the largest contour area is then used to cut ``image``. The box is shrunk
    by 25 px on the left and 40 px on the right, and its left edge is moved
    further left by an amount that depends on the first mask character.

    :param image: image to crop (indexed as rows, columns)
    :param original_image: image used to locate the blob
    :param sid_mask: student id mask; only its first character is inspected
    :return: cropped view of ``image``, or ``image`` unchanged when no contour
        is found
    """
    # Left-edge correction keyed on the first mask character.
    first = sid_mask[0] if sid_mask else ""
    if first == "1":
        move_left = 45
    elif first == "x":
        move_left = 55
    else:
        move_left = 0

    # Remove noise
    denoised = cv2.morphologyEx(
        original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=7
    )
    # Merge neighbouring marks into one block of pixels
    block = cv2.morphologyEx(denoised, cv2.MORPH_DILATE, kernel(5, 25), iterations=4)
    block = img_as_ubyte(block > 50)
    cv2.imwrite("/tmp/sidblock1.png", block)

    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 2 and 4 return
    # (contours, hierarchy); the contours are the second-to-last item in both.
    ctrs = cv2.findContours(
        block.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if not ctrs:
        # Nothing to crop to; hand the image back untouched instead of
        # failing with an IndexError.
        return image

    # Contour with the largest area (last one after an ascending sort).
    biggest = sorted(ctrs, key=cv2.contourArea)[-1]
    x, y, w, h = cv2.boundingRect(biggest)

    # Clamp the column bounds: a negative bound would be interpreted by numpy
    # as an offset from the right-hand side and select the wrong region.
    left = max(0, x + 25 - move_left)
    right = max(0, x + w - 40)
    return image[y : y + h, left:right]
| 42 | ||
def sid_compare(sid_no, sid_mask):
    """
    Check a recognised student id number against the student id mask.

    An ``"x"`` in the mask accepts any character at that position; every
    other mask character must equal the character of ``sid_no`` at the same
    position. Only the positions present in both arguments are compared, so
    a length difference is not detected here.

    :param sid_no: recognised student id
    :param sid_mask: student id mask
    :return: True if they match, else False
    """
    return all(
        mask_char == "x" or mask_char == digit
        for mask_char, digit in zip(sid_mask, sid_no)
    )
| 54 | ||
| 55 | ||
def segment_by_contours(image, original_image, classifier, sid_mask):
    """
    First algorithm. It segments numerals with contours, so it works with
    numbers whose individual numerals do not touch.

    :param image: enhanced image in which the numerals are segmented
    :param original_image: image used by find_biggest_blob to locate the number
    :param classifier: object whose ``predict`` is called with one flattened
        32x32 numeral scaled by 1/255
    :param sid_mask: student id mask, passed on to find_biggest_blob
    :return: student id as a string
    """
    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sid_contour1.png", image)

    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 2 and 4 return
    # (contours, hierarchy); the contours are the second-to-last item in both.
    ctrs = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # Process the numerals from left to right
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    digits = []
    for i, ctr in enumerate(sorted_ctrs):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        # A box less than half as wide as it is tall is taken to be a "1"
        # without consulting the classifier.
        if w < h / 2:
            digits.append("1")
            continue
        # Getting ROI
        roi = image[y : y + h, x : x + w]
        # Pixels below 128 become 255, all others 0
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        digits.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
| 88 | ||
| 89 | ||
def segment_by_sid_len(image, original_image, sid_mask, classifier):
    """
    Third algorithm. It takes the biggest "blob" in the image and cuts it by
    force into as many equally wide slices as the mask has characters.
    Slices do not always line up with the numerals, so some tweaking must
    be done!

    :param image: enhanced image that is cropped and sliced
    :param original_image: image used by find_biggest_blob to locate the number
    :param sid_mask: student id mask; its length gives the number of slices
    :param classifier: object whose ``predict`` is called with one flattened
        32x32 slice scaled by 1/255
    :return: student id as a string
    """
    digit_count = len(sid_mask)
    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sidblock2.png", image)

    _, total_width = image.shape[0:2]
    slot_width = int(total_width / digit_count)

    digits = []
    for idx in range(digit_count):
        left = idx * slot_width
        cell = image[:, left : left + slot_width]
        # Pixels below 128 become 255, all others 0, then scale to 32x32
        cell = cv2.resize(img_as_ubyte(cell < 128), (32, 32))
        cv2.imwrite("/tmp/sid_no_{}.png".format(idx), cell)
        features = cell.reshape(1, -1) / 255.0
        digits.append(str(classifier.predict(features)[0]))
    return "".join(digits)
| 114 | ||
| 6fde5f | 115 | |
def segment_by_7segments(image, original_image, sid_mask, classifier):
    """
    Second attempt. It closes the original image so that all 7 segments of
    every position become visible as 8888888, then it template-matches an
    "8" pattern image against the result to find the numeral positions.
    It works if the scanned gray level is high enough.

    :param image: enhanced image from which the numerals are cut
    :param original_image: image in which the 7-segment boxes are located
    :param sid_mask: student id mask (not used by this algorithm)
    :param classifier: object whose ``predict`` is called with one flattened
        32x32 numeral scaled by 1/255
    :return: student id number as a string; "" when the template matches
        nowhere or a numeral cannot be resized
    :raises FileNotFoundError: if the template image cannot be read
    """
    block_image = cv2.morphologyEx(
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)

    template = cv2.imread(template8, 0)
    if template is None:
        # cv2.imread signals an unreadable file by returning None; fail with
        # a clear message instead of an AttributeError on ``.shape`` below.
        raise FileNotFoundError(
            "Cannot read digit template: {}".format(template8)
        )
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    loc = np.where(res >= 0.75)
    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    # np.where yields (rows, cols). Build (x, y) corners shifted 10 px up and
    # left, sorted by x and then y.
    hits = sorted((int(col) - 10, int(row) - 10) for row, col in zip(*loc))
    if not hits:
        return ""

    # filter points: of every run of hits whose x positions are at most half
    # a template width apart, keep only the last one.
    xs = np.array([pt[0] for pt in hits])
    ys = np.array([pt[1] for pt in hits])
    keep = np.append(np.diff(xs) > int(w / 2), True)
    # .tolist() gives plain Python ints for the drawing and slicing below.
    points = list(zip(xs[keep].tolist(), ys[keep].tolist()))

    for px, py in points:
        cv2.rectangle(cimg, (px, py), (px + w, py + h), (0, 255, 255), 2)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)

    digits = []
    for i, (px, py) in enumerate(points):
        # The -10 shift can move a corner outside the image; clamp so that a
        # negative bound is not interpreted as an offset from the far edge.
        top, bottom = max(py, 0), max(py + h, 0)
        left, right = max(px, 0), max(px + w, 0)
        num = image[top:bottom, left:right]
        # Pixels below 128 become 255, all others 0
        num = img_as_ubyte(num < 128)
        try:
            num = cv2.resize(num, (32, 32))
        except cv2.error:
            # Unusable crop: give up on this algorithm.
            return ""
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        digits.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

    return "".join(digits)
| ac766e | 170 | |
| 6fde5f | 171 | |
def getSID(image, classifier, sid_mask):
    """
    Tries different approaches on image to get the student id number. First
    it clears the image of noise, then it thins the numbers and thickens them
    again until it gets a normalized image, which is sent to the segmentation
    and recognition functions.

    Tweak both MORPH_OPEN lines....

    :param image: image of the student id field; inverted with ``255 - image``
        (presumably 8-bit grayscale, confirm against the caller)
    :param classifier: classifier handed to the segmentation functions
    :param sid_mask: student id mask the result is validated against
    :return: (student_id, error, warning) student id as a string, list of
        errors and list of warnings during the recognition
    """
    sid_warn = []
    sid_err = []
    image = 255 - image
    image_original = image.copy()
    image = img_as_ubyte(image > 100)
    cv2.imwrite("/tmp/enSID0.png", image)

    # Remove noise
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3)

    # Closing. Connect non connected parts
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(5, 3), iterations=4)

    # Again noise removal after closing; don't do too much noise removal.
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)

    # Thinning
    image = img_as_ubyte(morphology.thin(image > 128))
    cv2.imwrite("/tmp/enSID1.png", image)

    # Making lines stronger
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel(10, 10))

    # Thinning again, then thicken to the final stroke width
    image = img_as_ubyte(morphology.skeletonize(image > 0.5))
    image = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("/tmp/enhancedSID.png", image)

    sid_no = segment_by_contours(image, image_original, classifier, sid_mask)

    if len(sid_no) != len(sid_mask) or not sid_compare(sid_no, sid_mask):
        sid_warn.append("Trying second SID algorithm.")
        sid_no = segment_by_7segments(image, image_original, sid_mask, classifier)

    if len(sid_no) != len(sid_mask):
        sid_warn.append("Trying third SID algorithm.")
        sid_no = segment_by_sid_len(image, image_original, sid_mask, classifier)

    # sid_compare only looks at positions present in both strings, so the
    # length has to be checked as well for the result to count as valid.
    if len(sid_no) != len(sid_mask) or not sid_compare(sid_no, sid_mask):
        sid_err = ["Wrong SID!"]

    return sid_no, sid_err, sid_warn
| 6fde5f | 230 | return sid_no, sid_err, sid_warn |