| commit | author | age | ||
| 9efc18 | 1 | import cv2 |
| SP | 2 | import numpy as np |
| 762a5e | 3 | from skimage import morphology, img_as_ubyte |
| 02e0f7 | 4 | |
| 0d97e9 | 5 | import pkg_resources |
| SP | 6 | |
# Resource name of the template image, relative to this module's package.
# The name is handed to pkg_resources, not to the OS, so it is written with
# forward slashes on every platform.
templatefile = "/template-8.png"  # always use slash
# Filesystem path of the template image; read with cv2.imread in
# segment_by_7segments and used there as the pattern for cv2.matchTemplate.
template8 = pkg_resources.resource_filename(__name__, templatefile)
| SP | 9 | |
| 9efc18 | 10 | |
def kernel(x, y):
    """
    Build an all-ones structuring element of shape ``(x, y)``.

    :param x: size of the first array dimension (rows)
    :param y: size of the second array dimension (columns)
    :return: ``numpy.uint8`` array of shape ``(x, y)`` filled with ones
    """
    shape = (x, y)
    return np.ones(shape, dtype=np.uint8)
| SP | 16 | |
| 6fde5f | 17 | |
def find_biggest_blob(image, original_image, sid_mask):
    """
    Crop ``image`` to the biggest block of pixels found in ``original_image``.

    ``original_image`` is opened to remove noise, dilated with a wide kernel
    so that neighbouring parts merge into one block, and thresholded. The
    bounding box of the external contour with the largest area, adjusted by
    fixed horizontal margins, is then used to crop ``image``.

    :param image: image to crop
    :param original_image: image in which the biggest block is searched for
    :param sid_mask: student id mask; its first character selects how far the
        left edge of the crop is moved to the left ("1" -> 35, "x" -> 40,
        anything else -> 0)
    :return: the cropped part of ``image``
    :raises IndexError: if ``sid_mask`` is empty or no contour is found
    """
    move_left = {"1": 35, "x": 40}.get(sid_mask[0], 0)

    # Remove noise
    denoised = cv2.morphologyEx(
        original_image, cv2.MORPH_OPEN, kernel(2, 2), iterations=3
    )
    # find biggest block of pixels
    merged = cv2.morphologyEx(
        denoised, cv2.MORPH_DILATE, kernel(5, 25), iterations=4
    )
    merged = img_as_ubyte(merged > 50)
    cv2.imwrite("/tmp/sidblock1.png", merged)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are second from last in both cases.
    ctrs = cv2.findContours(
        merged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    sorted_ctrs = sorted(ctrs, key=cv2.contourArea)  # biggest contour is last
    x, y, w, h = cv2.boundingRect(sorted_ctrs[-1])

    # Clamp the column bounds at 0: a negative bound would be interpreted as
    # an offset from the right edge and crop the wrong part of the image.
    left = max(x + 25 - move_left, 0)
    right = max(x + w - 40, 0)
    return image[y : y + h, left:right]
| 42 | ||
| 5460bf | 43 | |
def sid_compare(sid_no, sid_mask):
    """
    Check whether a recognised student id is valid according to the mask.

    A mask position holding ``"x"`` accepts any character; every other mask
    position must be equal to the character at the same position of
    ``sid_no``.

    :param sid_no: recognised student id as a string
    :param sid_mask: student id mask as a string
    :return: True if both have the same length and all fixed positions match,
        else False
    """
    # zip() stops at the shorter input, so without this check an empty or
    # truncated id would be accepted by any mask.
    if len(sid_no) != len(sid_mask):
        return False
    return all(m == "x" or m == d for m, d in zip(sid_mask, sid_no))
| 55 | ||
| 56 | ||
def segment_by_contours(image, original_image, classifier, sid_mask):
    """
    First algorithm. It segments the numerals by their external contours, so
    it works with numbers whose individual numerals do not touch.

    The image is first cropped with ``find_biggest_blob``. Contours are then
    processed from left to right: a contour narrower than half of its height
    is taken as "1" without asking the classifier; every other contour is
    cropped, thresholded, resized to 32x32 and passed to the classifier.

    :param image: enhanced image of the student id
    :param original_image: image passed on to ``find_biggest_blob``
    :param classifier: object whose ``predict`` accepts a ``(1, 1024)`` array
        and returns a sequence whose first element is the recognised numeral
    :param sid_mask: student id mask, passed on to ``find_biggest_blob``
    :return: student id as a string
    """
    image = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sid_contour1.png", image)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are second from last in both cases.
    ctrs = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # left-to-right order, i.e. the reading order of the numerals
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    numerals = []
    for i, ctr in enumerate(sorted_ctrs):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        if w < h / 2:
            # a tall, narrow contour is assumed to be the numeral 1
            numerals.append("1")
            continue
        # Getting ROI
        roi = image[y : y + h, x : x + w]
        roi = img_as_ubyte(roi < 128)
        roi = cv2.resize(roi, (32, 32))

        cv2.imwrite("/tmp/sid_no_{}.png".format(i), roi)
        numerals.append(str(classifier.predict(roi.reshape(1, -1) / 255.0)[0]))
    return "".join(numerals)
| 89 | ||
| 90 | ||
def segment_by_sid_len(image, original_image, sid_mask, classifier):
    """
    Third algorithm. It takes the biggest "blob" in the image and cuts it by
    force into as many equally wide slices as the mask has characters; every
    slice is thresholded, resized to 32x32 and classified.

    It has some problems with finding the individual numbers, so some
    tweaking must be done!

    :param image: enhanced image of the student id
    :param original_image: image passed on to ``find_biggest_blob``
    :param sid_mask: student id mask; its length is the number of slices
    :param classifier: object whose ``predict`` accepts a ``(1, 1024)`` array
        and returns a sequence whose first element is the recognised numeral
    :return: student id as a string
    """
    digit_count = len(sid_mask)
    blob = find_biggest_blob(image, original_image, sid_mask)
    cv2.imwrite("/tmp/sidblock2.png", blob)

    _, blob_width = blob.shape[0:2]
    slot_width = int(blob_width / digit_count)

    digits = []
    for idx in range(digit_count):
        left = idx * slot_width
        cell = blob[:, left : left + slot_width]
        cell = cv2.resize(img_as_ubyte(cell < 128), (32, 32))
        cv2.imwrite("/tmp/sid_no_{}.png".format(idx), cell)
        digits.append(str(classifier.predict(cell.reshape(1, -1) / 255.0)[0]))
    return "".join(digits)
| 115 | ||
| 6fde5f | 116 | |
def segment_by_7segments(image, original_image, sid_mask, classifier):
    """
    Second algorithm. It closes the original image so that all 7 segments
    become visible as 8888888 and then matches the template image of an 8
    against it. It works if the scanned gray level is high enough.

    Every match position is moved 10 pixels up and to the left. Matches are
    sorted by x and, within a run of matches less than half a template width
    apart, only the last one is kept. The region of ``image`` under each kept
    match is thresholded, resized to 32x32 and classified.

    :param image: enhanced single-channel image of the student id; the
        numerals are cut out of this image
    :param original_image: image in which the template is searched for
    :param sid_mask: not used by this algorithm
    :param classifier: object whose ``predict`` accepts a ``(1, 1024)`` array
        and returns a sequence whose first element is the recognised numeral
    :return: student id number as a string; "" if the template is not found
        or a matched region cannot be resized
    """
    block_image = cv2.morphologyEx(
        original_image, cv2.MORPH_CLOSE, kernel(2, 2), iterations=10
    )
    block_image = img_as_ubyte(block_image < 50)
    cv2.imwrite("/tmp/sid_3rd1.png", block_image)

    template = cv2.imread(template8, 0)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(block_image, template, cv2.TM_CCOEFF_NORMED)
    match_rows, match_cols = np.where(res >= 0.75)
    cimg = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if match_cols.size == 0:
        return ""

    xs = match_cols - 10
    ys = match_rows - 10
    # lexsort uses its last key as the primary one: order by x, then by y
    order = np.lexsort((ys, xs))
    xs, ys = xs[order], ys[order]
    # filter points: keep the last match of every run of close x positions
    keep = np.append(np.diff(xs) > int(w / 2), True)
    corners = [(int(px), int(py)) for px, py in zip(xs[keep], ys[keep])]

    for px, py in corners:
        cv2.rectangle(cimg, (px, py), (px + w, py + h), (0, 255, 255), 2)
    cv2.imwrite("/tmp/sid_3rd2.png", cimg)

    numerals = []
    for i, (px, py) in enumerate(corners):
        num = img_as_ubyte(image[py : py + h, px : px + w] < 128)
        try:
            num = cv2.resize(num, (32, 32))
        except cv2.error:
            # the region is empty (match at the image border): give up so the
            # caller can fall back to another algorithm
            return ""
        cv2.imwrite("/tmp/sid_3no_{}.png".format(i), num)
        numerals.append(str(classifier.predict(num.reshape(1, -1) / 255.0)[0]))

    return "".join(numerals)
| ac766e | 171 | |
| 6fde5f | 172 | |
def getSID(image, classifier, sid_mask):
    """
    Try different approaches on the image to get the student id number.

    The image is inverted and thresholded, cleaned with a closing and an
    opening, thinned, thickened again, skeletonized and dilated to get a
    normalized image. That image is sent to the segmentation and recognition
    functions: contours first, then the 7-segment template matching if the
    result has the wrong length or does not fit the mask, then the forced
    slicing if the length is still wrong.

    Tweak the MORPH_CLOSE / MORPH_OPEN lines if the recognition is poor.

    :param image: image of the student id field
    :param classifier: classifier handed to the segmentation functions
    :param sid_mask: student id mask
    :return: (student_id, error, warning) student id as a string, list of
        errors and list of warnings raised during the recognition
    """
    warnings = []
    errors = []

    inverted = 255 - image
    image_original = inverted.copy()
    binary = img_as_ubyte(inverted > 70)
    cv2.imwrite("/tmp/enSID0.png", binary)

    # Closing. Connect non connected parts
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel(5, 1), iterations=4)

    # Noise removal after closing; don't do too much of it.
    opened = cv2.morphologyEx(closed, cv2.MORPH_OPEN, kernel(3, 3), iterations=1)

    # Skeletonization
    thinned = img_as_ubyte(morphology.thin(opened > 128))
    cv2.imwrite("/tmp/enSID1.png", thinned)

    # Making lines stronger
    thick = cv2.morphologyEx(thinned, cv2.MORPH_DILATE, kernel(5, 5), iterations=1)
    thick = cv2.morphologyEx(thick, cv2.MORPH_CLOSE, kernel(10, 10))

    # Thinning again
    skeleton = img_as_ubyte(morphology.skeletonize(thick > 0.5))
    enhanced = cv2.morphologyEx(skeleton, cv2.MORPH_DILATE, kernel(10, 10))
    cv2.imwrite("/tmp/enhancedSID.png", enhanced)

    expected_len = len(sid_mask)
    sid_no = segment_by_contours(enhanced, image_original, classifier, sid_mask)

    first_ok = len(sid_no) == expected_len and sid_compare(sid_no, sid_mask)
    if not first_ok:
        warnings.append("Trying second SID algorithm.")
        sid_no = segment_by_7segments(enhanced, image_original, sid_mask, classifier)

    if len(sid_no) != expected_len:
        sid_no = segment_by_sid_len(enhanced, image_original, sid_mask, classifier)
        warnings.append("Trying third SID algorithm.")

    if not sid_compare(sid_no, sid_mask):
        errors.append("Wrong SID!")

    return sid_no, errors, warnings