from glob import glob

try:
    # joblib is distributed as its own package; prefer it when installed.
    import joblib
except ImportError:
    # Fallback for old scikit-learn releases that still bundle joblib
    # (sklearn.externals.joblib was removed in scikit-learn 0.23).
    from sklearn.externals import joblib

from Ocr import Paper

# Options handed to every Paper built in this script.
# NOTE(review): the meaning of "sid_mask" / "answer_threshold" is defined by
# Ocr.Paper, not here -- check that class before changing the values.
settings = {
    "sid_mask": "64xx0xxx",
    "answer_threshold": 0.25,
}

# Pre-trained object passed to Paper as `sid_classifier`, loaded from disk.
classifier = joblib.load("filename.joblib")

# Earlier single-file experiments, kept for reference:
# p = Paper(filename="testpage300dpi_scan1.png")
# p = Paper(filename='sizif111.tif', sid_classifier=classifier, settings={"sid_mask": "11xx0xxx", "answer_threshold": 0.25})
# p = Paper(filename='processed_scans/20141016095134535_0006.tif', sid_classifier=classifier, settings=settings)
# p = Paper(filename="processed_scans/20151111080408825_0001.tif", sid_classifier=classifier, settings=settings)
# p = Paper(filename='processed_scans/20151028145444607_0028.tif', sid_classifier=classifier, settings=settings)

# Hand-picked sample scans; one of them is selected by index below.
# NOTE(review): "..._0028.tif" is listed twice (indices 1 and 2). It is left
# as-is because removing it would shift what pa[9] refers to.
pa = [
    "processed_scans/20141016095134535_0006.tif",
    "processed_scans/20141016095134535_0028.tif",
    "processed_scans/20141016095134535_0028.tif",
    "processed_scans/20141016095134535_0037.tif",
    "processed_scans/20141021095744144_0005.tif",
    "processed_scans/20141021095744144_0009.tif",
    "processed_scans/20141028095553745_0018.tif",
    "processed_scans/20151013180545275_0011.tif",
    "processed_scans/20160408140801098_0004.tif",
    "processed_scans/20160510075445995_0026.tif",
]

# Build a Paper for the sample at index 9 (currently the last list entry).
p = Paper(
    filename=pa[9],
    sid_classifier=classifier,
    settings=settings,
)

# Optional debug output for the single Paper held in `p`;
# uncomment individual lines as needed.
# print(p.QRData)
# print(p.errors)

# print(p.getSkewAngle())
# print(p.locateUpMarkers())
# print(p.locateRightMarkers())
# print(p.answerMatrix)
# p.get_enhanced_sid()


# Dump the OCR result computed for the selected sample.
single_paper_data = p.get_paper_ocr_data()
print(single_paper_data)


# Batch run: OCR every .tif under processed_scans/ (in sorted path order),
# print each result, and keep two running counters:
#   total     -- results whose 'page_no' is 0
#   wrong_sid -- results whose 'errors' entry is non-empty
filelist = glob("processed_scans/*.tif")
wrong_sid = 0
total = 0
for f in sorted(filelist):
    print("processing: {}".format(f))
    # From here on `p` holds the OCR result dict, not a Paper instance.
    p = Paper(filename=f, sid_classifier=classifier, settings=settings).get_paper_ocr_data()
    print(f, p)

    counted = p['page_no'] == 0
    if counted:
        total += 1

    # NOTE(review): the source this was recovered from had lost its
    # indentation, so it is unclear whether this check was meant to sit inside
    # the page_no == 0 branch. It is kept at loop level (errors on any page
    # are counted) -- confirm against the intended statistic.
    if len(p['errors']) != 0:
        wrong_sid += 1

    # Report progress once per ten counted pages. Requiring `counted` keeps
    # the line from repeating for every file while `total` sits at 0 or at
    # the same multiple of ten.
    if counted and total % 10 == 0:
        print("Total:{}, wrong SID: {}".format(total, wrong_sid))

# Final summary after all files have been processed.
print("Total:{}, wrong SID: {}".format(total, wrong_sid))