| commit | author | age | ||
| 83c3f6 | 1 | import os |
| SP | 2 | import django |
| 3 | ||
| 4 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aoi.settings") | |
| 5 | django.setup() | |
| 6 | from exam import models as exmodel | |
| 7 | from scan import models as scmodel | |
| 8 | from aoi_ocr.Ocr import Paper as ocr_paper | |
| 9 | from sklearn.externals import joblib | |
| 10 | from django.core.files import File | |
| 11 | import json | |
| 12 | import pkg_resources | |
| 13 | from glob import glob | |
| 14 | import pathlib | |
| 15 | import time | |
| 16 | import collections | |
| 17 | import shutil | |
| 18 | ||
# Serialized classifier bundled with the aoi_ocr package; it is loaded with
# joblib below and handed to the OCR step as `sid_classifier`.
path = "/filename.joblib"  # always use slash
filepath = pkg_resources.resource_filename("aoi_ocr", path)

# Options passed unchanged to every ocr_paper(...) call.
settings = {"sid_mask": "11x00xxx", "answer_threshold": 0.25}
classifier = joblib.load(filepath)
# Source images are moved here once their Scan row has been saved.
processed_dir = "processedscans"
# Seconds to wait before polling "inputscans" again.
poll_interval = 10

# Poll "inputscans" forever: OCR every *.tif, store the result as a Scan row
# and move the source image into `processed_dir`.
# NOTE(review): nesting was reconstructed from a listing that had lost its
# indentation; in particular the error/progress counters are assumed to sit
# under the `page_no == 1` check -- confirm against the original file.
while True:
    filelist = sorted(glob("inputscans/*.tif"))
    if not filelist:
        time.sleep(poll_interval)
        continue

    wrong_sid = 0
    total = 0
    moved = 0  # files archived during this pass
    for f in filelist:
        print("processing: {}".format(f))
        p = ocr_paper(
            filename=f, sid_classifier=classifier, settings=settings
        ).get_paper_ocr_data()
        if p is None:
            print("We got an empty page... Continuing.")
            continue

        if p["page_no"] == 1:
            total += 1
            if p["errors"]:
                wrong_sid += 1
            if total % 10 == 0:
                print("Total:{}, wrong SID: {}".format(total, wrong_sid))
        print(p["exam_id"], p["paper_id"], p["page_no"])

        # Both lookups use .get(), so an exam/paper id that is not in the
        # database raises DoesNotExist and stops the script.
        ex = exmodel.Exam.objects.get(pk=int(p["exam_id"]))
        pa = exmodel.GeneratedPaper.objects.get(
            serial_no=int(p["paper_id"]), exam=ex
        )

        # .exists() asks the database for presence only instead of loading
        # every matching row just to count them.
        already_scanned = scmodel.Scan.objects.filter(
            exam=ex, paper=pa, page_no=p["page_no"]
        ).exists()
        if already_scanned:
            print("paper already exists in the scan list")
            continue

        sc = scmodel.Scan()
        sc.answer_matrix = p["ans_matrix"]
        sc.student_id = p["sid"]
        sc.ocr_debug = json.dumps(p)
        sc.exam = ex
        sc.paper = pa
        sc.page_no = p["page_no"]
        # Close the image file once it has been copied into storage; the
        # loop runs indefinitely, so an unclosed handle per page adds up.
        with open(p["output_filename"], "rb") as image_file:
            sc.scan_image.save(
                p["output_filename"].split("/")[-1], File(image_file)
            )
        sc.save()

        # Archive the source under a timestamped name so that re-scans of
        # the same file name do not overwrite each other.
        plib_src = pathlib.Path(f)
        plib_dest = pathlib.Path(processed_dir).joinpath(
            plib_src.stem + "__" + str(int(time.time())) + plib_src.suffix
        )
        if not plib_dest.exists():
            # shutil.move instead of Path.replace: the original author
            # suspected the two directories may be on different filesystems
            # under docker (unverified).
            shutil.move(str(plib_src), str(plib_dest))
            moved += 1

    print("Total:{}, wrong SID: {}".format(total, wrong_sid))
    if not moved:
        # Every file was skipped (empty page, duplicate, name collision) and
        # is still in "inputscans"; wait instead of re-running OCR on the
        # same files in a tight loop.
        time.sleep(poll_interval)