Stripped personal data from development repository
Samo Penic
2019-02-20 83c3f647c35477564b77cbc5b36d37d793d5442a
commit | author | age
83c3f6 1 import os
SP 2 import django
3
4 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aoi.settings")
5 django.setup()
6 from exam import models as exmodel
7 from scan import models as scmodel
8 from aoi_ocr.Ocr import Paper as ocr_paper
9 from sklearn.externals import joblib
10 from django.core.files import File
11 import json
12 import pkg_resources
13 from glob import glob
14 import pathlib
15 import time
16 import collections
17 import shutil
18
# Resource name of the classifier file shipped inside the ``aoi_ocr`` package.
# pkg_resources resource names are "/"-separated paths *relative* to the
# package; an absolute name (leading "/") is documented as not allowed and
# triggers a DeprecationWarning in newer setuptools, so no leading slash here.
path = "filename.joblib"
filepath = pkg_resources.resource_filename("aoi_ocr", path)

# Options passed unchanged to every ocr_paper() call in the processing loop.
settings = {"sid_mask": "11x00xxx", "answer_threshold": 0.25}

# Loaded once at start-up and reused for every scan (passed as sid_classifier).
# NOTE: joblib files are pickle-based; only load files that ship with the package.
classifier = joblib.load(filepath)

# Directory that successfully imported scans are moved into.
processed_dir = "processedscans"
27
# Watch the "inputscans" directory forever: run OCR on every *.tif found
# there, store each recognised page as a Scan record, and move the source
# image into ``processed_dir`` once it has been saved.
while True:
    # One glob both answers "is there any work?" and yields the work list.
    filelist = sorted(glob("inputscans/*.tif"))
    if not filelist:
        time.sleep(10)
        continue

    wrong_sid = 0  # pages whose OCR result carried at least one error
    total = 0  # number of first pages seen in this pass
    moved = 0  # files archived in this pass (used to detect a stalled pass)

    for f in filelist:
        print("processing: {}".format(f))
        p = ocr_paper(
            filename=f, sid_classifier=classifier, settings=settings
        ).get_paper_ocr_data()
        if p is None:
            print("We got an empty page... Continuing.")
            continue

        if p["page_no"] == 1:
            total += 1
        if p["errors"]:
            wrong_sid += 1
        if total % 10 == 0:
            print("Total:{}, wrong SID: {}".format(total, wrong_sid))
        print(p["exam_id"], p["paper_id"], p["page_no"])

        ex = exmodel.Exam.objects.get(pk=int(p["exam_id"]))
        pa = exmodel.GeneratedPaper.objects.get(
            serial_no=int(p["paper_id"]), exam=ex
        )

        # exists() asks the database for a yes/no answer instead of loading
        # every matching row just to count them.
        already_scanned = scmodel.Scan.objects.filter(
            exam=ex, paper=pa, page_no=p["page_no"]
        ).exists()
        if already_scanned:
            print("paper already exists in the scan list")
            continue

        sc = scmodel.Scan()
        sc.answer_matrix = p["ans_matrix"]
        sc.student_id = p["sid"]
        sc.ocr_debug = json.dumps(p)
        sc.exam = ex
        sc.paper = pa
        sc.page_no = p["page_no"]

        # Close the image file once it has been copied into storage; this
        # process never exits, so an unclosed handle per page would pile up.
        with open(p["output_filename"], "rb") as image:
            sc.scan_image.save(p["output_filename"].split("/")[-1], File(image))
        sc.save()

        # Archive the source under a timestamped name so it is not picked up
        # again on the next pass.
        plib_src = pathlib.Path(f)
        plib_dest = pathlib.Path(processed_dir).joinpath(
            plib_src.stem + "__" + str(int(time.time())) + plib_src.suffix
        )
        if not plib_dest.exists():
            # shutil.move instead of Path.replace: the original author
            # suspected that a plain rename fails when the two directories
            # are on different filesystems (e.g. separate docker volumes).
            shutil.move(str(plib_src), str(plib_dest))
            moved += 1

    print("Total:{}, wrong SID: {}".format(total, wrong_sid))

    # Skipped files (empty page, duplicate scan, name collision) stay in
    # "inputscans". If nothing was archived, back off before the next pass
    # instead of re-running OCR on the same files in a tight loop.
    if not moved:
        time.sleep(10)