import os
|
import django
|
|
# Select the project's settings module unless DJANGO_SETTINGS_MODULE is already set in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "aoi.settings")
|
# Initialise Django now, ahead of the project model imports that follow this call.
django.setup()
|
from exam import models as exmodel
|
from scan import models as scmodel
|
from aoi_ocr.Ocr import Paper as ocr_paper
|
from sklearn.externals import joblib
|
from django.core.files import File
|
import json
|
import pkg_resources
|
from glob import glob
|
import pathlib
|
import time
|
import collections
|
import shutil
|
|
# Resource name of the serialized student-ID classifier inside the ``aoi_ocr``
# package. Always use "/" as the separator and keep the name relative:
# pkg_resources documents that resource names cannot be absolute, and current
# setuptools emits a DeprecationWarning for a leading slash. The relative name
# resolves to the same file the old "/filename.joblib" did.
path = "filename.joblib"
filepath = pkg_resources.resource_filename("aoi_ocr", path)

# Options passed unchanged to ``ocr_paper`` for every scan.
# NOTE(review): the meaning of the mask characters and the scale of the
# threshold are defined by aoi_ocr -- confirm there before changing them.
settings = {"sid_mask": "11x00xxx", "answer_threshold": 0.25}

# joblib.load unpickles the file, so ``filepath`` must only ever point at a
# trusted artefact shipped with the package.
classifier = joblib.load(filepath)

# Directory that input scans are moved into once they have been stored.
processed_dir = "processedscans"
|
|
def _store_scan(ocr_data, exam, paper):
    """Create and save a ``Scan`` row, including its image, from one OCR result.

    ``ocr_data`` is the dict returned by ``get_paper_ocr_data()``; ``exam`` and
    ``paper`` are the already-resolved model instances the scan belongs to.
    """
    sc = scmodel.Scan()
    sc.answer_matrix = ocr_data["ans_matrix"]
    sc.student_id = ocr_data["sid"]
    sc.ocr_debug = json.dumps(ocr_data)
    sc.exam = exam
    sc.paper = paper
    sc.page_no = ocr_data["page_no"]

    image_path = ocr_data["output_filename"]
    # Close the image once it has been handed to the field; the previous bare
    # open() leaked one file handle per stored page in this long-running loop.
    with open(image_path, "rb") as image_file:
        sc.scan_image.save(os.path.basename(image_path), File(image_file))

    sc.save()


def _archive_input(scan_path):
    """Move a stored input file into ``processed_dir``.

    The destination name is the original stem plus ``__<unix timestamp>`` and
    the original suffix. Returns True when the file was moved, False when a
    file with that destination name already exists (the input is left alone).
    """
    src = pathlib.Path(scan_path)
    dest = pathlib.Path(processed_dir).joinpath(
        src.stem + "__" + str(int(time.time())) + src.suffix
    )
    if dest.exists():
        return False
    # shutil.move rather than Path.replace: a plain rename fails when source
    # and destination are on different filesystems (e.g. separate Docker
    # mounts), whereas shutil.move falls back to copy-and-delete.
    shutil.move(str(src), str(dest))
    return True


# Poll ``inputscans`` forever: OCR every .tif found, store each new page as a
# Scan, then move the input file out of the way.
while True:
    # One glob decides both "is there work?" and "what is the work?", so the
    # two can never disagree.
    filelist = sorted(glob("inputscans/*.tif"))
    if not filelist:
        time.sleep(10)
        continue

    wrong_sid = 0
    total = 0
    archived = 0

    for f in filelist:
        print("processing: {}".format(f))
        p = ocr_paper(
            filename=f, sid_classifier=classifier, settings=settings
        ).get_paper_ocr_data()

        if p is None:
            print("We got an empty page... Continuing.")
            continue

        # Statistics are kept per paper, i.e. only first pages are counted.
        if p["page_no"] == 1:
            total += 1
            if p["errors"]:
                wrong_sid += 1
            if total % 10 == 0:
                print("Total:{}, wrong SID: {}".format(total, wrong_sid))

        print(p["exam_id"], p["paper_id"], p["page_no"])

        ex = exmodel.Exam.objects.get(pk=int(p["exam_id"]))
        pa = exmodel.GeneratedPaper.objects.get(serial_no=int(p["paper_id"]), exam=ex)

        # .exists() asks the database for a yes/no answer instead of loading
        # every matching row just to count them.
        already_stored = scmodel.Scan.objects.filter(
            exam=ex, paper=pa, page_no=p["page_no"]
        ).exists()
        if already_stored:
            print("paper already exists in the scan list")
            continue

        _store_scan(p, ex, pa)
        if _archive_input(f):
            archived += 1

    print("Total:{}, wrong SID: {}".format(total, wrong_sid))

    # Skipped files (empty pages, duplicates) stay in the input directory.
    # If nothing left the directory in this pass, wait before polling again
    # rather than re-running OCR on the same files in a tight loop.
    if not archived:
        time.sleep(10)
|