So I’ve trained a model with almost 89% accuracy and 36% loss on the EMNIST Balanced dataset, and it seems that most labels are predicted correctly. Now I’m trying to upload a handwritten image and split it into an array of X letters, each of which is going to be resized to 28×28 and predicted separately. What’s the best way to do that?
Part of my code is:
def predict(image):
    """Classify a single letter image and return its class name.

    The image is passed through ``resize_image``, reduced to its first
    channel, reshaped into a batch of one 28x28 sample and fed to the
    module-level ``model``. The label with the highest score is looked up
    in ``class_names``.
    """
    resized = resize_image(image)
    # Keep only the first channel; the model input is single-channel.
    single_channel = resized[:, :, 0]
    batch = single_channel.reshape((1, 28, 28))
    scores = model.predict(batch)
    best_index = np.argmax(scores)
    return class_names[best_index]
def printPrediction(image):
    """Split a word image into letters by column and print the OCR result.

    The image is read from disk, inverted and scaled to the range [0, 1].
    Columns are then scanned left to right: the first column containing a
    bright pixel (see ``check``) opens a letter, and the next column with
    no bright pixel closes it. Each span, padded by a small margin, is
    cropped at full image height and handed to ``predict``. The
    concatenated predictions are printed, followed by the closest
    dictionary words from ``printPossibleWord``.

    NOTE(review): this presumably expects dark ink on a light background,
    so that ink becomes bright after inversion -- confirm against the
    input images.

    Args:
        image: Path of the image file to load with ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read the file.
    """
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: %r' % (image,))

    img = cv2.bitwise_not(img)
    img = img.astype('float32')
    img /= 255

    # shape is (rows, columns, channels); only the first two are needed.
    height, width = img.shape[:2]
    threshold = 0.8
    margin = 2  # extra columns kept on each side of a detected letter

    prediction = ''
    start_column = None
    for column in range(width):
        column_has_ink = any(
            check(img[row, column], threshold) for row in range(height)
        )
        if column_has_ink:
            if start_column is None:
                # Clamp so a letter at the left edge does not produce a
                # negative index, which would wrap around when slicing.
                start_column = max(column - margin, 0)
        elif start_column is not None:
            # First fully blank column after a letter: close the span.
            end_column = min(column + margin, width)
            prediction += predict(img[0:height, start_column:end_column])
            start_column = None

    if start_column is not None:
        # The last letter touches the right edge and was never closed.
        prediction += predict(img[0:height, start_column:width])

    print("\nPrediction of the OCR system is: ")
    print(prediction)
    print("\nPossible word from the dictionary is: ")
    printPossibleWord(prediction.lower())


def check(list, threshold):
    """Return True when exactly three values reach the threshold.

    For a three-channel pixel this means every channel is at least
    ``threshold``.

    Args:
        list: Iterable of numeric values, e.g. one pixel's channels. The
            name shadows the builtin but is kept so existing callers that
            pass it by keyword keep working.
        threshold: Minimum value for an element to be counted.

    Returns:
        True if exactly three elements are ``>= threshold``, else False.
    """
    return sum(1 for x in list if x >= threshold) == 3
Then I’m using a word dictionary to find the closest matching word, like this:
def printPossibleWord(prediction):
    """Print the dictionary words that best match the predicted string.

    Loads ``words_dictionary.json``, takes up to three close matches from
    ``get_close_matches`` and scores each one by the number of positions
    where it has the same character as ``prediction``. Matches whose
    length differs from ``prediction`` keep a score of zero. Every match
    sharing the highest score is printed in upper case. Nothing is
    printed when there are no close matches.

    Args:
        prediction: The predicted word to look up.
    """
    # Import the dictionary (JSON file).
    with open('words_dictionary.json', 'r') as f:
        words_dict = json.load(f)

    # Find the closest matching words for the input.
    matches = get_close_matches(prediction, words_dict, n=3, cutoff=0.6)
    if not matches:
        # max() below would raise ValueError on an empty score list.
        return

    # Count position-wise identical characters for same-length matches.
    similar_character_counter = zerolistmaker(len(matches))
    for i, match in enumerate(matches):
        if len(match) != len(prediction):
            continue
        similar_character_counter[i] = sum(
            1 for a, b in zip(match, prediction) if a == b
        )

    max_value = max(similar_character_counter)

    # Print every match that reaches the best score.
    for match, score in zip(matches, similar_character_counter):
        if score == max_value:
            print(match.upper())


def zerolistmaker(n):
    """Return a new list containing ``n`` zeros."""
    return [0] * n
The main problem is that the whole word image is resized to 28×28, and each letter is "broken" as it gets shrunk down. What’s the best way to deal with this?
Source: Python Questions