Skip to content

Commit

Permalink
-Move StoryBookCreateFromEPubController method to a new Util class (C…
Browse files Browse the repository at this point in the history
…loses elimu-ai#1825)
  • Loading branch information
Aryant-Tripathi committed Oct 22, 2024
1 parent e65a7d4 commit 1a03cae
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 40 deletions.
12 changes: 12 additions & 0 deletions src/main/java/ai/elimu/util/AppConstants.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package ai.elimu.util;

/**
 * Holder for application-wide constants. Not instantiable.
 */
public final class AppConstants {

    private AppConstants() {
        // Constant holder only.
    }

    /**
     * Constants used when predicting the reading level of a storybook.
     * The {@code *_KEY} values are the feature names passed to the prediction model;
     * {@link #LEVEL} is the prefix that is combined with the predicted number
     * to form a reading level name (e.g. {@code "LEVEL2"}).
     */
    public static final class READING_LEVEL_CONSTANTS {

        /** Feature name for the number of chapters. */
        public static final String CHAPTER_COUNT_KEY = "chapter_count";

        /** Feature name for the number of paragraphs. */
        public static final String PARAGRAPH_COUNT_KEY = "paragraph_count";

        /** Feature name for the number of words. */
        public static final String WORD_COUNT_KEY = "word_count";

        /** Prefix of the reading level names. */
        public static final String LEVEL = "LEVEL";

        private READING_LEVEL_CONSTANTS() {
            // Constant holder only.
        }
    }

}
41 changes: 41 additions & 0 deletions src/main/java/ai/elimu/util/ml/ReadingLevelUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package ai.elimu.util.ml;

import ai.elimu.model.v2.enums.ReadingLevel;
import org.pmml4s.model.Model;

import java.util.Arrays;
import java.util.Map;

import static ai.elimu.util.AppConstants.READING_LEVEL_CONSTANTS.*;

/**
 * Utility for predicting the {@link ReadingLevel} of a storybook with a PMML model.
 */
public final class ReadingLevelUtil {

    private ReadingLevelUtil() {
        // Static utility; not meant to be instantiated.
    }

    /**
     * Predicts the reading level of a storybook from its size metrics.
     *
     * @param chapterCount   number of chapters in the storybook
     * @param paragraphCount number of paragraphs in the storybook
     * @param wordCount      number of words in the storybook
     * @param modelFilePath  file system path of the PMML model to load
     * @return the {@link ReadingLevel} whose name is {@code "LEVEL"} followed by the
     *         integer part of the model's first prediction value
     * @throws IllegalStateException    if the model returns no result or a non-numeric result
     * @throws IllegalArgumentException if no {@link ReadingLevel} matches the predicted number
     */
    public static ReadingLevel predictReadingLevel(
            int chapterCount,
            int paragraphCount,
            int wordCount,
            String modelFilePath
    ) {
        Model model = Model.fromFile(modelFilePath);

        Map<String, Double> features = Map.of(
                CHAPTER_COUNT_KEY, (double) chapterCount,
                PARAGRAPH_COUNT_KEY, (double) paragraphCount,
                WORD_COUNT_KEY, (double) wordCount
        );

        // Arrange the feature values in the order in which the model lists its inputs.
        Object[] inputValues = Arrays.stream(model.inputNames())
                .map(features::get)
                .toArray();

        Object[] results = model.predict(inputValues);
        if (results == null || results.length == 0) {
            throw new IllegalStateException(
                    "Model returned no prediction (modelFilePath: " + modelFilePath + ")");
        }

        // Accept any numeric result type instead of assuming Double.
        Object result = results[0];
        if (!(result instanceof Number)) {
            throw new IllegalStateException(
                    "Model returned a non-numeric prediction: " + result
                            + " (modelFilePath: " + modelFilePath + ")");
        }

        // The fractional part is discarded, e.g. 2.0 -> 2.
        int predictedLevel = ((Number) result).intValue();

        // Convert from number to ReadingLevel enum name (e.g. "LEVEL2").
        return ReadingLevel.valueOf(LEVEL + predictedLevel);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import ai.elimu.util.epub.EPubImageExtractionHelper;
import ai.elimu.util.epub.EPubMetadataExtractionHelper;
import ai.elimu.util.epub.EPubParagraphExtractionHelper;
import ai.elimu.util.ml.ReadingLevelUtil;
import ai.elimu.web.context.EnvironmentContextLoaderListener;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
Expand Down Expand Up @@ -52,11 +53,7 @@
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

Expand Down Expand Up @@ -542,44 +539,19 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS
}

private ReadingLevel predictReadingLevel(int chapterCount, int paragraphCount, int wordCount) {
logger.info("predictReadingLevel");

// Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level)
String modelFilePath = getClass().getResource("step2_2_model.pmml").getFile();
logger.info("modelFilePath: " + modelFilePath);
org.pmml4s.model.Model model = org.pmml4s.model.Model.fromFile(modelFilePath);
logger.info("model: " + model);

// Prepare values (features) to pass to the model
Map<String, Double> values = Map.of(
"chapter_count", Double.valueOf(chapterCount),
"paragraph_count", Double.valueOf(paragraphCount),
"word_count", Double.valueOf(wordCount)

String modelFilePath = Objects.requireNonNull(getClass().getResource("step2_2_model.pmml")).getFile();

logger.info(
"Predicting reading level for chapter: {}, paragraph: {}, word: {}, modelPath: {} ",
chapterCount, paragraphCount, wordCount, modelFilePath
);
logger.info("values: " + values);

// Make prediction
logger.info("Arrays.toString(model.inputNames()): " + Arrays.toString(model.inputNames()));
Object[] valuesMap = Arrays.stream(model.inputNames())
.map(values::get)
.toArray();
logger.info("valuesMap: " + valuesMap);
Object[] results = model.predict(valuesMap);
logger.info("results: " + results);
logger.info("Arrays.toString(results): " + Arrays.toString(results));
Object result = results[0];
logger.info("result: " + result);
logger.info("result.getClass().getSimpleName(): " + result.getClass().getSimpleName());
Double resultAsDouble = (Double) result;
logger.info("resultAsDouble: " + resultAsDouble);
Integer resultAsInteger = resultAsDouble.intValue();
logger.info("resultAsInteger: " + resultAsInteger);

// Convert from number to ReadingLevel enum (e.g. "LEVEL2")
String readingLevelAsString = "LEVEL" + resultAsInteger;
logger.info("readingLevelAsString: " + readingLevelAsString);
ReadingLevel readingLevel = ReadingLevel.valueOf(readingLevelAsString);
logger.info("readingLevel: " + readingLevel);

ReadingLevel readingLevel = ReadingLevelUtil.predictReadingLevel(chapterCount, paragraphCount, wordCount, modelFilePath);
logger.info("Predicted reading level: {}", readingLevel);

return readingLevel;
}
}
59 changes: 59 additions & 0 deletions src/test/java/ai/elimu/util/ml/ReadingLevelUtilTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ai.elimu.util.ml;

import ai.elimu.model.v2.enums.ReadingLevel;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

/**
 * Tests for {@link ReadingLevelUtil#predictReadingLevel(int, int, int, String)},
 * run against the decision tree fixture in the test resources.
 */
public class ReadingLevelUtilTest {

    /** Path of the PMML fixture shared by the prediction tests. */
    private static final String MODEL_FILE_PATH = "src/test/resources/ai/elimu/util/reading_level/model1.pmml";

    /** 5 chapters, 20 paragraphs and 100 words are expected to give LEVEL1. */
    @Test
    public void testPredictReadingLevel_Level1() {
        ReadingLevel result = ReadingLevelUtil.predictReadingLevel(5, 20, 100, MODEL_FILE_PATH);

        assertEquals(ReadingLevel.LEVEL1, result, "Expected ReadingLevel to be LEVEL1, but got: " + result);
    }

    /** 12 chapters, 22 paragraphs and 250 words are expected to give LEVEL2. */
    @Test
    public void testPredictReadingLevel_Level2() {
        ReadingLevel result = ReadingLevelUtil.predictReadingLevel(12, 22, 250, MODEL_FILE_PATH);

        assertEquals(ReadingLevel.LEVEL2, result, "Expected ReadingLevel to be LEVEL2, but got: " + result);
    }

    /** 12 chapters, 25 paragraphs and 350 words are expected to give LEVEL3. */
    @Test
    public void testPredictReadingLevel_Level3() {
        ReadingLevel result = ReadingLevelUtil.predictReadingLevel(12, 25, 350, MODEL_FILE_PATH);

        assertEquals(ReadingLevel.LEVEL3, result, "Expected ReadingLevel to be LEVEL3, but got: " + result);
    }

    /** A model path that does not exist is expected to surface as an IOException. */
    @Test
    public void testPredictReadingLevel_InvalidModelFile() {
        assertThrows(
                IOException.class,
                () -> ReadingLevelUtil.predictReadingLevel(1, 1, 1, "invalidPath"),
                "Expected IOException when loading an invalid model file path"
        );
    }
}
41 changes: 41 additions & 0 deletions src/test/resources/ai/elimu/util/reading_level/model1.pmml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Test fixture: a small regression decision tree with target "reading_level".
  Scores assigned by the leaf nodes below:
    chapter_count <  10 and paragraph_count <= 20  -> 1.0
    chapter_count <  10 and paragraph_count >  20  -> 2.0
    chapter_count >= 10 and word_count <  300      -> 2.0
    chapter_count >= 10 and word_count >= 300      -> 3.0
-->
<PMML version="4.4" xmlns="http://www.dmg.org/PMML-4_4">
<Header>
<Application name="Decision Tree Regressor" version="1.0"/>
</Header>
<!-- Three integer input fields plus the double-valued target field. -->
<DataDictionary numberOfFields="4">
<DataField name="chapter_count" optype="continuous" dataType="integer"/>
<DataField name="paragraph_count" optype="continuous" dataType="integer"/>
<DataField name="word_count" optype="continuous" dataType="integer"/>
<DataField name="reading_level" optype="continuous" dataType="double"/>
</DataDictionary>
<TreeModel functionName="regression" algorithmName="DecisionTree" missingValueStrategy="none">
<MiningSchema>
<MiningField name="chapter_count"/>
<MiningField name="paragraph_count"/>
<MiningField name="word_count"/>
<MiningField name="reading_level" usageType="target"/>
</MiningSchema>
<!-- Root node: always matches; carries a score of 1.0. -->
<Node score="1.0">
<True/>
<!-- Branch for fewer than 10 chapters: split on paragraph_count. -->
<Node score="1.0">
<SimplePredicate field="chapter_count" operator="lessThan" value="10"/>
<Node score="1.0">
<SimplePredicate field="paragraph_count" operator="lessOrEqual" value="20"/>
</Node>
<Node score="2.0">
<SimplePredicate field="paragraph_count" operator="greaterThan" value="20"/>
</Node>
</Node>
<!-- Branch for 10 or more chapters: split on word_count. -->
<Node score="2.0">
<SimplePredicate field="chapter_count" operator="greaterOrEqual" value="10"/>
<Node score="2.0">
<SimplePredicate field="word_count" operator="lessThan" value="300"/>
</Node>
<Node score="3.0">
<SimplePredicate field="word_count" operator="greaterOrEqual" value="300"/>
</Node>
</Node>
</Node>
</TreeModel>
</PMML>

0 comments on commit 1a03cae

Please sign in to comment.