Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor unicode emoji parsing #20

Closed
wants to merge 10 commits into from
227 changes: 111 additions & 116 deletions lib/src/main/java/net/fellbaum/jemoji/EmojiManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,42 @@
import java.util.stream.Collector;
import java.util.stream.Collectors;

import static net.fellbaum.jemoji.EmojiUtils.addColonToAlias;
import static net.fellbaum.jemoji.EmojiUtils.findEmojiByEitherAlias;
import static net.fellbaum.jemoji.EmojiUtils.isStringNullOrEmpty;
import static net.fellbaum.jemoji.EmojiUtils.removeColonFromAlias;
import static net.fellbaum.jemoji.EmojiUtils.*;

@SuppressWarnings("unused")
public final class EmojiManager {

/**
 * Outcome of a single {@code onEmoji} callback. It either carries a final value,
 * which makes {@code forEachEmoji} return immediately, or it signals that the
 * scan over the text should continue.
 *
 * @param <T> type of the value handed back when the scan stops early
 */
private static class FunctionResult<T> {
    // Value returned by forEachEmoji when keepLooking is false; null for "continue" results.
    private final T result;
    // true -> continue scanning the text, false -> stop and return result.
    private final boolean keepLooking;

    private FunctionResult(T value, boolean continueScan) {
        result = value;
        keepLooking = continueScan;
    }

    /** Creates a result that stops the scan and yields the given value. */
    private static <T> FunctionResult<T> returnValue(T value) {
        return new FunctionResult<>(value, false);
    }

    /** Creates a result that carries no value and lets the scan continue. */
    private static <T> FunctionResult<T> keepLooking() {
        return new FunctionResult<>(null, true);
    }
}

/**
 * Callbacks driven by {@code forEachEmoji} while it walks the code points of a text.
 *
 * @param <T> type of the value produced by the scan
 */
private interface EmojiProcessor<T> {
    /**
     * Invoked when an emoji has been matched in the text.
     *
     * @param textIndex index into the text's code point array at which the match starts
     * @param emoji     the matched emoji
     * @return whether to continue scanning, or the value to return right away
     */
    FunctionResult<T> onEmoji(int textIndex, Emoji emoji);

    /**
     * Invoked with the code point at each scan position before the emoji lookup runs.
     * After a match the scan skips the remaining code points of that emoji, so this is
     * only called for an emoji's first code point. Does nothing unless overridden.
     *
     * @param codepoint the code point at the current scan position
     */
    default void onCodepoint(int codepoint) {
        // intentionally empty
    }

    /**
     * @return the value {@code forEachEmoji} returns when the whole text was scanned
     *         without any callback stopping the scan
     */
    T getDefaultValue();
}

private interface VoidEmojiProcessor extends EmojiProcessor<Void> {
@Override
default Void getDefaultValue() { return null; }
}

private static final String PATH = "emoji_sources/emojis.json";

private static final Map<String, Emoji> EMOJI_UNICODE_TO_EMOJI;
Expand Down Expand Up @@ -235,33 +263,15 @@ public static Pattern getEmojiPattern() {
public static boolean containsEmoji(final String text) {
if (isStringNullOrEmpty(text)) return false;

final List<Emoji> emojis = new ArrayList<>();
return forEachEmoji(text, new EmojiProcessor<Boolean>() {
@Override
public Boolean getDefaultValue() { return Boolean.FALSE; }

final int[] textCodePointsArray = text.codePoints().toArray();
final long textCodePointsLength = textCodePointsArray.length;

for (int textIndex = 0; textIndex < textCodePointsLength; textIndex++) {
final List<Emoji> emojisByCodePoint = EMOJI_FIRST_CODEPOINT_TO_EMOJIS_ORDER_CODEPOINT_LENGTH_DESCENDING.get(textCodePointsArray[textIndex]);
if (emojisByCodePoint == null) continue;
for (final Emoji emoji : emojisByCodePoint) {
final int[] emojiCodePointsArray = emoji.getEmoji().codePoints().toArray();
final int emojiCodePointsLength = emojiCodePointsArray.length;
// Emoji code points are in bounds of the text code points
if (!((textIndex + emojiCodePointsLength) <= textCodePointsLength)) {
continue;
}

for (int i = 0; i < emojiCodePointsLength; i++) {
if (textCodePointsArray[textIndex + i] != emojiCodePointsArray[i]) {
break;
}
if (i == emojiCodePointsLength - 1) {
return true;
}
}
@Override
public FunctionResult<Boolean> onEmoji(int textIndex, Emoji emoji) {
return FunctionResult.returnValue(Boolean.TRUE);
}
}
return false;
});
}

/**
Expand All @@ -274,37 +284,10 @@ public static List<Emoji> extractEmojisInOrder(final String text) {
if (isStringNullOrEmpty(text)) return Collections.emptyList();

final List<Emoji> emojis = new ArrayList<>();

final int[] textCodePointsArray = text.codePoints().toArray();
final long textCodePointsLength = textCodePointsArray.length;

// JDK 21 Characters.isEmoji

nextTextIteration:
for (int textIndex = 0; textIndex < textCodePointsLength; textIndex++) {
final int currentCodepoint = textCodePointsArray[textIndex];
final List<Emoji> emojisByCodePoint = EMOJI_FIRST_CODEPOINT_TO_EMOJIS_ORDER_CODEPOINT_LENGTH_DESCENDING.get(currentCodepoint);
if (emojisByCodePoint == null) continue;
for (final Emoji emoji : emojisByCodePoint) {
final int[] emojiCodePointsArray = emoji.getEmoji().codePoints().toArray();
final int emojiCodePointsLength = emojiCodePointsArray.length;
// Emoji code points are in bounds of the text code points
if (!((textIndex + emojiCodePointsLength) <= textCodePointsLength)) {
continue;
}

for (int emojiCodePointIndex = 0; emojiCodePointIndex < emojiCodePointsLength; emojiCodePointIndex++) {
if (textCodePointsArray[textIndex + emojiCodePointIndex] != emojiCodePointsArray[emojiCodePointIndex]) {
break;
}
if (emojiCodePointIndex == (emojiCodePointsLength - 1)) {
emojis.add(emoji);
textIndex += emojiCodePointsLength - 1;
continue nextTextIteration;
}
}
}
}
forEachEmoji(text, (VoidEmojiProcessor) (textIndex, emoji) -> {
emojis.add(emoji);
return FunctionResult.keepLooking();
});
return Collections.unmodifiableList(emojis);
}

Expand Down Expand Up @@ -372,45 +355,31 @@ public static String removeAllEmojisExcept(final String text, final Emoji... emo
*/
public static String removeAllEmojisExcept(final String text, final Collection<Emoji> emojisToKeep) {
if (isStringNullOrEmpty(text)) return "";
final int[] textCodePointsArray = text.codePoints().toArray();
final long textCodePointsLength = textCodePointsArray.length;

final StringBuilder sb = new StringBuilder();

nextTextIteration:
for (int textIndex = 0; textIndex < textCodePointsLength; textIndex++) {
final int currentCodepoint = textCodePointsArray[textIndex];
sb.appendCodePoint(currentCodepoint);
forEachEmoji(text, new VoidEmojiProcessor() {
private int currentCodepoint;

final List<Emoji> emojisByCodePoint = EMOJI_FIRST_CODEPOINT_TO_EMOJIS_ORDER_CODEPOINT_LENGTH_DESCENDING.get(currentCodepoint);
if (emojisByCodePoint == null) continue;
for (final Emoji emoji : emojisByCodePoint) {
final int[] emojiCodePointsArray = emoji.getEmoji().codePoints().toArray();
final int emojiCodePointsLength = emojiCodePointsArray.length;
// Check if Emoji code points are in bounds of the text code points
if (!((textIndex + emojiCodePointsLength) <= textCodePointsLength)) {
continue;
}
@Override
public void onCodepoint(int codepoint) {
currentCodepoint = codepoint;

for (int emojiCodePointIndex = 0; emojiCodePointIndex < emojiCodePointsLength; emojiCodePointIndex++) {
//break out because the emoji is not the same
if (textCodePointsArray[textIndex + emojiCodePointIndex] != emojiCodePointsArray[emojiCodePointIndex]) {
break;
}
sb.appendCodePoint(currentCodepoint);
}

if (emojiCodePointIndex == (emojiCodePointsLength - 1)) {
textIndex += emojiCodePointsLength - 1;
sb.delete(sb.length() - Character.charCount(currentCodepoint), sb.length());
@Override
public FunctionResult<Void> onEmoji(int textIndex, Emoji emoji) {
sb.delete(sb.length() - Character.charCount(currentCodepoint), sb.length());

if (emojisToKeep.contains(emoji)) {
// if the emoji should be kept, add it again
sb.append(emoji.getEmoji());
}
continue nextTextIteration;
}
if (emojisToKeep.contains(emoji)) {
// if the emoji should be kept, add it again
sb.append(emoji.getEmoji());
}

return FunctionResult.keepLooking();
}
}
});

return sb.toString();
}
Expand Down Expand Up @@ -472,22 +441,63 @@ public static String replaceEmojis(final String text, final String replacementSt
public static String replaceEmojis(final String text, Function<Emoji, String> replacementFunction, final Collection<Emoji> emojisToReplace) {
    if (isStringNullOrEmpty(text)) {
        return "";
    }

    final StringBuilder output = new StringBuilder(text.length());

    forEachEmoji(text, new VoidEmojiProcessor() {
        // Code point most recently copied to the output; needed to undo that copy on a match.
        private int lastCodepoint;

        @Override
        public void onCodepoint(int codepoint) {
            // Copy every visited code point verbatim; a match is corrected in onEmoji.
            output.appendCodePoint(codepoint);
            lastCodepoint = codepoint;
        }

        @Override
        public FunctionResult<Void> onEmoji(int textIndex, Emoji emoji) {
            // The emoji's first code point was already copied by onCodepoint: drop it again.
            output.setLength(output.length() - Character.charCount(lastCodepoint));

            if (!emojisToReplace.contains(emoji)) {
                // Not selected for replacement: write the emoji back unchanged.
                output.append(emoji.getEmoji());
            } else {
                output.append(replacementFunction.apply(emoji));
            }

            return FunctionResult.keepLooking();
        }
    });

    return output.toString();
}

/**
 * Replaces the given emojis in the text with the output of the replacement function.
 * Wraps the varargs array in a list and delegates to the {@link Collection}-based overload.
 *
 * @param text                The text to replace emojis from.
 * @param replacementFunction The function that supplies the replacement for an emoji.
 * @param emojisToReplace     The emojis to replace.
 * @return The text with the given emojis replaced.
 */
public static String replaceEmojis(final String text, Function<Emoji, String> replacementFunction, final Emoji... emojisToReplace) {
    final List<Emoji> targets = Arrays.asList(emojisToReplace);
    return replaceEmojis(text, replacementFunction, targets);
}

private static <T> T forEachEmoji(final String text,
final EmojiProcessor<T> processor) {
final int[] textCodePointsArray = text.codePoints().toArray();
final long textCodePointsLength = textCodePointsArray.length;

final StringBuilder sb = new StringBuilder();

nextTextIteration:
for (int textIndex = 0; textIndex < textCodePointsLength; textIndex++) {
final int currentCodepoint = textCodePointsArray[textIndex];
sb.appendCodePoint(currentCodepoint);
processor.onCodepoint(currentCodepoint);

final List<Emoji> emojisByCodePoint = EMOJI_FIRST_CODEPOINT_TO_EMOJIS_ORDER_CODEPOINT_LENGTH_DESCENDING.get(currentCodepoint);
if (emojisByCodePoint == null) continue;
for (final Emoji emoji : emojisByCodePoint) {
final int[] emojiCodePointsArray = emoji.getEmoji().codePoints().toArray();
final int emojiCodePointsLength = emojiCodePointsArray.length;
// Check if Emoji code points are in bounds of the text code points
// Emoji code points are in bounds of the text code points
if (!((textIndex + emojiCodePointsLength) <= textCodePointsLength)) {
continue;
}
Expand All @@ -498,35 +508,20 @@ public static String replaceEmojis(final String text, Function<Emoji, String> re
break;
}

if (emojiCodePointIndex == (emojiCodePointsLength - 1)) {
textIndex += emojiCodePointsLength - 1;
sb.delete(sb.length() - Character.charCount(currentCodepoint), sb.length());

if (emojisToReplace.contains(emoji)) {
sb.append(replacementFunction.apply(emoji));
if (emojiCodePointIndex == emojiCodePointsLength - 1) {
final FunctionResult<T> functionResult = processor.onEmoji(textIndex, emoji);
if (functionResult.keepLooking) {
textIndex += emojiCodePointsLength - 1;
continue nextTextIteration;
} else {
sb.append(emoji.getEmoji());
return functionResult.result;
}

continue nextTextIteration;
}
}
}
}

return sb.toString();
}

/**
* Replaces all emojis in the text with the given replacement function.
*
* @param text The text to replace emojis from.
* @param replacementFunction The replacement function.
* @param emojisToReplace The emojis to replace.
* @return The text with all emojis replaced.
*/
public static String replaceEmojis(final String text, Function<Emoji, String> replacementFunction, final Emoji... emojisToReplace) {
return replaceEmojis(text, replacementFunction, Arrays.asList(emojisToReplace));
return processor.getDefaultValue();
}

/*public static List<Emoji> testEmojiPattern(final String text) {
Expand Down