From 52ee9f8e5389789c6dd95f7282e308f94b3e7358 Mon Sep 17 00:00:00 2001 From: Andrea Cavalli Date: Thu, 16 May 2024 15:52:59 +0200 Subject: [PATCH] First commit --- .gitignore | 2 + Dockerfile | 15 ++ docker-compose.yml | 7 + pom.xml | 167 ++++++++++++++++++ .../languageblockbot/LanguageBlockBot.java | 152 ++++++++++++++++ .../it/cavallium/languageblockbot/Main.java | 39 ++++ 6 files changed, 382 insertions(+) create mode 100644 Dockerfile create mode 100644 docker-compose.yml create mode 100644 pom.xml create mode 100644 src/main/java/it/cavallium/languageblockbot/LanguageBlockBot.java create mode 100644 src/main/java/it/cavallium/languageblockbot/Main.java diff --git a/.gitignore b/.gitignore index 9154f4c..a6273db 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,5 @@ hs_err_pid* replay_pid* +*.env +.idea/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b4538e2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM maven:3-eclipse-temurin-22 as build +WORKDIR /build +COPY --link src src +COPY --link pom.xml pom.xml +RUN mvn package -DskipTests + +FROM eclipse-temurin:22-jdk-alpine +WORKDIR /build +COPY --from=build --link /build/target/language-block-bot-*.jar language-block-bot.jar +ENTRYPOINT ["java", "-jar", "language-block-bot.jar"] + + + + + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..10d3073 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,7 @@ +services: + language-block-bot: + build: + context: . 
+ dockerfile: Dockerfile + environment: + JAVA_TOOL_OPTIONS: "-Dlanguageblockbot.bot.token=$TOKEN" diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..2540a76 --- /dev/null +++ b/pom.xml @@ -0,0 +1,167 @@ + + + 4.0.0 + + it.cavallium + language-block-bot + 1.0.0-SNAPSHOT + + + UTF-8 + + 3.12.1 + 3.2.4 + 3.0.0-M7 + + + + + central + https://repo.maven.apache.org/maven2/ + + + + + + org.jetbrains + annotations + 24.1.0 + + + org.slf4j + slf4j-api + 2.0.12 + + + org.telegram + telegrambots-abilities + 7.2.1 + + + org.jetbrains + annotations + + + org.slf4j + slf4j-api + + + + + org.telegram + telegrambots-longpolling + 7.2.1 + + + org.jetbrains.kotlin + kotlin-stdlib-jdk8 + + + + + org.telegram + telegrambots-client + 7.2.1 + + + com.github.pemistahl + lingua + 1.2.2 + + + com.squareup.okio + okio + + + org.jetbrains.kotlin + kotlin-stdlib + + + org.slf4j + slf4j-api + + + org.jetbrains.kotlin + kotlin-stdlib-jdk8 + + + + + org.slf4j + slf4j-jdk14 + 2.0.12 + + + + + + + maven-compiler-plugin + ${maven-compiler-plugin.version} + + 21 + + + + maven-shade-plugin + ${maven-shade-plugin.version} + + + maven-surefire-plugin + ${maven-surefire-plugin.version} + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.0.0-M3 + + + enforce + + + + + + + enforce + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.3 + + + package + + shade + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + it.cavallium.languageblockbot.Main + + + + + + + + + + + diff --git a/src/main/java/it/cavallium/languageblockbot/LanguageBlockBot.java b/src/main/java/it/cavallium/languageblockbot/LanguageBlockBot.java new file mode 100644 index 0000000..a0a4c15 --- /dev/null +++ b/src/main/java/it/cavallium/languageblockbot/LanguageBlockBot.java @@ -0,0 +1,152 @@ +package it.cavallium.languageblockbot; + +import com.github.pemistahl.lingua.api.Language; +import com.github.pemistahl.lingua.api.LanguageDetector; +import 
com.github.pemistahl.lingua.api.LanguageDetectorBuilder;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.stream.Collectors;
+import org.telegram.telegrambots.abilitybots.api.bot.AbilityBot;
+import org.telegram.telegrambots.meta.api.methods.groupadministration.GetChatAdministrators;
+import org.telegram.telegrambots.meta.api.methods.updatingmessages.DeleteMessage;
+import org.telegram.telegrambots.meta.api.objects.Update;
+import org.telegram.telegrambots.meta.api.objects.chatmember.ChatMember;
+import org.telegram.telegrambots.meta.api.objects.message.Message;
+import org.telegram.telegrambots.meta.exceptions.TelegramApiException;
+
+/**
+ * Telegram bot that deletes messages written in unwanted languages.
+ *
+ * <p>A message is flagged when the best confidence among the whitelisted languages is at most
+ * {@code 0.75}, or is lower than the best confidence among the blacklisted languages.
+ */
+public class LanguageBlockBot extends AbilityBot {
+
+  private final LanguageDetector detector;
+  /** Administrator user ids keyed by chat id; entries are never invalidated once stored. */
+  private final ConcurrentMap<Long, Set<Long>> chatAdmins = new ConcurrentHashMap<>();
+  private final Set<Language> whitelistedLangs;
+  private final Set<Language> blacklistedLangs;
+
+  /**
+   * Creates the bot and preloads the detection models of every configured language.
+   *
+   * @param token Telegram bot API token
+   * @param whitelistedLangs languages that are accepted
+   * @param blacklistedLangs languages that cause a message to be deleted
+   */
+  public LanguageBlockBot(String token, Set<Language> whitelistedLangs, Set<Language> blacklistedLangs) {
+    super(new org.telegram.telegrambots.client.okhttp.OkHttpTelegramClient(token), "LanguageBlockBot");
+
+    var allLangs = new HashSet<Language>();
+    allLangs.addAll(whitelistedLangs);
+    allLangs.addAll(blacklistedLangs);
+    this.detector = LanguageDetectorBuilder.fromLanguages(allLangs.toArray(Language[]::new))
+        .withPreloadedLanguageModels()
+        .withMinimumRelativeDistance(0.8)
+        .build();
+    // Snapshot the sets so later changes made by the caller cannot alter the filter.
+    this.whitelistedLangs = Set.copyOf(whitelistedLangs);
+    this.blacklistedLangs = Set.copyOf(blacklistedLangs);
+  }
+
+  /**
+   * Inspects the text (or caption) of an incoming message and deletes the message when its
+   * language is flagged and {@link #isMessageDeletable(Message)} allows it.
+   *
+   * @param update update received from Telegram; updates without a message are ignored
+   */
+  @Override
+  public void consume(Update update) {
+    if (!update.hasMessage()) {
+      return;
+    }
+    var msg = update.getMessage();
+    String text = msg.hasText() ? msg.getText() : msg.getCaption();
+    if (text == null) {
+      return;
+    }
+    var detectedLangs = detector.computeLanguageConfidenceValues(text);
+    double acceptableLanguageValue = bestConfidence(whitelistedLangs, detectedLangs);
+    double blacklistedLanguageValue = bestConfidence(blacklistedLangs, detectedLangs);
+    var shouldBeDeletedByLanguage = !detectedLangs.isEmpty()
+        && (acceptableLanguageValue <= 0.75 || acceptableLanguageValue < blacklistedLanguageValue);
+
+    System.out.printf("Received message: \"%s\". shouldBeDeletedByLanguage: %b. Languages: \"%s\".%n",
+        text,
+        shouldBeDeletedByLanguage,
+        detectedLangs.entrySet().stream()
+            .map(e -> e.getKey() + "=" + e.getValue())
+            .collect(Collectors.joining(", ", "[", "]"))
+    );
+    if (!shouldBeDeletedByLanguage) {
+      return;
+    }
+    isMessageDeletable(msg).thenCompose(deletable -> {
+      if (!deletable) {
+        // Protected message (administrator, via-bot, automatic forward, ...): leave it in place.
+        return CompletableFuture.<Boolean>completedFuture(false);
+      }
+      var deleteMessageMethod = DeleteMessage
+          .builder()
+          .chatId(msg.getChatId())
+          .messageId(msg.getMessageId())
+          .build();
+      try {
+        return this.getTelegramClient().executeAsync(deleteMessageMethod);
+      } catch (TelegramApiException e) {
+        return CompletableFuture.failedFuture(e);
+      }
+    }).whenComplete((ok, ex) -> {
+      if (ex != null) {
+        ex.printStackTrace();
+      }
+    });
+  }
+
+  /** Returns the highest confidence among {@code langs}, or {@code 0} when none of them was detected. */
+  private static double bestConfidence(Set<Language> langs, java.util.Map<Language, Double> confidences) {
+    return langs.stream().mapToDouble(lang -> confidences.getOrDefault(lang, 0.0)).max().orElse(0d);
+  }
+
+  /**
+   * Decides whether a flagged message may be removed.
+   *
+   * <p>Automatic forwards, messages sent via a bot and messages sent on behalf of a non-user chat
+   * are never removed. Otherwise the message is removable unless its author is a chat
+   * administrator.
+   *
+   * @param msg the flagged message
+   * @return a future holding {@code true} when the message may be deleted
+   */
+  private CompletableFuture<Boolean> isMessageDeletable(Message msg) {
+    var senderChat = msg.getSenderChat();
+    boolean automaticForward = Boolean.TRUE.equals(msg.getIsAutomaticForward());
+    // Ordinary user messages carry no sender chat, so its absence must not block deletion.
+    boolean sentByUser = senderChat == null || senderChat.isUserChat();
+    if (automaticForward || !sentByUser || msg.hasViaBot()) {
+      return CompletableFuture.completedFuture(false);
+    }
+    var author = msg.getFrom();
+    if (author == null) {
+      return CompletableFuture.completedFuture(true);
+    }
+    // The cache holds administrator user ids, so compare the author's id, not the sender chat's.
+    return getChatAdmins(msg.getChatId()).thenApply(admins -> !admins.contains(author.getId()));
+  }
+
+  /**
+   * Returns the user ids of the administrators of a chat, cached after the first successful lookup.
+   *
+   * <p>Non-negative chat ids are answered with an empty set without contacting Telegram. A failed
+   * lookup also yields an empty set, but that result is not cached, so the next message retries.
+   *
+   * @param chatId id of the chat the message was posted in
+   * @return a future holding the administrator user ids
+   */
+  private CompletableFuture<Set<Long>> getChatAdmins(Long chatId) {
+    if (chatId >= 0) {
+      return CompletableFuture.completedFuture(Set.of());
+    }
+    var cachedAdmins = this.chatAdmins.get(chatId);
+    if (cachedAdmins != null) {
+      return CompletableFuture.completedFuture(cachedAdmins);
+    }
+    try {
+      return this.getTelegramClient()
+          .executeAsync(GetChatAdministrators.builder().chatId(chatId).build())
+          .<Set<Long>>handle((result, err) -> {
+            if (err != null) {
+              err.printStackTrace();
+              return Set.of();
+            }
+            Set<Long> newAdmins = result.stream()
+                .map(n -> n.getUser().getId())
+                .collect(Collectors.toUnmodifiableSet());
+            chatAdmins.put(chatId, newAdmins);
+            return newAdmins;
+          });
+    } catch (TelegramApiException e) {
+      // Report the failure through the returned future, like every other error in this chain.
+      return CompletableFuture.failedFuture(e);
+    }
+  }
+
+  /** Returns the Telegram user id of the bot's creator. */
+  @Override
+  public long creatorId() {
+    return 40951146;
+  }
+}
diff --git a/src/main/java/it/cavallium/languageblockbot/Main.java b/src/main/java/it/cavallium/languageblockbot/Main.java
new file mode 100644
index 0000000..ff82d15
--- /dev/null
+++ b/src/main/java/it/cavallium/languageblockbot/Main.java
@@ -0,0 +1,65 @@
+package it.cavallium.languageblockbot;
+
+import com.github.pemistahl.lingua.api.Language;
+import java.util.Arrays;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.telegram.telegrambots.longpolling.TelegramBotsLongPollingApplication;
+
+/** Entry point: configures the bot from JVM system properties and starts long polling. */
+public class Main {
+
+  /**
+   * Starts the bot and then blocks the main thread forever.
+   *
+   * <p>Reads the {@code languageblockbot.bot.token}, {@code languageblockbot.langs.whitelisted}
+   * and {@code languageblockbot.langs.blacklisted} system properties. The process exits with
+   * status 1 when the token is blank or a language name is unknown.
+   */
+  public static void main(String[] args) throws Exception {
+    String token = System.getProperty("languageblockbot.bot.token", "");
+    if (token.isBlank()) {
+      // Fail fast: polling Telegram with a placeholder token only produces opaque API errors.
+      System.err.println("Missing bot token. Set the languageblockbot.bot.token system property.");
+      System.exit(1);
+      return;
+    }
+    Set<Language> whitelistedLangs;
+    Set<Language> blacklistedLangs;
+    try {
+      whitelistedLangs = parseLanguages(
+          System.getProperty("languageblockbot.langs.whitelisted", "ENGLISH,FRENCH,ITALIAN"));
+      blacklistedLangs = parseLanguages(
+          System.getProperty("languageblockbot.langs.blacklisted", "SOMALI,ARABIC,RUSSIAN,UKRAINIAN,CHINESE,AZERBAIJANI,SPANISH,PORTUGUESE,HINDI,POLISH,VIETNAMESE"));
+    } catch (IllegalArgumentException ex) {
+      System.err.println("Invalid language value. Allowed values: " + Arrays.toString(Language.values()));
+      ex.printStackTrace();
+      System.exit(1);
+      return;
+    }
+    try (var app = new TelegramBotsLongPollingApplication()) {
+      var bot = new LanguageBlockBot(token, whitelistedLangs, blacklistedLangs);
+      bot.onRegister();
+      app.registerBot(token, bot);
+      Thread.currentThread().join();
+    }
+  }
+
+  /**
+   * Parses a comma-separated list of {@link Language} constant names.
+   *
+   * <p>Surrounding whitespace and letter case are ignored, and empty entries are skipped.
+   *
+   * @param commaSeparated the raw property value, for example {@code "ENGLISH, french"}
+   * @return an unmodifiable set of the named languages
+   * @throws IllegalArgumentException if an entry is not the name of a {@link Language} constant
+   */
+  private static Set<Language> parseLanguages(String commaSeparated) {
+    return Stream.of(commaSeparated.split(","))
+        .map(String::strip)
+        .filter(name -> !name.isEmpty())
+        .map(name -> Language.valueOf(name.toUpperCase(java.util.Locale.ROOT)))
+        .collect(Collectors.toUnmodifiableSet());
+  }
+}