First commit

This commit is contained in:
Andrea Cavalli 2024-05-16 15:52:59 +02:00
parent 1366a5803c
commit 52ee9f8e53
6 changed files with 382 additions and 0 deletions

2
.gitignore vendored
View File

@ -24,3 +24,5 @@
hs_err_pid*
replay_pid*
*.env
.idea/

15
Dockerfile Normal file
View File

@ -0,0 +1,15 @@
# Build stage: package the application jar with Maven on a Temurin 22 JDK.
# Keyword casing is kept consistent (FROM ... AS) to satisfy BuildKit's
# FromAsCasing build check.
FROM maven:3-eclipse-temurin-22 AS build
WORKDIR /build
COPY --link src src
COPY --link pom.xml pom.xml
# Tests are not executed during the image build (-DskipTests).
RUN mvn package -DskipTests

# Runtime stage: only the packaged jar is copied over from the build stage.
FROM eclipse-temurin:22-jdk-alpine
WORKDIR /build
# The wildcard absorbs the version suffix of the jar produced by `mvn package`.
# NOTE(review): the pre-shade jar is expected to be named "original-..." and
# therefore not matched by this pattern - confirm against the shade plugin output.
COPY --from=build --link /build/target/language-block-bot-*.jar language-block-bot.jar
ENTRYPOINT ["java", "-jar", "language-block-bot.jar"]

7
docker-compose.yml Normal file
View File

@ -0,0 +1,7 @@
services:
  language-block-bot:
    build:
      context: .
      dockerfile: Dockerfile
    environment:
      # The bot reads its token from the languageblockbot.bot.token system
      # property, which is injected here through JAVA_TOOL_OPTIONS.
      # ${TOKEN:?...} makes `docker compose up` fail with a clear message when
      # TOKEN is unset or empty instead of silently passing an empty token.
      # NOTE(review): the JVM normally echoes "Picked up JAVA_TOOL_OPTIONS: ..."
      # on startup, which would write the token to the container logs - confirm
      # and consider a different way of passing the secret.
      JAVA_TOOL_OPTIONS: "-Dlanguageblockbot.bot.token=${TOKEN:?TOKEN must be set to the Telegram bot token}"

167
pom.xml Normal file
View File

@ -0,0 +1,167 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the language-block-bot: compiles for Java 21 and packages a runnable shaded jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>it.cavallium</groupId>
    <artifactId>language-block-bot</artifactId>
    <version>1.0.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven-compiler-plugin.version>3.12.1</maven-compiler-plugin.version>
        <!-- Single source of truth for the shade plugin version (previously declared twice with different versions). -->
        <maven-shade-plugin.version>3.5.3</maven-shade-plugin.version>
        <maven-surefire-plugin.version>3.0.0-M7</maven-surefire-plugin.version>
    </properties>

    <repositories>
        <repository>
            <id>central</id>
            <url>https://repo.maven.apache.org/maven2/</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- Declared directly (and excluded below) so that a single version is resolved for the
             dependencyConvergence enforcer rule. -->
        <dependency>
            <groupId>org.jetbrains</groupId>
            <artifactId>annotations</artifactId>
            <version>24.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>2.0.12</version>
        </dependency>
        <dependency>
            <groupId>org.telegram</groupId>
            <artifactId>telegrambots-abilities</artifactId>
            <version>7.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.jetbrains</groupId>
                    <artifactId>annotations</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.telegram</groupId>
            <artifactId>telegrambots-longpolling</artifactId>
            <version>7.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.jetbrains.kotlin</groupId>
                    <artifactId>kotlin-stdlib-jdk8</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.telegram</groupId>
            <artifactId>telegrambots-client</artifactId>
            <version>7.2.1</version>
        </dependency>
        <dependency>
            <groupId>com.github.pemistahl</groupId>
            <artifactId>lingua</artifactId>
            <version>1.2.2</version>
            <exclusions>
                <exclusion>
                    <groupId>com.squareup.okio</groupId>
                    <artifactId>okio</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.jetbrains.kotlin</groupId>
                    <artifactId>kotlin-stdlib</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.jetbrains.kotlin</groupId>
                    <artifactId>kotlin-stdlib-jdk8</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- SLF4J binding that routes log output to java.util.logging. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-jdk14</artifactId>
            <version>2.0.12</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>${maven-compiler-plugin.version}</version>
                <configuration>
                    <release>21</release>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>${maven-surefire-plugin.version}</version>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-enforcer-plugin</artifactId>
                <version>3.0.0-M3</version>
                <executions>
                    <execution>
                        <id>enforce</id>
                        <configuration>
                            <rules>
                                <dependencyConvergence/>
                            </rules>
                        </configuration>
                        <goals>
                            <goal>enforce</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds the runnable fat jar. This is the only declaration of the shade plugin:
                 a plugin may appear once per <plugins> section, otherwise Maven warns about a
                 duplicate declaration. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>${maven-shade-plugin.version}</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <!-- Signature files of signed dependencies are excluded from the merged jar. -->
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>it.cavallium.languageblockbot.Main</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

View File

@ -0,0 +1,152 @@
package it.cavallium.languageblockbot;
import com.github.pemistahl.lingua.api.Language;
import com.github.pemistahl.lingua.api.LanguageDetector;
import com.github.pemistahl.lingua.api.LanguageDetectorBuilder;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import org.telegram.telegrambots.abilitybots.api.bot.AbilityBot;
import org.telegram.telegrambots.meta.api.methods.groupadministration.GetChatAdministrators;
import org.telegram.telegrambots.meta.api.methods.updatingmessages.DeleteMessage;
import org.telegram.telegrambots.meta.api.objects.Update;
import org.telegram.telegrambots.meta.api.objects.chatmember.ChatMember;
import org.telegram.telegrambots.meta.api.objects.message.Message;
import org.telegram.telegrambots.meta.exceptions.TelegramApiException;
public class LanguageBlockBot extends AbilityBot {
private final LanguageDetector detector;
private final ConcurrentMap<Long, Set<Long>> chatAdmins = new ConcurrentHashMap<>();
private final Set<Language> whitelistedLangs;
private final Set<Language> blacklistedLangs;
public LanguageBlockBot(String token, Set<Language> whitelistedLangs, Set<Language> blacklistedLangs) {
super(new org.telegram.telegrambots.client.okhttp.OkHttpTelegramClient(token), "LanguageBlockBot");
var allLangs = new HashSet<Language>();
allLangs.addAll(whitelistedLangs);
allLangs.addAll(blacklistedLangs);
this.detector = LanguageDetectorBuilder.fromLanguages(allLangs.toArray(Language[]::new))
.withPreloadedLanguageModels()
.withMinimumRelativeDistance(0.8)
.build();
this.whitelistedLangs = whitelistedLangs;
this.blacklistedLangs = blacklistedLangs;
}
@Override
public void consume(Update update) {
if (update.hasMessage()) {
var msg = update.getMessage();
String text;
if (msg.hasText()) {
text = msg.getText();
} else if (msg.getCaption() != null) {
text = msg.getCaption();
} else {
text = null;
}
if (text != null) {
var detectedLangs = detector.computeLanguageConfidenceValues(text);
double acceptableLanguageValue = whitelistedLangs
.stream()
.map(whitelistedLang -> detectedLangs.getOrDefault(whitelistedLang, 0.0))
.reduce(Math::max)
.orElse(0d);
double blacklistedLanguageValue = blacklistedLangs
.stream()
.map(blacklistedLang -> detectedLangs.getOrDefault(blacklistedLang, 0.0))
.reduce(Math::max)
.orElse(0d);
var shouldBeDeletedByLanguage = !detectedLangs.isEmpty() &&
(acceptableLanguageValue <= 0.75
|| (acceptableLanguageValue < blacklistedLanguageValue));
System.out.printf("Received message: \"%s\". shouldBeDeletedByLanguage: %b. Languages: \"%s\".%n",
text,
shouldBeDeletedByLanguage,
detectedLangs.entrySet().stream().map(e -> e.getKey() + "=" + e.getValue()).collect(Collectors.joining(", ", "[", "]"))
);
if (shouldBeDeletedByLanguage) {
isMessageDeletable(msg).thenCompose(deletable -> {
var deleteMessageMethod = DeleteMessage
.builder()
.chatId(msg.getChatId())
.messageId(msg.getMessageId())
.build();
try {
var client = this.getTelegramClient();
return client.executeAsync(deleteMessageMethod);
} catch (TelegramApiException e) {
return CompletableFuture.failedFuture(e);
}
}).whenComplete((ok, ex) -> {
if (ex != null) {
ex.printStackTrace();
}
});
}
}
}
}
private CompletableFuture<Boolean> isMessageDeletable(Message msg) {
var chatId = msg.getChatId();
CompletableFuture<Boolean> canDeleteBecauseItsNotAdmin;
if (msg.getSenderChat() != null) {
canDeleteBecauseItsNotAdmin = getChatAdmins(chatId)
.thenApply(admins -> !admins.contains(msg.getSenderChat().getId()));
} else {
canDeleteBecauseItsNotAdmin = CompletableFuture.completedFuture(true);
}
boolean canDeleteBecauseItsNormalMessage = !(msg.getIsAutomaticForward() != null && msg.getIsAutomaticForward())
&& (msg.getSenderChat() != null && msg.getSenderChat().isUserChat()) && !msg.hasViaBot();
return CompletableFuture
.completedFuture(canDeleteBecauseItsNormalMessage)
.thenCombine(canDeleteBecauseItsNotAdmin, (a, b) -> a && b);
}
private CompletableFuture<Set<Long>> getChatAdmins(Long chatId) {
var client = this.getTelegramClient();
if (chatId >= 0) {
return CompletableFuture.completedFuture(Set.of());
} else {
try {
var cachedAdmins = this.chatAdmins.get(chatId);
if (cachedAdmins != null) {
return CompletableFuture.completedFuture(cachedAdmins);
} else {
return client
.executeAsync(GetChatAdministrators.builder().chatId(chatId).build())
.<List<ChatMember>>handle((result, err) -> {
if (err != null) {
err.printStackTrace();
return List.of();
} else {
return result;
}
})
.thenApply(admins -> {
var newAdmins = admins.stream().map(n -> n.getUser().getId()).collect(Collectors.toCollection(HashSet<Long>::new));
chatAdmins.put(chatId, newAdmins);
return newAdmins;
});
}
} catch (TelegramApiException e) {
throw new RuntimeException(e);
}
}
}
@Override
public long creatorId() {
return 40951146;
}
}

View File

@ -0,0 +1,39 @@
package it.cavallium.languageblockbot;
import com.github.pemistahl.lingua.api.Language;
import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.telegram.telegrambots.longpolling.TelegramBotsLongPollingApplication;
/**
 * Entry point of the bot.
 *
 * <p>Configuration is read from system properties:
 * <ul>
 *   <li>{@code languageblockbot.bot.token} - the Telegram bot token (required)</li>
 *   <li>{@code languageblockbot.langs.whitelisted} - comma-separated allowed languages</li>
 *   <li>{@code languageblockbot.langs.blacklisted} - comma-separated unwanted languages</li>
 * </ul>
 */
public class Main {

    private static final String DEFAULT_WHITELISTED_LANGS = "ENGLISH,FRENCH,ITALIAN";
    private static final String DEFAULT_BLACKLISTED_LANGS =
            "SOMALI,ARABIC,RUSSIAN,UKRAINIAN,CHINESE,AZERBAIJANI,SPANISH,PORTUGUESE,HINDI,POLISH,VIETNAMESE";

    /**
     * Validates the configuration, registers the bot for long polling and then blocks.
     * Exits with status 1 when the token is missing or a language name is invalid.
     *
     * @param args ignored
     * @throws Exception if the bot cannot be registered or the main thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        String token = System.getProperty("languageblockbot.bot.token");
        // Fail fast instead of starting with a placeholder (or empty) token that can never work.
        if (token == null || token.isBlank()) {
            System.err.println("Missing bot token. Set it with -Dlanguageblockbot.bot.token=<token>");
            System.exit(1);
            return;
        }

        Set<Language> whitelistedLangs;
        Set<Language> blacklistedLangs;
        try {
            whitelistedLangs = parseLanguages("languageblockbot.langs.whitelisted", DEFAULT_WHITELISTED_LANGS);
            blacklistedLangs = parseLanguages("languageblockbot.langs.blacklisted", DEFAULT_BLACKLISTED_LANGS);
        } catch (IllegalArgumentException ex) {
            System.err.println("Invalid language value. Allowed values: " + Arrays.toString(Language.values()));
            ex.printStackTrace();
            System.exit(1);
            return;
        }

        try (var app = new TelegramBotsLongPollingApplication()) {
            var bot = new LanguageBlockBot(token, whitelistedLangs, blacklistedLangs);
            bot.onRegister();
            app.registerBot(token, bot);
            // Joining the current thread never returns on its own; it parks the main thread
            // until it is interrupted.
            Thread.currentThread().join();
        }
    }

    /**
     * Parses a comma-separated list of language names from a system property.
     * Names are matched case-insensitively and surrounding whitespace is ignored.
     *
     * @param property the system property to read
     * @param defaultValue the value used when the property is not set
     * @return an unmodifiable set of the parsed languages
     * @throws IllegalArgumentException if an entry is not a valid {@link Language} name
     */
    private static Set<Language> parseLanguages(String property, String defaultValue) {
        return Stream
                .of(System.getProperty(property, defaultValue).split(","))
                .map(name -> name.strip().toUpperCase(java.util.Locale.ROOT))
                .map(Language::valueOf)
                .collect(Collectors.toUnmodifiableSet());
    }
}