chore: Rename Kafka topic and add SignalKind collection script

- tp_SNP_AIS_Signal → tp_Global_AIS_Signal (3 profiles)
- Add scripts/collect_signalkind_candidates.sh

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Parent: 178ac506bf
Commit: 290933f94f

scripts/collect_signalkind_candidates.sh (new executable file, 431 lines)
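The script's built-in usage text shows the intended invocations, e.g. a ten-minute collection run against the local profile, or a capped replay from the earliest offset:

    scripts/collect_signalkind_candidates.sh -p local -d 600
    scripts/collect_signalkind_candidates.sh -p local -r earliest -m 50000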
@@ -0,0 +1,431 @@
#!/usr/bin/env bash

set -euo pipefail

ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"

PROFILE="local"
DURATION_SEC=120
MAX_MESSAGES=200000
GROUP_ID="signalkind-collector-v1"
OFFSET_RESET="latest"
OUTPUT_DIR="$ROOT_DIR/docs/signalkind"

usage() {
  cat <<'USAGE'
Usage: collect_signalkind_candidates.sh [options]

Options:
  -p, --profile <name>         Spring profile (default: local)
  -d, --duration-sec <sec>     Consume duration seconds (default: 120)
  -m, --max-messages <count>   Max messages per run (default: 200000)
  -g, --group-id <id>          Kafka consumer group id (default: signalkind-collector-v1)
  -r, --offset-reset <value>   auto.offset.reset: earliest|latest (default: latest)
  -o, --output-dir <dir>       Output directory (default: docs/signalkind)
  -h, --help                   Show this help

Examples:
  scripts/collect_signalkind_candidates.sh -p local -d 600
  scripts/collect_signalkind_candidates.sh -p local -r earliest -m 50000
USAGE
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -p|--profile)
      PROFILE="$2"
      shift 2
      ;;
    -d|--duration-sec)
      DURATION_SEC="$2"
      shift 2
      ;;
    -m|--max-messages)
      MAX_MESSAGES="$2"
      shift 2
      ;;
    -g|--group-id)
      GROUP_ID="$2"
      shift 2
      ;;
    -r|--offset-reset)
      OFFSET_RESET="$2"
      shift 2
      ;;
    -o|--output-dir)
      OUTPUT_DIR="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

if [[ "$OFFSET_RESET" != "earliest" && "$OFFSET_RESET" != "latest" ]]; then
  echo "Invalid --offset-reset value: $OFFSET_RESET"
  exit 1
fi

CONFIG_FILE="$ROOT_DIR/src/main/resources/application-${PROFILE}.yml"
if [[ ! -f "$CONFIG_FILE" ]]; then
  echo "Profile config not found: $CONFIG_FILE"
  exit 1
fi

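# Note: each awk call prints the first matching key in the profile YAML and
# exits, so a file with multiple bootstrap-servers/topic entries would only
# contribute its first occurrence.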
BOOTSTRAP_SERVERS="$(awk '/^[[:space:]]*bootstrap-servers:/{print $2; exit}' "$CONFIG_FILE")"
TOPIC_NAME="$(awk '/^[[:space:]]*topic:/{print $2; exit}' "$CONFIG_FILE")"

if [[ -z "${BOOTSTRAP_SERVERS:-}" || -z "${TOPIC_NAME:-}" ]]; then
  echo "Failed to read bootstrap/topic from $CONFIG_FILE"
  exit 1
fi

mkdir -p "$OUTPUT_DIR"

CP_FILE="/tmp/snp-signalkind-cp.txt"
mvn -q -DskipTests dependency:build-classpath -Dmdep.outputFile="$CP_FILE"
CLASSPATH="$(cat "$CP_FILE")"

JAVA_FILE="/tmp/SignalkindCollector.java"
cat >"$JAVA_FILE" <<'JAVA'
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.*;
import java.util.stream.Collectors;

public class SignalkindCollector {

    @JsonIgnoreProperties(ignoreUnknown = true)
    public static class Aggregate {
        public long totalConsumed = 0;
        public long totalParseError = 0;
        public long totalMissingVesselType = 0;
        public Map<String, KindStat> vesselTypeStats = new HashMap<>();
        public Map<String, KindStat> vesselTypeExtraStats = new HashMap<>();
    }

    @JsonIgnoreProperties(ignoreUnknown = true)
    public static class KindStat {
        public long count = 0;
        public long lastTimestampMs = 0;
        public LinkedHashSet<String> sampleMmsi = new LinkedHashSet<>();
    }

    public static class DecisionFields {
        String proposedCode = "";
        String status = "PENDING";
        String notes = "";
    }

    public static void main(String[] args) throws Exception {
        if (args.length < 7) {
            throw new IllegalArgumentException(
                    "Usage: SignalkindCollector <bootstrap> <topic> <outputDir> <durationSec> <maxMessages> <groupId> <offsetReset>");
        }

        String bootstrap = args[0];
        String topic = args[1];
        Path outputDir = Path.of(args[2]);
        int durationSec = Integer.parseInt(args[3]);
        int maxMessages = Integer.parseInt(args[4]);
        String groupId = args[5];
        String offsetReset = args[6];

        Files.createDirectories(outputDir);

        ObjectMapper mapper = new ObjectMapper();
        Path aggregateJsonPath = outputDir.resolve("aggregate_store.json");
        Path vesselTypeTsvPath = outputDir.resolve("vesseltype_stats.tsv");
        Path vesselTypeExtraTsvPath = outputDir.resolve("vesseltype_extra_stats.tsv");
        Path mappingDraftTsvPath = outputDir.resolve("signalkind_mapping_draft.tsv");
        Path runSummaryPath = outputDir.resolve("last_run_summary.txt");

        Aggregate aggregate = loadAggregate(mapper, aggregateJsonPath);
        Map<String, DecisionFields> existingDraft = loadExistingDraft(mappingDraftTsvPath);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, offsetReset);
        props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "2000");
        props.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, "30000");

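        // Auto-commit is disabled above; offsets are committed explicitly via
        // commitSync() after each poll batch, so a rerun with the same group id
        // resumes from where the previous run stopped.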
        long runStart = System.currentTimeMillis();
        long runDeadline = runStart + (durationSec * 1000L);
        long runConsumed = 0;
        long runParseError = 0;
        long runMissingVesselType = 0;

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singleton(topic));

            while (System.currentTimeMillis() < runDeadline && runConsumed < maxMessages) {
                ConsumerRecords<String, String> records = consumer.poll(java.time.Duration.ofMillis(1000));
                if (records.isEmpty()) {
                    continue;
                }

                for (ConsumerRecord<String, String> record : records) {
                    if (runConsumed >= maxMessages) {
                        break;
                    }
                    runConsumed++;

                    try {
                        JsonNode root = mapper.readTree(record.value());
                        JsonNode payload = root.path("payload");

                        String vesselType = normalize(payload.path("vesselType").asText(null), "N/A");
                        String extraInfo = normalize(payload.path("extraInfo").asText(null), "N/A");
                        String mmsi = normalize(payload.path("mmsi").asText(null), "");

                        if ("N/A".equals(vesselType)) {
                            runMissingVesselType++;
                        }

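                        // The U+001F "unit separator" control character joins vesselType
                        // and extraInfo into a single map key; the TSV writers split on it
                        // to recover both columns.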
                        updateStat(aggregate.vesselTypeStats, vesselType, record.timestamp(), mmsi);
                        updateStat(aggregate.vesselTypeExtraStats, vesselType + "\u001F" + extraInfo, record.timestamp(), mmsi);
                    } catch (Exception e) {
                        runParseError++;
                    }
                }

                consumer.commitSync();
            }
        }

        aggregate.totalConsumed += runConsumed;
        aggregate.totalParseError += runParseError;
        aggregate.totalMissingVesselType += runMissingVesselType;

        mapper.writerWithDefaultPrettyPrinter().writeValue(aggregateJsonPath.toFile(), aggregate);

        writeVesselTypeStats(aggregate, vesselTypeTsvPath);
        writeVesselTypeExtraStats(aggregate, vesselTypeExtraTsvPath);
        writeMappingDraft(aggregate, existingDraft, mappingDraftTsvPath);
        writeRunSummary(runSummaryPath, bootstrap, topic, groupId, offsetReset,
                runStart, System.currentTimeMillis(), runConsumed, runParseError, runMissingVesselType, aggregate.totalConsumed);

        System.out.println("OUTPUT_DIR=" + outputDir);
        System.out.println("RUN_CONSUMED=" + runConsumed);
        System.out.println("RUN_PARSE_ERROR=" + runParseError);
        System.out.println("RUN_MISSING_VESSEL_TYPE=" + runMissingVesselType);
        System.out.println("TOTAL_CONSUMED=" + aggregate.totalConsumed);
        System.out.println("TOTAL_VESSEL_TYPES=" + aggregate.vesselTypeStats.size());
        System.out.println("TOTAL_VESSEL_TYPE_EXTRA=" + aggregate.vesselTypeExtraStats.size());
    }

    private static Aggregate loadAggregate(ObjectMapper mapper, Path aggregateJsonPath) {
        try {
            if (Files.exists(aggregateJsonPath)) {
                return mapper.readValue(aggregateJsonPath.toFile(), Aggregate.class);
            }
        } catch (Exception ignored) {
        }
        return new Aggregate();
    }

    private static Map<String, DecisionFields> loadExistingDraft(Path mappingDraftTsvPath) {
        Map<String, DecisionFields> map = new HashMap<>();
        if (!Files.exists(mappingDraftTsvPath)) {
            return map;
        }

        try {
            List<String> lines = Files.readAllLines(mappingDraftTsvPath, StandardCharsets.UTF_8);
            boolean first = true;
            for (String line : lines) {
                if (first) {
                    first = false;
                    continue;
                }
                String[] arr = line.split("\t", -1);
                if (arr.length < 8) {
                    continue;
                }
                String key = arr[0] + "\u001F" + arr[1];
                DecisionFields fields = new DecisionFields();
                fields.proposedCode = arr[5];
                fields.status = arr[6].isBlank() ? "PENDING" : arr[6];
                fields.notes = arr[7];
                map.put(key, fields);
            }
        } catch (Exception ignored) {
        }
        return map;
    }

    private static void updateStat(Map<String, KindStat> map, String key, long ts, String mmsi) {
        KindStat stat = map.computeIfAbsent(key, k -> new KindStat());
        stat.count += 1;
        stat.lastTimestampMs = Math.max(stat.lastTimestampMs, ts);
        if (mmsi != null && !mmsi.isBlank() && stat.sampleMmsi.size() < 5) {
            stat.sampleMmsi.add(mmsi);
        }
    }

    private static String normalize(String value, String defaultValue) {
        if (value == null) {
            return defaultValue;
        }
        String v = value.trim();
        if (v.isEmpty() || "null".equalsIgnoreCase(v)) {
            return defaultValue;
        }
        return v;
    }

    private static void writeVesselTypeStats(Aggregate aggregate, Path path) throws Exception {
        List<Map.Entry<String, KindStat>> rows = aggregate.vesselTypeStats.entrySet()
                .stream()
                .sorted((a, b) -> Long.compare(b.getValue().count, a.getValue().count))
                .toList();

        try (BufferedWriter w = new BufferedWriter(new FileWriter(path.toFile(), false))) {
            w.write("vesselType\tcount\tratio\tlastSeenUtc\tsampleMmsi\n");
            long total = Math.max(1L, aggregate.totalConsumed);
            for (Map.Entry<String, KindStat> row : rows) {
                KindStat s = row.getValue();
                double ratio = (s.count * 100.0) / total;
                w.write(row.getKey() + "\t"
                        + s.count + "\t"
                        + String.format(Locale.US, "%.4f", ratio) + "\t"
                        + formatTs(s.lastTimestampMs) + "\t"
                        + String.join(",", s.sampleMmsi) + "\n");
            }
        }
    }

    private static void writeVesselTypeExtraStats(Aggregate aggregate, Path path) throws Exception {
        List<Map.Entry<String, KindStat>> rows = aggregate.vesselTypeExtraStats.entrySet()
                .stream()
                .sorted((a, b) -> Long.compare(b.getValue().count, a.getValue().count))
                .toList();

        try (BufferedWriter w = new BufferedWriter(new FileWriter(path.toFile(), false))) {
            w.write("vesselType\textraInfo\tcount\tratio\tlastSeenUtc\tsampleMmsi\n");
            long total = Math.max(1L, aggregate.totalConsumed);
            for (Map.Entry<String, KindStat> row : rows) {
                String[] keys = row.getKey().split("\u001F", 2);
                String vesselType = keys.length > 0 ? keys[0] : "N/A";
                String extraInfo = keys.length > 1 ? keys[1] : "N/A";
                KindStat s = row.getValue();
                double ratio = (s.count * 100.0) / total;
                w.write(vesselType + "\t"
                        + extraInfo + "\t"
                        + s.count + "\t"
                        + String.format(Locale.US, "%.4f", ratio) + "\t"
                        + formatTs(s.lastTimestampMs) + "\t"
                        + String.join(",", s.sampleMmsi) + "\n");
            }
        }
    }

    private static void writeMappingDraft(Aggregate aggregate, Map<String, DecisionFields> existing, Path path) throws Exception {
        List<Map.Entry<String, KindStat>> rows = aggregate.vesselTypeExtraStats.entrySet()
                .stream()
                .sorted((a, b) -> Long.compare(b.getValue().count, a.getValue().count))
                .toList();

        try (BufferedWriter w = new BufferedWriter(new FileWriter(path.toFile(), false))) {
            w.write("vesselType\textraInfo\tcount\tlastSeenUtc\tsampleMmsi\tproposedSignalKindCode\tdecisionStatus\tnotes\n");
            for (Map.Entry<String, KindStat> row : rows) {
                String key = row.getKey();
                String[] keys = key.split("\u001F", 2);
                String vesselType = keys.length > 0 ? keys[0] : "N/A";
                String extraInfo = keys.length > 1 ? keys[1] : "N/A";
                KindStat s = row.getValue();

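                // Reuse any decision fields already filled in by hand for this key,
                // so regenerating the draft does not discard manual review work.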
                DecisionFields d = existing.getOrDefault(key, new DecisionFields());
                w.write(vesselType + "\t"
                        + extraInfo + "\t"
                        + s.count + "\t"
                        + formatTs(s.lastTimestampMs) + "\t"
                        + String.join(",", s.sampleMmsi) + "\t"
                        + d.proposedCode + "\t"
                        + d.status + "\t"
                        + d.notes + "\n");
            }
        }
    }

    private static void writeRunSummary(
            Path path,
            String bootstrap,
            String topic,
            String groupId,
            String offsetReset,
            long startMs,
            long endMs,
            long runConsumed,
            long runParseError,
            long runMissingVesselType,
            long totalConsumed
    ) throws Exception {
        try (BufferedWriter w = new BufferedWriter(new FileWriter(path.toFile(), false))) {
            w.write("bootstrap=" + bootstrap + "\n");
            w.write("topic=" + topic + "\n");
            w.write("groupId=" + groupId + "\n");
            w.write("offsetReset=" + offsetReset + "\n");
            w.write("runStartUtc=" + Instant.ofEpochMilli(startMs) + "\n");
            w.write("runEndUtc=" + Instant.ofEpochMilli(endMs) + "\n");
            w.write("runDurationSec=" + ((endMs - startMs) / 1000) + "\n");
            w.write("runConsumed=" + runConsumed + "\n");
            w.write("runParseError=" + runParseError + "\n");
            w.write("runMissingVesselType=" + runMissingVesselType + "\n");
            w.write("totalConsumed=" + totalConsumed + "\n");
        }
    }

    private static String formatTs(long ts) {
        if (ts <= 0) {
            return "";
        }
        return OffsetDateTime.ofInstant(Instant.ofEpochMilli(ts), ZoneOffset.UTC).toString();
    }
}
JAVA

javac -cp "$CLASSPATH" "$JAVA_FILE"

java -cp "$CLASSPATH:/tmp" SignalkindCollector \
  "$BOOTSTRAP_SERVERS" \
  "$TOPIC_NAME" \
  "$OUTPUT_DIR" \
  "$DURATION_SEC" \
  "$MAX_MESSAGES" \
  "$GROUP_ID" \
  "$OFFSET_RESET"

echo "[DONE] Output files:"
echo " - $OUTPUT_DIR/aggregate_store.json"
echo " - $OUTPUT_DIR/vesseltype_stats.tsv"
echo " - $OUTPUT_DIR/vesseltype_extra_stats.tsv"
echo " - $OUTPUT_DIR/signalkind_mapping_draft.tsv"
echo " - $OUTPUT_DIR/last_run_summary.txt"
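Note that the collector is cumulative across runs: aggregate_store.json is reloaded and merged at startup, and offsets are committed under the consumer group, so repeated invocations extend the same statistics rather than starting over. The profile configs then receive the one-line topic rename below.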
@@ -118,7 +118,7 @@ app:
     cron: "15 * * * * ?" # run at second 15 of every minute
   kafka:
     enabled: true
-    topic: tp_SNP_AIS_Signal
+    topic: tp_Global_AIS_Signal
     send-chunk-size: 5000
     fail-on-send-error: false
 # AIS Target cache settings

@@ -120,7 +120,7 @@ app:
     cron: "15 * * * * ?" # run at second 15 of every minute
   kafka:
     enabled: true
-    topic: tp_SNP_AIS_Signal
+    topic: tp_Global_AIS_Signal
     send-chunk-size: 5000
     fail-on-send-error: false
 # AIS Target cache settings

@@ -170,7 +170,7 @@ app:
     cron: "15 * * * * ?" # run at second 15 of every minute
   kafka:
     enabled: true
-    topic: tp_SNP_AIS_Signal
+    topic: tp_Global_AIS_Signal
     send-chunk-size: 5000
     fail-on-send-error: false