add Parse Command

This commit is contained in:
Jürgen Mummert
2026-01-09 16:28:43 +01:00
parent 5cd8286286
commit f1c864dfca
+21 -24
View File
@@ -27,8 +27,7 @@ class MeilisearchFilesParseCommand extends Command
'limit', 'limit',
null, null,
InputOption::VALUE_OPTIONAL, InputOption::VALUE_OPTIONAL,
'Maximum number of files to parse per run', 'Maximum number of files to check per run (optional)'
20
) )
->addOption( ->addOption(
'dry-run', 'dry-run',
@@ -41,13 +40,16 @@ class MeilisearchFilesParseCommand extends Command
protected function execute(InputInterface $input, OutputInterface $output): int protected function execute(InputInterface $input, OutputInterface $output): int
{ {
$this->framework->initialize(); $this->framework->initialize();
$this->log('Parser gestartet'); $this->log('Parser gestartet');
$limit = max(1, (int) $input->getOption('limit'));
$dryRun = (bool) $input->getOption('dry-run'); $dryRun = (bool) $input->getOption('dry-run');
$tikaUrl = rtrim((string) $GLOBALS['TL_CONFIG']['meilisearch_tika_url'], '/'); // ---- LIMIT: nur wenn explizit gesetzt
$limitOption = $input->getOption('limit');
$limit = $limitOption !== null ? max(1, (int) $limitOption) : null;
// ---- Tika URL
$tikaUrl = rtrim((string) ($GLOBALS['TL_CONFIG']['meilisearch_tika_url'] ?? ''), '/');
if ($tikaUrl === '') { if ($tikaUrl === '') {
$output->writeln('<error>Tika URL not configured</error>'); $output->writeln('<error>Tika URL not configured</error>');
return Command::FAILURE; return Command::FAILURE;
@@ -55,14 +57,13 @@ class MeilisearchFilesParseCommand extends Command
$db = Database::getInstance(); $db = Database::getInstance();
$files = $db // ---- Files laden
->query( $sql = "SELECT * FROM tl_search_files ORDER BY tstamp ASC";
"SELECT * if ($limit !== null) {
FROM tl_search_files $sql .= " LIMIT " . (int) $limit;
ORDER BY tstamp ASC }
LIMIT " . (int) $limit
) $files = $db->query($sql)->fetchAllAssoc();
->fetchAllAssoc();
if (!$files) { if (!$files) {
$this->log('No files to parse'); $this->log('No files to parse');
@@ -70,7 +71,7 @@ class MeilisearchFilesParseCommand extends Command
} }
$client = HttpClient::create([ $client = HttpClient::create([
'timeout' => 120, 'timeout' => 180,
]); ]);
foreach ($files as $file) { foreach ($files as $file) {
@@ -79,7 +80,7 @@ class MeilisearchFilesParseCommand extends Command
$normalized = $originalUrl; $normalized = $originalUrl;
// ------------------------------------------------- // -------------------------------------------------
// 1) Query-URL behandeln (?file=files/...) // 1) ?file=files/
// ------------------------------------------------- // -------------------------------------------------
if (str_contains($normalized, '?')) { if (str_contains($normalized, '?')) {
$parts = parse_url($normalized); $parts = parse_url($normalized);
@@ -95,7 +96,7 @@ class MeilisearchFilesParseCommand extends Command
} }
// ------------------------------------------------- // -------------------------------------------------
// 2) Fragment entfernen (#...) // 2) Fragment entfernen
// ------------------------------------------------- // -------------------------------------------------
$normalized = strtok($normalized, '#'); $normalized = strtok($normalized, '#');
@@ -105,7 +106,7 @@ class MeilisearchFilesParseCommand extends Command
$normalized = rawurldecode($normalized); $normalized = rawurldecode($normalized);
// ------------------------------------------------- // -------------------------------------------------
// 4) Nur lokale files/… zulassen // 4) Nur lokale files/
// ------------------------------------------------- // -------------------------------------------------
$normalized = ltrim($normalized, '/'); $normalized = ltrim($normalized, '/');
if (!str_starts_with($normalized, 'files/')) { if (!str_starts_with($normalized, 'files/')) {
@@ -127,10 +128,9 @@ class MeilisearchFilesParseCommand extends Command
$checksum = md5($normalized . '|' . $mtime); $checksum = md5($normalized . '|' . $mtime);
// ------------------------------------------------- // -------------------------------------------------
// 5) Unveränderte Dateien überspringen // 5) Skip unchanged
// ------------------------------------------------- // -------------------------------------------------
if ($file['checksum'] === $checksum && !empty($file['text'])) { if ($file['checksum'] === $checksum && !empty($file['text'])) {
$this->log('Skip unchanged file', ['url' => $normalized]);
continue; continue;
} }
@@ -140,7 +140,7 @@ class MeilisearchFilesParseCommand extends Command
} }
// ------------------------------------------------- // -------------------------------------------------
// 6) Content-Type anhand Dateiendung // 6) MIME-Type
// ------------------------------------------------- // -------------------------------------------------
$ext = strtolower(pathinfo($normalized, PATHINFO_EXTENSION)); $ext = strtolower(pathinfo($normalized, PATHINFO_EXTENSION));
@@ -158,7 +158,7 @@ class MeilisearchFilesParseCommand extends Command
} }
// ------------------------------------------------- // -------------------------------------------------
// 7) Tika-Parsing // 7) Tika parse
// ------------------------------------------------- // -------------------------------------------------
try { try {
$this->log('Parsing file', ['url' => $normalized]); $this->log('Parsing file', ['url' => $normalized]);
@@ -206,9 +206,6 @@ class MeilisearchFilesParseCommand extends Command
return Command::SUCCESS; return Command::SUCCESS;
} }
/**
* Einheitliches Logging
*/
private function log(string $message, array $context = []): void private function log(string $message, array $context = []): void
{ {
$ctx = $context $ctx = $context