add Parse Command
This commit is contained in:
@@ -27,8 +27,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
'limit',
|
'limit',
|
||||||
null,
|
null,
|
||||||
InputOption::VALUE_OPTIONAL,
|
InputOption::VALUE_OPTIONAL,
|
||||||
'Maximum number of files to parse per run',
|
'Maximum number of files to check per run (optional)'
|
||||||
20
|
|
||||||
)
|
)
|
||||||
->addOption(
|
->addOption(
|
||||||
'dry-run',
|
'dry-run',
|
||||||
@@ -41,13 +40,16 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||||
{
|
{
|
||||||
$this->framework->initialize();
|
$this->framework->initialize();
|
||||||
|
|
||||||
$this->log('Parser gestartet');
|
$this->log('Parser gestartet');
|
||||||
|
|
||||||
$limit = max(1, (int) $input->getOption('limit'));
|
|
||||||
$dryRun = (bool) $input->getOption('dry-run');
|
$dryRun = (bool) $input->getOption('dry-run');
|
||||||
|
|
||||||
$tikaUrl = rtrim((string) $GLOBALS['TL_CONFIG']['meilisearch_tika_url'], '/');
|
// ---- LIMIT: nur wenn explizit gesetzt
|
||||||
|
$limitOption = $input->getOption('limit');
|
||||||
|
$limit = $limitOption !== null ? max(1, (int) $limitOption) : null;
|
||||||
|
|
||||||
|
// ---- Tika URL
|
||||||
|
$tikaUrl = rtrim((string) ($GLOBALS['TL_CONFIG']['meilisearch_tika_url'] ?? ''), '/');
|
||||||
if ($tikaUrl === '') {
|
if ($tikaUrl === '') {
|
||||||
$output->writeln('<error>Tika URL not configured</error>');
|
$output->writeln('<error>Tika URL not configured</error>');
|
||||||
return Command::FAILURE;
|
return Command::FAILURE;
|
||||||
@@ -55,14 +57,13 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
|
|
||||||
$db = Database::getInstance();
|
$db = Database::getInstance();
|
||||||
|
|
||||||
$files = $db
|
// ---- Files laden
|
||||||
->query(
|
$sql = "SELECT * FROM tl_search_files ORDER BY tstamp ASC";
|
||||||
"SELECT *
|
if ($limit !== null) {
|
||||||
FROM tl_search_files
|
$sql .= " LIMIT " . (int) $limit;
|
||||||
ORDER BY tstamp ASC
|
}
|
||||||
LIMIT " . (int) $limit
|
|
||||||
)
|
$files = $db->query($sql)->fetchAllAssoc();
|
||||||
->fetchAllAssoc();
|
|
||||||
|
|
||||||
if (!$files) {
|
if (!$files) {
|
||||||
$this->log('No files to parse');
|
$this->log('No files to parse');
|
||||||
@@ -70,7 +71,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
$client = HttpClient::create([
|
$client = HttpClient::create([
|
||||||
'timeout' => 120,
|
'timeout' => 180,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
foreach ($files as $file) {
|
foreach ($files as $file) {
|
||||||
@@ -79,7 +80,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
$normalized = $originalUrl;
|
$normalized = $originalUrl;
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 1) Query-URL behandeln (?file=files/...)
|
// 1) ?file=files/…
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
if (str_contains($normalized, '?')) {
|
if (str_contains($normalized, '?')) {
|
||||||
$parts = parse_url($normalized);
|
$parts = parse_url($normalized);
|
||||||
@@ -95,7 +96,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 2) Fragment entfernen (#...)
|
// 2) Fragment entfernen
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
$normalized = strtok($normalized, '#');
|
$normalized = strtok($normalized, '#');
|
||||||
|
|
||||||
@@ -105,7 +106,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
$normalized = rawurldecode($normalized);
|
$normalized = rawurldecode($normalized);
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 4) Nur lokale files/… zulassen
|
// 4) Nur lokale files/
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
$normalized = ltrim($normalized, '/');
|
$normalized = ltrim($normalized, '/');
|
||||||
if (!str_starts_with($normalized, 'files/')) {
|
if (!str_starts_with($normalized, 'files/')) {
|
||||||
@@ -127,10 +128,9 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
$checksum = md5($normalized . '|' . $mtime);
|
$checksum = md5($normalized . '|' . $mtime);
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 5) Unveränderte Dateien überspringen
|
// 5) Skip unchanged
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
if ($file['checksum'] === $checksum && !empty($file['text'])) {
|
if ($file['checksum'] === $checksum && !empty($file['text'])) {
|
||||||
$this->log('Skip unchanged file', ['url' => $normalized]);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -140,7 +140,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 6) Content-Type anhand Dateiendung
|
// 6) MIME-Type
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
$ext = strtolower(pathinfo($normalized, PATHINFO_EXTENSION));
|
$ext = strtolower(pathinfo($normalized, PATHINFO_EXTENSION));
|
||||||
|
|
||||||
@@ -158,7 +158,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 7) Tika-Parsing
|
// 7) Tika parse
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
try {
|
try {
|
||||||
$this->log('Parsing file', ['url' => $normalized]);
|
$this->log('Parsing file', ['url' => $normalized]);
|
||||||
@@ -206,9 +206,6 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
return Command::SUCCESS;
|
return Command::SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Einheitliches Logging
|
|
||||||
*/
|
|
||||||
private function log(string $message, array $context = []): void
|
private function log(string $message, array $context = []): void
|
||||||
{
|
{
|
||||||
$ctx = $context
|
$ctx = $context
|
||||||
|
|||||||
Reference in New Issue
Block a user