add Parse Command

This commit is contained in:
Jürgen Mummert
2026-01-09 16:16:44 +01:00
parent 0fa0642618
commit 5cd8286286
+24 -5
View File
@@ -100,7 +100,7 @@ class MeilisearchFilesParseCommand extends Command
$normalized = strtok($normalized, '#'); $normalized = strtok($normalized, '#');
// ------------------------------------------------- // -------------------------------------------------
// 3) URL-Decoding (Umlaute, Leerzeichen) // 3) URL-Decoding
// ------------------------------------------------- // -------------------------------------------------
$normalized = rawurldecode($normalized); $normalized = rawurldecode($normalized);
@@ -140,17 +140,36 @@ class MeilisearchFilesParseCommand extends Command
} }
// ------------------------------------------------- // -------------------------------------------------
// 6) Tika-Parsing // 6) Content-Type anhand Dateiendung
// -------------------------------------------------
$ext = strtolower(pathinfo($normalized, PATHINFO_EXTENSION));
$mimeType = match ($ext) {
'pdf' => 'application/pdf',
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
default => null,
};
if ($mimeType === null) {
$this->log('Unsupported file type, skip', ['url' => $normalized]);
continue;
}
// -------------------------------------------------
// 7) Tika-Parsing
// ------------------------------------------------- // -------------------------------------------------
try { try {
$this->log('Parsing file', ['url' => $normalized]); $this->log('Parsing file', ['url' => $normalized]);
$response = $client->request( $response = $client->request(
'PUT', 'PUT',
$tikaUrl . '/tika', $tikaUrl . '/tika/main',
[ [
'headers' => [ 'headers' => [
'Accept' => 'text/plain', 'Accept' => 'text/plain',
'Content-Type' => $mimeType,
], ],
'body' => fopen($absolutePath, 'rb'), 'body' => fopen($absolutePath, 'rb'),
] ]
@@ -172,7 +191,7 @@ class MeilisearchFilesParseCommand extends Command
$this->log('File parsed', [ $this->log('File parsed', [
'url' => $normalized, 'url' => $normalized,
'chars' => strlen($text), 'chars' => mb_strlen($text),
]); ]);
} catch (\Throwable $e) { } catch (\Throwable $e) {