add Parse Command
This commit is contained in:
@@ -100,7 +100,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
$normalized = strtok($normalized, '#');
|
$normalized = strtok($normalized, '#');
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 3) URL-Decoding (Umlaute, Leerzeichen)
|
// 3) URL-Decoding
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
$normalized = rawurldecode($normalized);
|
$normalized = rawurldecode($normalized);
|
||||||
|
|
||||||
@@ -140,17 +140,36 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// 6) Tika-Parsing
|
// 6) Content-Type anhand Dateiendung
|
||||||
|
// -------------------------------------------------
|
||||||
|
$ext = strtolower(pathinfo($normalized, PATHINFO_EXTENSION));
|
||||||
|
|
||||||
|
$mimeType = match ($ext) {
|
||||||
|
'pdf' => 'application/pdf',
|
||||||
|
'docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
|
'xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||||
|
'pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||||
|
default => null,
|
||||||
|
};
|
||||||
|
|
||||||
|
if ($mimeType === null) {
|
||||||
|
$this->log('Unsupported file type, skip', ['url' => $normalized]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------
|
||||||
|
// 7) Tika-Parsing
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
try {
|
try {
|
||||||
$this->log('Parsing file', ['url' => $normalized]);
|
$this->log('Parsing file', ['url' => $normalized]);
|
||||||
|
|
||||||
$response = $client->request(
|
$response = $client->request(
|
||||||
'PUT',
|
'PUT',
|
||||||
$tikaUrl . '/tika',
|
$tikaUrl . '/tika/main',
|
||||||
[
|
[
|
||||||
'headers' => [
|
'headers' => [
|
||||||
'Accept' => 'text/plain',
|
'Accept' => 'text/plain',
|
||||||
|
'Content-Type' => $mimeType,
|
||||||
],
|
],
|
||||||
'body' => fopen($absolutePath, 'rb'),
|
'body' => fopen($absolutePath, 'rb'),
|
||||||
]
|
]
|
||||||
@@ -172,7 +191,7 @@ class MeilisearchFilesParseCommand extends Command
|
|||||||
|
|
||||||
$this->log('File parsed', [
|
$this->log('File parsed', [
|
||||||
'url' => $normalized,
|
'url' => $normalized,
|
||||||
'chars' => strlen($text),
|
'chars' => mb_strlen($text),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
|
|||||||
Reference in New Issue
Block a user