Bugfix
This commit is contained in:
@@ -20,72 +20,64 @@ class MeilisearchImageHelper
|
|||||||
*/
|
*/
|
||||||
public function resolveImagePath(?string $uuid): ?string
|
public function resolveImagePath(?string $uuid): ?string
|
||||||
{
|
{
|
||||||
error_log('--- MeiliImg START ---');
|
|
||||||
|
|
||||||
if (!$uuid) {
|
if (!$uuid) {
|
||||||
error_log('[MeiliImg] UUID leer → return null');
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_log('[MeiliImg] UUID = ' . $uuid);
|
|
||||||
|
|
||||||
// Contao-Framework initialisieren (CLI & Frontend)
|
// Contao-Framework initialisieren (CLI & Frontend)
|
||||||
|
try {
|
||||||
$this->framework->initialize();
|
$this->framework->initialize();
|
||||||
error_log('[MeiliImg] Framework initialized');
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] ImageHelper: Framework init failed: ' . $e->getMessage());
|
||||||
/** @var FilesModel|null $file */
|
|
||||||
$file = FilesModel::findByUuid($uuid);
|
|
||||||
|
|
||||||
if (!$file) {
|
|
||||||
error_log('[MeiliImg] FilesModel::findByUuid() = NULL');
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_log('[MeiliImg] FilesModel gefunden');
|
/** @var FilesModel|null $file */
|
||||||
error_log('[MeiliImg] file->path = ' . $file->path);
|
try {
|
||||||
error_log('[MeiliImg] file->uuid = ' . ($file->uuid ?? '(n/a)'));
|
$file = FilesModel::findByUuid($uuid);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log(
|
||||||
|
'[ContaoMeilisearch] ImageHelper: FilesModel lookup failed (' . $uuid . '): ' . $e->getMessage()
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$file) {
|
||||||
|
error_log('[ContaoMeilisearch] ImageHelper: File not found for UUID ' . $uuid);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// ImageSize aus tl_settings
|
// ImageSize aus tl_settings
|
||||||
$rawSize = Config::get('meilisearch_imagesize');
|
$imageSizeId = (int) Config::get('meilisearch_imagesize');
|
||||||
$imageSizeId = (int) $rawSize;
|
|
||||||
|
|
||||||
error_log('[MeiliImg] meilisearch_imagesize raw = ' . var_export($rawSize, true));
|
|
||||||
error_log('[MeiliImg] meilisearch_imagesize int = ' . $imageSizeId);
|
|
||||||
|
|
||||||
// Fallback: Originaldatei
|
// Fallback: Originaldatei
|
||||||
if ($imageSizeId <= 0) {
|
if ($imageSizeId <= 0) {
|
||||||
error_log('[MeiliImg] imageSizeId <= 0 → FALLBACK file->path = ' . $file->path);
|
|
||||||
error_log('--- MeiliImg END ---');
|
|
||||||
return $file->path;
|
return $file->path;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
$builder = $this->studio
|
$figure = $this->studio
|
||||||
->createFigureBuilder()
|
->createFigureBuilder()
|
||||||
->from($file->path)
|
->from($file->path)
|
||||||
->setSize($imageSizeId);
|
->setSize($imageSizeId)
|
||||||
|
->build();
|
||||||
error_log('[MeiliImg] FigureBuilder erstellt (from=' . $file->path . ', size=' . $imageSizeId . ')');
|
|
||||||
|
|
||||||
$figure = $builder->build();
|
|
||||||
error_log('[MeiliImg] Figure build() OK');
|
|
||||||
|
|
||||||
$image = $figure->getImage();
|
$image = $figure->getImage();
|
||||||
|
|
||||||
if ($image === null) {
|
if ($image === null) {
|
||||||
error_log('[MeiliImg] figure->getImage() = NULL');
|
error_log(
|
||||||
|
'[ContaoMeilisearch] ImageHelper: Image generation failed for ' . $file->path
|
||||||
|
);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
$src = $image->getImageSrc();
|
return $image->getImageSrc() ?: null;
|
||||||
|
|
||||||
error_log('[MeiliImg] image->getImageSrc() = ' . $src);
|
|
||||||
|
|
||||||
return $src ?: null;
|
|
||||||
|
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
error_log('[MeiliImg] EXCEPTION ' . get_class($e) . ': ' . $e->getMessage());
|
error_log(
|
||||||
error_log('--- MeiliImg END ---');
|
'[ContaoMeilisearch] ImageHelper: Image processing failed for '
|
||||||
|
. $file->path . ': ' . $e->getMessage()
|
||||||
|
);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,26 +34,48 @@ class MeilisearchIndexService
|
|||||||
*/
|
*/
|
||||||
public function run(): void
|
public function run(): void
|
||||||
{
|
{
|
||||||
|
try {
|
||||||
$this->framework->initialize();
|
$this->framework->initialize();
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Framework initialization failed: ' . $e->getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$host = (string) Config::get('meilisearch_host');
|
$host = (string) Config::get('meilisearch_host');
|
||||||
$apiKey = (string) Config::get('meilisearch_api_write');
|
$apiKey = (string) Config::get('meilisearch_api_write');
|
||||||
$this->indexName = (string) Config::get('meilisearch_index');
|
$this->indexName = (string) Config::get('meilisearch_index');
|
||||||
|
|
||||||
if ($host === '' || $this->indexName === '') {
|
if ($host === '' || $this->indexName === '') {
|
||||||
throw new \RuntimeException('Meilisearch is not configured in tl_settings.');
|
error_log('[ContaoMeilisearch] Meilisearch is not configured in tl_settings.');
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
$this->client = new Client($host, $apiKey);
|
$this->client = new Client($host, $apiKey);
|
||||||
$index = $this->client->index($this->indexName);
|
$index = $this->client->index($this->indexName);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to connect to Meilisearch: ' . $e->getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
$index->updateSettings(['primaryKey' => 'id']);
|
$index->updateSettings(['primaryKey' => 'id']);
|
||||||
} catch (\Throwable) {}
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to set primaryKey: ' . $e->getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
$this->ensureIndexSettings($index);
|
$this->ensureIndexSettings($index);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to update index settings: ' . $e->getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
$index->deleteAllDocuments();
|
$index->deleteAllDocuments();
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to delete documents: ' . $e->getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
$this->indexTlSearch($index);
|
$this->indexTlSearch($index);
|
||||||
$this->indexTlSearchPdf($index);
|
$this->indexTlSearchPdf($index);
|
||||||
@@ -78,7 +100,6 @@ class MeilisearchIndexService
|
|||||||
$text
|
$text
|
||||||
);
|
);
|
||||||
|
|
||||||
// Text normalisieren
|
|
||||||
$text = preg_replace('/\s{2,}/u', ' ', $text);
|
$text = preg_replace('/\s{2,}/u', ' ', $text);
|
||||||
$text = preg_replace('/\n{2,}/u', "\n", $text);
|
$text = preg_replace('/\n{2,}/u', "\n", $text);
|
||||||
|
|
||||||
@@ -121,7 +142,13 @@ class MeilisearchIndexService
|
|||||||
*/
|
*/
|
||||||
private function indexTlSearch(Indexes $index): void
|
private function indexTlSearch(Indexes $index): void
|
||||||
{
|
{
|
||||||
|
try {
|
||||||
$rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search');
|
$rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search');
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to read tl_search: ' . $e->getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!$rows) {
|
if (!$rows) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -132,6 +159,7 @@ class MeilisearchIndexService
|
|||||||
$documents = [];
|
$documents = [];
|
||||||
|
|
||||||
foreach ($rows as $row) {
|
foreach ($rows as $row) {
|
||||||
|
try {
|
||||||
$type = $this->detectTypeFromMeta($row['meta'] ?? null);
|
$type = $this->detectTypeFromMeta($row['meta'] ?? null);
|
||||||
|
|
||||||
$eventStart = null;
|
$eventStart = null;
|
||||||
@@ -168,10 +196,21 @@ class MeilisearchIndexService
|
|||||||
}
|
}
|
||||||
|
|
||||||
$documents[] = $doc;
|
$documents[] = $doc;
|
||||||
|
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to build document for tl_search ID '
|
||||||
|
. ($row['id'] ?? '?') . ': ' . $e->getMessage()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($documents !== []) {
|
if ($documents !== []) {
|
||||||
|
try {
|
||||||
$index->addDocuments($documents);
|
$index->addDocuments($documents);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to add tl_search documents: ' . $e->getMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -180,7 +219,13 @@ class MeilisearchIndexService
|
|||||||
*/
|
*/
|
||||||
private function indexTlSearchPdf(Indexes $index): void
|
private function indexTlSearchPdf(Indexes $index): void
|
||||||
{
|
{
|
||||||
|
try {
|
||||||
$rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search_pdf');
|
$rows = $this->connection->fetchAllAssociative('SELECT * FROM tl_search_pdf');
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to read tl_search_pdf: ' . $e->getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!$rows) {
|
if (!$rows) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -188,6 +233,7 @@ class MeilisearchIndexService
|
|||||||
$documents = [];
|
$documents = [];
|
||||||
|
|
||||||
foreach ($rows as $row) {
|
foreach ($rows as $row) {
|
||||||
|
try {
|
||||||
$fileType = in_array($row['type'], ['pdf', 'docx', 'xlsx', 'pptx'], true)
|
$fileType = in_array($row['type'], ['pdf', 'docx', 'xlsx', 'pptx'], true)
|
||||||
? $row['type']
|
? $row['type']
|
||||||
: 'pdf';
|
: 'pdf';
|
||||||
@@ -202,9 +248,22 @@ class MeilisearchIndexService
|
|||||||
'poster' => self::FILETYPE_ICON_MAP[$fileType]
|
'poster' => self::FILETYPE_ICON_MAP[$fileType]
|
||||||
?? self::FILETYPE_ICON_MAP['pdf'],
|
?? self::FILETYPE_ICON_MAP['pdf'],
|
||||||
];
|
];
|
||||||
|
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to build PDF document for ID '
|
||||||
|
. ($row['id'] ?? '?') . ': ' . $e->getMessage()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($documents !== []) {
|
||||||
|
try {
|
||||||
$index->addDocuments($documents);
|
$index->addDocuments($documents);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log('[ContaoMeilisearch] Failed to add tl_search_pdf documents: ' . $e->getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function detectTypeFromMeta(?string $meta): string
|
private function detectTypeFromMeta(?string $meta): string
|
||||||
|
|||||||
@@ -34,19 +34,15 @@ class OfficeIndexService
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
error_log('bearbeite Office-Datei: ' . $url);
|
|
||||||
|
|
||||||
// innerhalb des Crawls gleiche URL nicht mehrfach parsen
|
// innerhalb des Crawls gleiche URL nicht mehrfach parsen
|
||||||
$seenKey = md5($url);
|
$seenKey = md5($url);
|
||||||
if (isset($this->seenThisCrawl[$seenKey])) {
|
if (isset($this->seenThisCrawl[$seenKey])) {
|
||||||
error_log('→ übersprungen: bereits im Crawl verarbeitet');
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$this->seenThisCrawl[$seenKey] = true;
|
$this->seenThisCrawl[$seenKey] = true;
|
||||||
|
|
||||||
$normalized = $this->normalizeOfficeUrl($url);
|
$normalized = $this->normalizeOfficeUrl($url);
|
||||||
if ($normalized === null) {
|
if ($normalized === null) {
|
||||||
error_log('→ übersprungen: kein gültiger Office-Pfad');
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -54,7 +50,6 @@ class OfficeIndexService
|
|||||||
|
|
||||||
$absolutePath = $this->getAbsolutePath($relativePath);
|
$absolutePath = $this->getAbsolutePath($relativePath);
|
||||||
if (!is_file($absolutePath)) {
|
if (!is_file($absolutePath)) {
|
||||||
error_log('→ übersprungen: Datei existiert nicht: ' . $absolutePath);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,7 +60,6 @@ class OfficeIndexService
|
|||||||
|
|
||||||
$text = $this->parseOfficeFile($absolutePath, $type);
|
$text = $this->parseOfficeFile($absolutePath, $type);
|
||||||
if ($text === '') {
|
if ($text === '') {
|
||||||
error_log('→ übersprungen: Office-Datei ohne Textinhalt');
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,10 +72,10 @@ class OfficeIndexService
|
|||||||
$type
|
$type
|
||||||
);
|
);
|
||||||
|
|
||||||
error_log('geschrieben in tl_search_pdf');
|
|
||||||
|
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
error_log('Office Service FEHLER: ' . $e->getMessage());
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Office indexing failed for "' . $url . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -107,11 +101,7 @@ class OfficeIndexService
|
|||||||
parse_str($parts['query'], $query);
|
parse_str($parts['query'], $query);
|
||||||
|
|
||||||
if (!empty($query['p'])) {
|
if (!empty($query['p'])) {
|
||||||
$p = (string) $query['p'];
|
$p = urldecode((string) $query['p']);
|
||||||
|
|
||||||
// Query-Parameter korrekt dekodieren
|
|
||||||
$p = urldecode($p);
|
|
||||||
|
|
||||||
$ext = strtolower(pathinfo($p, PATHINFO_EXTENSION));
|
$ext = strtolower(pathinfo($p, PATHINFO_EXTENSION));
|
||||||
|
|
||||||
if (in_array($ext, ['docx', 'xlsx', 'pptx'], true)) {
|
if (in_array($ext, ['docx', 'xlsx', 'pptx'], true)) {
|
||||||
@@ -136,9 +126,9 @@ class OfficeIndexService
|
|||||||
int $mtime,
|
int $mtime,
|
||||||
string $type
|
string $type
|
||||||
): void {
|
): void {
|
||||||
$db = Database::getInstance();
|
try {
|
||||||
|
Database::getInstance()
|
||||||
$db->prepare('
|
->prepare('
|
||||||
INSERT INTO tl_search_pdf
|
INSERT INTO tl_search_pdf
|
||||||
(tstamp, type, url, title, text, checksum, file_mtime)
|
(tstamp, type, url, title, text, checksum, file_mtime)
|
||||||
VALUES
|
VALUES
|
||||||
@@ -150,7 +140,8 @@ class OfficeIndexService
|
|||||||
title=VALUES(title),
|
title=VALUES(title),
|
||||||
text=VALUES(text),
|
text=VALUES(text),
|
||||||
file_mtime=VALUES(file_mtime)
|
file_mtime=VALUES(file_mtime)
|
||||||
')->execute(
|
')
|
||||||
|
->execute(
|
||||||
time(),
|
time(),
|
||||||
$type,
|
$type,
|
||||||
$url,
|
$url,
|
||||||
@@ -159,6 +150,11 @@ class OfficeIndexService
|
|||||||
$checksum,
|
$checksum,
|
||||||
$mtime
|
$mtime
|
||||||
);
|
);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to write Office index entry (' . $url . '): ' . $e->getMessage()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function parseOfficeFile(string $absolutePath, string $type): string
|
private function parseOfficeFile(string $absolutePath, string $type): string
|
||||||
@@ -186,8 +182,10 @@ class OfficeIndexService
|
|||||||
}
|
}
|
||||||
|
|
||||||
return $this->cleanText($text);
|
return $this->cleanText($text);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
} catch (\Throwable) {
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to parse DOCX "' . $absolutePath . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -205,8 +203,10 @@ class OfficeIndexService
|
|||||||
}
|
}
|
||||||
|
|
||||||
return $this->cleanText($text);
|
return $this->cleanText($text);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
} catch (\Throwable) {
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to parse XLSX "' . $absolutePath . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -226,8 +226,10 @@ class OfficeIndexService
|
|||||||
}
|
}
|
||||||
|
|
||||||
return $this->cleanText($text);
|
return $this->cleanText($text);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
} catch (\Throwable) {
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to parse PPTX "' . $absolutePath . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ class PdfIndexService
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Wird aus dem Listener beim ersten Hook-Call pro Crawl aufgerufen.
|
* Wird aus dem Listener beim ersten Hook-Call pro Crawl aufgerufen.
|
||||||
* MUSS IMMER laufen (auch wenn Checkbox später aus ist).
|
|
||||||
*/
|
*/
|
||||||
public function resetTableOnce(): void
|
public function resetTableOnce(): void
|
||||||
{
|
{
|
||||||
@@ -34,10 +33,11 @@ class PdfIndexService
|
|||||||
$this->didReset = true;
|
$this->didReset = true;
|
||||||
$this->seenThisCrawl = [];
|
$this->seenThisCrawl = [];
|
||||||
|
|
||||||
// bei <=100 PDFs: sauber & simpel
|
try {
|
||||||
Database::getInstance()->execute('TRUNCATE tl_search_pdf');
|
Database::getInstance()->execute('TRUNCATE tl_search_pdf');
|
||||||
|
} catch (\Throwable $e) {
|
||||||
error_log('PDF Reset: tl_search_pdf geleert (TRUNCATE)');
|
error_log('[ContaoMeilisearch] PDF reset failed: ' . $e->getMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -54,25 +54,20 @@ class PdfIndexService
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
error_log('bearbeite PDF: ' . $url);
|
// innerhalb des Crawls gleiche URL nicht mehrfach parsen
|
||||||
|
|
||||||
// innerhalb des Crawls gleiche URL nicht 20x parsen (News-Teaser etc.)
|
|
||||||
$seenKey = md5($url);
|
$seenKey = md5($url);
|
||||||
if (isset($this->seenThisCrawl[$seenKey])) {
|
if (isset($this->seenThisCrawl[$seenKey])) {
|
||||||
error_log('→ übersprungen: bereits im Crawl verarbeitet');
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$this->seenThisCrawl[$seenKey] = true;
|
$this->seenThisCrawl[$seenKey] = true;
|
||||||
|
|
||||||
$normalizedPath = $this->normalizePdfUrl($url);
|
$normalizedPath = $this->normalizePdfUrl($url);
|
||||||
if ($normalizedPath === null) {
|
if ($normalizedPath === null) {
|
||||||
error_log('→ übersprungen: kein gültiger PDF-Pfad');
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$absolutePath = $this->getAbsolutePath($normalizedPath);
|
$absolutePath = $this->getAbsolutePath($normalizedPath);
|
||||||
if (!is_file($absolutePath)) {
|
if (!is_file($absolutePath)) {
|
||||||
error_log('→ übersprungen: Datei existiert nicht: ' . $absolutePath);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,7 +83,6 @@ class PdfIndexService
|
|||||||
|
|
||||||
$text = $this->parsePdf($absolutePath);
|
$text = $this->parsePdf($absolutePath);
|
||||||
if ($text === '') {
|
if ($text === '') {
|
||||||
error_log('→ übersprungen: PDF ohne Textinhalt');
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,10 +94,10 @@ class PdfIndexService
|
|||||||
$mtime
|
$mtime
|
||||||
);
|
);
|
||||||
|
|
||||||
error_log('geschrieben in tl_search_pdf');
|
|
||||||
|
|
||||||
} catch (\Throwable $e) {
|
} catch (\Throwable $e) {
|
||||||
error_log('PDF Service FEHLER: ' . $e->getMessage());
|
error_log(
|
||||||
|
'[ContaoMeilisearch] PDF indexing failed for "' . $url . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -118,8 +112,12 @@ class PdfIndexService
|
|||||||
$decoded = html_entity_decode($url);
|
$decoded = html_entity_decode($url);
|
||||||
$parts = parse_url($decoded);
|
$parts = parse_url($decoded);
|
||||||
|
|
||||||
// Fall 2: absolute URL auf gleiche Site -> Pfad extrahieren
|
// Fall 2: absolute URL auf gleiche Site
|
||||||
if (!empty($parts['path']) && str_starts_with($parts['path'], '/files/') && str_ends_with(strtolower($parts['path']), '.pdf')) {
|
if (
|
||||||
|
!empty($parts['path'])
|
||||||
|
&& str_starts_with($parts['path'], '/files/')
|
||||||
|
&& str_ends_with(strtolower($parts['path']), '.pdf')
|
||||||
|
) {
|
||||||
return $parts['path'];
|
return $parts['path'];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,13 +129,7 @@ class PdfIndexService
|
|||||||
parse_str($parts['query'], $query);
|
parse_str($parts['query'], $query);
|
||||||
|
|
||||||
if (!empty($query['p'])) {
|
if (!empty($query['p'])) {
|
||||||
$p = (string) $query['p'];
|
$p = urldecode((string) $query['p']);
|
||||||
|
|
||||||
// Query-Parameter korrekt dekodieren
|
|
||||||
$p = urldecode($p);
|
|
||||||
|
|
||||||
// deine Links enthalten oft "pdf/DATEI.pdf"
|
|
||||||
// => wird zu "/files/pdf/DATEI.pdf"
|
|
||||||
return '/files/' . ltrim($p, '/');
|
return '/files/' . ltrim($p, '/');
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -151,10 +143,9 @@ class PdfIndexService
|
|||||||
|
|
||||||
private function upsertPdf(string $url, string $title, string $text, string $checksum, int $mtime): void
|
private function upsertPdf(string $url, string $title, string $text, string $checksum, int $mtime): void
|
||||||
{
|
{
|
||||||
$db = Database::getInstance();
|
try {
|
||||||
|
Database::getInstance()
|
||||||
// wichtig: UNIQUE(checksum) -> entweder INSERT oder UPDATE
|
->prepare('
|
||||||
$db->prepare('
|
|
||||||
INSERT INTO tl_search_pdf
|
INSERT INTO tl_search_pdf
|
||||||
(tstamp, url, title, text, checksum, file_mtime)
|
(tstamp, url, title, text, checksum, file_mtime)
|
||||||
VALUES
|
VALUES
|
||||||
@@ -165,7 +156,8 @@ class PdfIndexService
|
|||||||
title=VALUES(title),
|
title=VALUES(title),
|
||||||
text=VALUES(text),
|
text=VALUES(text),
|
||||||
file_mtime=VALUES(file_mtime)
|
file_mtime=VALUES(file_mtime)
|
||||||
')->execute(
|
')
|
||||||
|
->execute(
|
||||||
time(),
|
time(),
|
||||||
$url,
|
$url,
|
||||||
$title,
|
$title,
|
||||||
@@ -173,6 +165,11 @@ class PdfIndexService
|
|||||||
$checksum,
|
$checksum,
|
||||||
$mtime
|
$mtime
|
||||||
);
|
);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to write PDF index entry (' . $url . '): ' . $e->getMessage()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function parsePdf(string $absolutePath): string
|
private function parsePdf(string $absolutePath): string
|
||||||
@@ -184,8 +181,10 @@ class PdfIndexService
|
|||||||
$text = $this->cleanPdfContent($pdf->getText());
|
$text = $this->cleanPdfContent($pdf->getText());
|
||||||
|
|
||||||
return mb_substr($text, 0, 20000);
|
return mb_substr($text, 0, 20000);
|
||||||
|
} catch (\Throwable $e) {
|
||||||
} catch (\Throwable) {
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to parse PDF "' . $absolutePath . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -221,8 +220,10 @@ class PdfIndexService
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (\Throwable) {
|
} catch (\Throwable $e) {
|
||||||
// ignore
|
error_log(
|
||||||
|
'[ContaoMeilisearch] Failed to read PDF metadata "' . $absolutePath . '": ' . $e->getMessage()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
Reference in New Issue
Block a user