new Twig
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
namespace MummertMedia\ContaoMeilisearchBundle\Command;
|
||||
|
||||
use Contao\CoreBundle\Framework\ContaoFramework;
|
||||
use Doctrine\DBAL\Connection;
|
||||
use Contao\Database;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
@@ -13,7 +13,6 @@ class MeilisearchFilesCleanupCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private readonly ContaoFramework $framework,
|
||||
private readonly Connection $connection,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
@@ -22,13 +21,13 @@ class MeilisearchFilesCleanupCommand extends Command
|
||||
{
|
||||
$this
|
||||
->setName('meilisearch:files:cleanup')
|
||||
->setDescription('Remove stale indexed files from tl_search_files')
|
||||
->setDescription('Remove stale indexed files (PDF, DOCX, XLSX, PPTX) from tl_search_pdf')
|
||||
->addOption(
|
||||
'grace',
|
||||
null,
|
||||
InputOption::VALUE_OPTIONAL,
|
||||
'Grace period in seconds (files newer than now-grace are kept)',
|
||||
86400
|
||||
86400 // 24 Stunden
|
||||
)
|
||||
->addOption(
|
||||
'dry-run',
|
||||
@@ -50,10 +49,10 @@ class MeilisearchFilesCleanupCommand extends Command
|
||||
$cutoff = time() - $grace;
|
||||
|
||||
if ($dryRun) {
|
||||
$count = $this->connection->fetchOne(
|
||||
'SELECT COUNT(*) FROM tl_search_files WHERE last_seen < ?',
|
||||
[$cutoff]
|
||||
);
|
||||
$count = Database::getInstance()
|
||||
->prepare('SELECT COUNT(*) AS cnt FROM tl_search_pdf WHERE last_seen < ?')
|
||||
->execute($cutoff)
|
||||
->cnt;
|
||||
|
||||
$message = sprintf(
|
||||
'[DRY-RUN] %d stale file(s) would be removed (last_seen < %s)',
|
||||
@@ -64,14 +63,14 @@ class MeilisearchFilesCleanupCommand extends Command
|
||||
$output->writeln('<comment>' . $message . '</comment>');
|
||||
$this->log($message);
|
||||
|
||||
$this->log('Cleaner stopped (dry-run)');
|
||||
$this->log('Cleaner successfully stopped');
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
|
||||
$affected = $this->connection->executeStatement(
|
||||
'DELETE FROM tl_search_files WHERE last_seen < ?',
|
||||
[$cutoff]
|
||||
);
|
||||
$affected = Database::getInstance()
|
||||
->prepare('DELETE FROM tl_search_pdf WHERE last_seen < ?')
|
||||
->execute($cutoff)
|
||||
->affectedRows;
|
||||
|
||||
$message = sprintf(
|
||||
'Removed %d stale file(s) (last_seen < %s)',
|
||||
@@ -93,8 +92,15 @@ class MeilisearchFilesCleanupCommand extends Command
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Einheitliches Logging mit Zeitstempel
|
||||
*/
|
||||
private function log(string $message): void
|
||||
{
|
||||
error_log(sprintf('[%s] %s', date('Y-m-d H:i:s'), $message));
|
||||
error_log(sprintf(
|
||||
'[%s] %s',
|
||||
date('Y-m-d H:i:s'),
|
||||
$message
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -3,13 +3,15 @@
|
||||
namespace MummertMedia\ContaoMeilisearchBundle\EventListener;
|
||||
|
||||
use Contao\Config;
|
||||
use Contao\System;
|
||||
use MummertMedia\ContaoMeilisearchBundle\Service\PdfIndexService;
|
||||
use MummertMedia\ContaoMeilisearchBundle\Service\OfficeIndexService;
|
||||
|
||||
class IndexPageListener
|
||||
{
|
||||
public function __construct()
|
||||
{
|
||||
}
|
||||
public function __construct(
|
||||
private readonly PdfIndexService $pdfIndexService,
|
||||
private readonly OfficeIndexService $officeIndexService,
|
||||
) {}
|
||||
|
||||
private function debug(string $message, array $context = []): void
|
||||
{
|
||||
@@ -101,6 +103,7 @@ class IndexPageListener
|
||||
$this->debug('Meta: searchimage candidate', ['searchimage' => $searchImage]);
|
||||
|
||||
if (!empty($searchImage)) {
|
||||
// >>> HINWEIS: falls dein tl_search-Feld "image" heißt, hier auf $set['image'] ändern!
|
||||
$set['imagepath'] = trim((string) $searchImage);
|
||||
}
|
||||
|
||||
@@ -136,12 +139,20 @@ class IndexPageListener
|
||||
'class' => $e::class,
|
||||
]);
|
||||
}
|
||||
|
||||
$this->debug('Meta: final set snapshot', [
|
||||
'priority' => $set['priority'] ?? null,
|
||||
'keywords' => $set['keywords'] ?? null,
|
||||
'imagepath' => $set['imagepath'] ?? null,
|
||||
'startDate' => $set['startDate'] ?? null,
|
||||
'checksum' => $set['checksum'] ?? null,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* =====================
|
||||
* DATEI-ERKENNUNG + UPSERT
|
||||
* DATEI-INDEXIERUNG (PDF / OFFICE)
|
||||
* =====================
|
||||
*/
|
||||
if ((int) ($data['protected'] ?? 0) !== 0) {
|
||||
@@ -149,13 +160,15 @@ class IndexPageListener
|
||||
return;
|
||||
}
|
||||
|
||||
$indexFiles = (bool) Config::get('meilisearch_index_files');
|
||||
$indexPdfs = (bool) Config::get('meilisearch_index_pdfs');
|
||||
$indexOffice = (bool) Config::get('meilisearch_index_office');
|
||||
|
||||
$this->debug('File indexing setting', [
|
||||
'meilisearch_index_files' => $indexFiles,
|
||||
$this->debug('File indexing settings', [
|
||||
'meilisearch_index_pdfs' => $indexPdfs,
|
||||
'meilisearch_index_office' => $indexOffice,
|
||||
]);
|
||||
|
||||
if (!$indexFiles) {
|
||||
if (!$indexPdfs && !$indexOffice) {
|
||||
$this->debug('Abort: file indexing disabled');
|
||||
return;
|
||||
}
|
||||
@@ -163,85 +176,61 @@ class IndexPageListener
|
||||
$links = $this->findAllLinks($content);
|
||||
$this->debug('Links found', ['count' => count($links)]);
|
||||
|
||||
$fileLinks = [];
|
||||
$pdfLinks = [];
|
||||
$officeLinks = [];
|
||||
|
||||
foreach ($links as $link) {
|
||||
$type = $this->detectIndexableFileType($link['url']);
|
||||
if ($type !== null) {
|
||||
$fileLinks[] = $link + ['type' => $type];
|
||||
|
||||
if ($type === 'pdf' && $indexPdfs) {
|
||||
$pdfLinks[] = $link;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (in_array($type, ['docx', 'xlsx', 'pptx'], true) && $indexOffice) {
|
||||
$officeLinks[] = $link;
|
||||
}
|
||||
}
|
||||
|
||||
$this->debug('Indexable file links found', [
|
||||
'count' => count($fileLinks),
|
||||
'types' => array_count_values(array_column($fileLinks, 'type')),
|
||||
$this->debug('Indexable file links', [
|
||||
'pdf' => count($pdfLinks),
|
||||
'office' => count($officeLinks),
|
||||
]);
|
||||
|
||||
if ($fileLinks) {
|
||||
$db = System::getContainer()->get('database_connection');
|
||||
$time = time();
|
||||
|
||||
foreach ($fileLinks as $file) {
|
||||
$url = strtok($file['url'], '#');
|
||||
|
||||
$path = parse_url($url, PHP_URL_PATH);
|
||||
$abs = $path ? TL_ROOT . '/' . ltrim($path, '/') : null;
|
||||
|
||||
$mtime = ($abs && is_file($abs)) ? filemtime($abs) : 0;
|
||||
$checksum = md5($url . '|' . $mtime);
|
||||
|
||||
$existing = $db->fetchAssociative(
|
||||
'SELECT id, checksum FROM tl_search_files WHERE url = ?',
|
||||
[$url]
|
||||
);
|
||||
|
||||
if ($existing) {
|
||||
$db->update(
|
||||
'tl_search_files',
|
||||
[
|
||||
'tstamp' => $time,
|
||||
'last_seen' => $time,
|
||||
'page_id' => (int) ($data['pid'] ?? 0),
|
||||
'file_mtime' => $mtime,
|
||||
'checksum' => $checksum,
|
||||
],
|
||||
['id' => $existing['id']]
|
||||
);
|
||||
|
||||
$this->debug('File updated', [
|
||||
'url' => $url,
|
||||
'checksum' => $checksum,
|
||||
]);
|
||||
} else {
|
||||
$db->insert(
|
||||
'tl_search_files',
|
||||
[
|
||||
'tstamp' => $time,
|
||||
'last_seen' => $time,
|
||||
'type' => $file['type'],
|
||||
'url' => $url,
|
||||
'title' => $file['linkText'] ?? basename($url),
|
||||
'page_id' => (int) ($data['pid'] ?? 0),
|
||||
'file_mtime' => $mtime,
|
||||
'checksum' => $checksum,
|
||||
]
|
||||
);
|
||||
|
||||
$this->debug('File inserted', [
|
||||
'url' => $url,
|
||||
'checksum' => $checksum,
|
||||
]);
|
||||
try {
|
||||
if ($pdfLinks !== []) {
|
||||
$this->debug('PDF handlePdfLinks(): call', ['count' => count($pdfLinks)]);
|
||||
$this->pdfIndexService->handlePdfLinks($pdfLinks);
|
||||
$this->debug('PDF handlePdfLinks(): ok');
|
||||
}
|
||||
|
||||
if ($officeLinks !== []) {
|
||||
$this->debug('Office handleOfficeLinks(): call', ['count' => count($officeLinks)]);
|
||||
$this->officeIndexService->handleOfficeLinks($officeLinks);
|
||||
$this->debug('Office handleOfficeLinks(): ok');
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
$this->debug('File indexing failed', [
|
||||
'error' => $e->getMessage(),
|
||||
'class' => $e::class,
|
||||
]);
|
||||
}
|
||||
|
||||
$this->debug('Hook end', [
|
||||
'final_set_keys' => array_keys($set),
|
||||
'final_set' => [
|
||||
'priority' => $set['priority'] ?? null,
|
||||
'keywords' => $set['keywords'] ?? null,
|
||||
'imagepath' => $set['imagepath'] ?? null,
|
||||
'startDate' => $set['startDate'] ?? null,
|
||||
'checksum' => $set['checksum'] ?? null,
|
||||
],
|
||||
]);
|
||||
}
|
||||
|
||||
/* === Hilfsmethoden unverändert === */
|
||||
|
||||
/**
|
||||
* Extrahiert MEILISEARCH_JSON aus HTML-Kommentar
|
||||
*/
|
||||
private function extractMeilisearchJson(string $content): ?array
|
||||
{
|
||||
if (!preg_match('/<!--\s*MEILISEARCH_JSON\s*(\{.*?\})\s*-->/s', $content, $m)) {
|
||||
@@ -256,6 +245,9 @@ class IndexPageListener
|
||||
: null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sammle alle <a href="…"> Links
|
||||
*/
|
||||
private function findAllLinks(string $content): array
|
||||
{
|
||||
if (!preg_match_all(
|
||||
@@ -278,8 +270,12 @@ class IndexPageListener
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ermittelt indexierbaren Dateityp (pdf|docx|xlsx|pptx) oder null
|
||||
*/
|
||||
private function detectIndexableFileType(string $url): ?string
|
||||
{
|
||||
// Hash entfernen
|
||||
$url = strtok($url, '#');
|
||||
|
||||
$parts = parse_url($url);
|
||||
@@ -287,6 +283,7 @@ class IndexPageListener
|
||||
return null;
|
||||
}
|
||||
|
||||
// direkter Pfad (/files/…)
|
||||
if (!empty($parts['path'])) {
|
||||
$ext = strtolower(pathinfo($parts['path'], PATHINFO_EXTENSION));
|
||||
if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) {
|
||||
@@ -294,12 +291,18 @@ class IndexPageListener
|
||||
}
|
||||
}
|
||||
|
||||
// Query-Parameter (Contao 4 + 5)
|
||||
if (!empty($parts['query'])) {
|
||||
parse_str($parts['query'], $query);
|
||||
|
||||
foreach (['file', 'p', 'f'] as $param) {
|
||||
if (!empty($query[$param])) {
|
||||
$candidate = rawurldecode(html_entity_decode((string) $query[$param], ENT_QUOTES));
|
||||
$candidate = (string) $query[$param];
|
||||
|
||||
// sicher decodieren (Contao 4 + 5)
|
||||
$candidate = html_entity_decode($candidate, ENT_QUOTES);
|
||||
$candidate = rawurldecode($candidate);
|
||||
|
||||
$ext = strtolower(pathinfo($candidate, PATHINFO_EXTENSION));
|
||||
|
||||
if (in_array($ext, ['pdf', 'docx', 'xlsx', 'pptx'], true)) {
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
Psr\Container\ContainerInterface: '@service_container'
|
||||
|
||||
MummertMedia\ContaoMeilisearchBundle\:
|
||||
resource: '../../{Command,EventListener,Service}'
|
||||
resource: '../../{Command,Cron,EventListener,Service}'
|
||||
autowire: true
|
||||
autoconfigure: true
|
||||
|
||||
@@ -19,6 +19,12 @@ services:
|
||||
tags:
|
||||
- { name: contao.hook, hook: indexPage, method: onIndexPage }
|
||||
|
||||
MummertMedia\ContaoMeilisearchBundle\Cron\MeilisearchIndexCron:
|
||||
autowire: true
|
||||
autoconfigure: false
|
||||
tags:
|
||||
- { name: contao.cron, interval: daily, method: __invoke }
|
||||
|
||||
MummertMedia\ContaoMeilisearchBundle\Controller\FrontendModule\MeilisearchSearchController:
|
||||
autowire: true
|
||||
autoconfigure: false
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
use Contao\DC_Table;
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_search_files'] = [
|
||||
$GLOBALS['TL_DCA']['tl_search_pdf'] = [
|
||||
'config' => [
|
||||
'dataContainer' => DC_Table::class,
|
||||
'sql' => [
|
||||
|
||||
@@ -4,11 +4,8 @@ use Contao\CoreBundle\DataContainer\PaletteManipulator;
|
||||
use Contao\System;
|
||||
|
||||
/**
|
||||
* -------------------------------------------------
|
||||
* Fields
|
||||
* -------------------------------------------------
|
||||
*/
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_host'] = [
|
||||
'inputType' => 'text',
|
||||
'eval' => [
|
||||
@@ -62,9 +59,17 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_imagesize'] = [
|
||||
'chosen' => true,
|
||||
'includeBlankOption' => true,
|
||||
],
|
||||
// 🔥 DAS HAT GEFEHLT
|
||||
'sql' => "int(10) unsigned NOT NULL default 0",
|
||||
];
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [
|
||||
'inputType' => 'checkbox',
|
||||
'eval' => [
|
||||
'tl_class' => 'w50 clr',
|
||||
],
|
||||
];
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_fallback_image'] = [
|
||||
'inputType' => 'fileTree',
|
||||
'eval' => [
|
||||
@@ -75,54 +80,25 @@ $GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_fallback_image'] = [
|
||||
'sql' => "varbinary(16) NULL",
|
||||
];
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_past_events'] = [
|
||||
'inputType' => 'checkbox',
|
||||
'eval' => [
|
||||
'tl_class' => 'w50 clr',
|
||||
],
|
||||
];
|
||||
|
||||
/**
|
||||
* -------------------------------------------------
|
||||
* Datei-Indexierung (Tika)
|
||||
* -------------------------------------------------
|
||||
*/
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_files'] = [
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_pdfs'] = [
|
||||
'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'],
|
||||
'inputType' => 'checkbox',
|
||||
'eval' => [
|
||||
'tl_class' => 'w50',
|
||||
'submitOnChange' => true,
|
||||
],
|
||||
'sql' => "char(1) NOT NULL default '1'",
|
||||
];
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_index_office'] = [
|
||||
'label' => &$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office'],
|
||||
'inputType' => 'checkbox',
|
||||
'eval' => ['tl_class' => 'w50'],
|
||||
'sql' => "char(1) NOT NULL default '0'",
|
||||
];
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['fields']['meilisearch_tika_url'] = [
|
||||
'inputType' => 'text',
|
||||
'eval' => [
|
||||
'rgxp' => 'url',
|
||||
'mandatory' => true,
|
||||
'tl_class' => 'w50 clr',
|
||||
],
|
||||
];
|
||||
|
||||
/**
|
||||
* -------------------------------------------------
|
||||
* Selector / Subpalette
|
||||
* -------------------------------------------------
|
||||
*/
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['palettes']['__selector__'][] = 'meilisearch_index_files';
|
||||
|
||||
$GLOBALS['TL_DCA']['tl_settings']['subpalettes']['meilisearch_index_files']
|
||||
= 'meilisearch_tika_url';
|
||||
|
||||
/**
|
||||
* -------------------------------------------------
|
||||
* Palette
|
||||
* -------------------------------------------------
|
||||
*/
|
||||
|
||||
PaletteManipulator::create()
|
||||
->addLegend('meilisearch_legend', null, PaletteManipulator::POSITION_AFTER, true)
|
||||
->addField('meilisearch_host', 'meilisearch_legend')
|
||||
@@ -132,5 +108,6 @@ PaletteManipulator::create()
|
||||
->addField('meilisearch_imagesize', 'meilisearch_legend')
|
||||
->addField('meilisearch_fallback_image', 'meilisearch_legend')
|
||||
->addField('meilisearch_index_past_events', 'meilisearch_legend')
|
||||
->addField('meilisearch_index_files', 'meilisearch_legend')
|
||||
->addField('meilisearch_index_pdfs', 'meilisearch_legend')
|
||||
->addField('meilisearch_index_office', 'meilisearch_legend')
|
||||
->applyToPalette('default', 'tl_settings');
|
||||
@@ -28,10 +28,10 @@ $GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][0]
|
||||
$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_past_events'][1]
|
||||
= 'Vergangene Kalender-Events werden ebenfalls in Meilisearch indexiert.';
|
||||
|
||||
$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_files'] = [
|
||||
'Dateien indexieren',
|
||||
'Aktiviert die Indexierung von PDF-Dateien sowie DOCX, XLSX und PPTX.',
|
||||
$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_pdfs'] = [
|
||||
'PDFs indexieren',
|
||||
'Aktiviert die Indexierung von PDF-Dateien für die Suche.',
|
||||
];
|
||||
|
||||
$GLOBALS['TL_LANG']['tl_settings']['meilisearch_tika_url']
|
||||
= ['Apache Tika URL', 'URL der Apache Tika Instanz (z. B. https://tika.domain.tld).'];
|
||||
$GLOBALS['TL_LANG']['tl_settings']['meilisearch_index_office']
|
||||
= ['Office-Dateien indexieren', 'DOCX, XLSX und PPTX in die Suche aufnehmen.'];
|
||||
@@ -4,6 +4,7 @@ Contao 5 – Frontend Module Template
|
||||
#}
|
||||
|
||||
<!-- indexer::stop -->
|
||||
{% block meilisearch %}
|
||||
<div
|
||||
id="topsearch"
|
||||
class="meilisearch-search"
|
||||
@@ -218,4 +219,5 @@ Contao 5 – Frontend Module Template
|
||||
}
|
||||
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
<!-- indexer::continue -->
|
||||
Reference in New Issue
Block a user