New Script: Apache Tika (#2079)

* New Script: Apache Tika

* Temp: Replace github URLs to my own fork

* Add additional dependencies according to the Docker image installation

See https://github.com/apache/tika-docker/blob/master/full/Dockerfile

* Apache Tika: Set correct tags

* Apache Tika: Set TODO to make it updateable

* Apache Tika: Fix "software-properties-common: command not found"

* Apache Tika: Automate version detection

* Apache Tika: Add `update_script`

* Apache Tika: Added clean up of `/opt/apache-tika/tika-server-standard-prev-version.jar` after upgrade

* Apache Tika: Bump up ram to 2048

* Apache Tika: Set updateable to true

* Apache Tika: Switch from `default-jdk` to `openjdk-17-jre-headless`

* Apache Tika: Removed comment about Docker file

* Apache Tika: Removed empty line

* Revert "Temp: Replace github URLs to my own fork"

This reverts commit f1c5d87206.
This commit is contained in:
Andy Grunwald 2025-02-06 18:40:21 +01:00 committed by GitHub
parent d7b6a97415
commit ef33864adf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 181 additions and 0 deletions

69
ct/apache-tika.sh Executable file
View File

@ -0,0 +1,69 @@
#!/usr/bin/env bash
# Load the shared build framework. The helpers called below (header_info,
# base_settings, variables, color, catch_errors, ...) are not defined in this
# file and are expected to come from this sourced script.
# curl flags: -f makes an HTTP error fail instead of handing the error page to
# `source` as shell code, -sS stays quiet but still reports errors, and -L
# follows redirects.
source <(curl -fsSL https://raw.githubusercontent.com/community-scripts/ProxmoxVE/main/misc/build.func)
# Copyright (c) 2021-2025 community-scripts ORG
# Author: Andy Grunwald (andygrunwald)
# License: MIT | https://github.com/community-scripts/ProxmoxVE/raw/main/LICENSE
# Source: https://github.com/apache/tika/

# App Default Values
# NOTE(review): the var_* settings are presumably read by the sourced
# framework when it sizes and creates the container - confirm in build.func.
APP="Apache-Tika"
var_tags="document"
var_cpu="1"
var_ram="2048"
var_disk="10"
var_os="debian"
var_version="12"
var_unprivileged="1"

# App Output & Base Settings
header_info "$APP"
base_settings

# Core
variables
color
catch_errors
#######################################
# Update an existing Apache Tika installation to the newest published release.
#
# Looks up the highest version directory listed on dlcdn.apache.org/tika/ and
# compares it with the version recorded in /opt/${APP}_version.txt. If they
# differ (or no version file exists), the service is stopped, the new server
# jar is downloaded and swapped in, the version file is rewritten and the
# service is started again.
#
# Globals:   APP (read), RELEASE (written)
# Arguments: none
# Returns:   never returns; always terminates the shell via `exit`
#######################################
function update_script() {
  header_info
  check_container_storage
  check_container_resources

  if [[ ! -f /etc/systemd/system/apache-tika.service ]]; then
    msg_error "No ${APP} Installation Found!"
    exit
  fi

  local install_dir="/opt/apache-tika"
  local version_file="/opt/${APP}_version.txt"
  local current_jar="${install_dir}/tika-server-standard.jar"
  local previous_jar="${install_dir}/tika-server-standard-prev-version.jar"
  local installed_version=""
  local new_jar

  RELEASE="$(wget -qO- https://dlcdn.apache.org/tika/ | grep -oP '(?<=href=")[0-9]+\.[0-9]+\.[0-9]+(?=/")' | sort -V | tail -n1)"
  # A failed lookup must not be treated as "a different version": without
  # this guard the service would be stopped and an invalid URL downloaded.
  if [[ -z "${RELEASE}" ]]; then
    msg_error "Unable to determine the latest ${APP} release"
    exit 1
  fi

  # A missing version file leaves installed_version empty, which always
  # differs from the (non-empty) RELEASE and therefore triggers an update.
  if [[ -f "${version_file}" ]]; then
    installed_version="$(cat "${version_file}")"
  fi

  if [[ "${RELEASE}" != "${installed_version}" ]]; then
    msg_info "Stopping ${APP}"
    systemctl stop apache-tika
    msg_ok "Stopped ${APP}"

    msg_info "Updating ${APP} to v${RELEASE}"
    new_jar="${install_dir}/tika-server-standard-${RELEASE}.jar"
    # -O pins the output name; otherwise wget appends ".1" when a stale file
    # of the same name exists and the rename below would pick the stale one.
    if ! wget -q -O "${new_jar}" "https://dlcdn.apache.org/tika/${RELEASE}/tika-server-standard-${RELEASE}.jar"; then
      # The old jar is still in place here, so bring the service back up
      # rather than leaving it stopped after a failed download.
      rm -f -- "${new_jar}"
      systemctl start apache-tika || true
      msg_error "Failed to download ${APP} v${RELEASE}"
      exit 1
    fi
    # Keep the previous jar until the new version has been started.
    if [[ -f "${current_jar}" ]]; then
      mv --force -- "${current_jar}" "${previous_jar}"
    fi
    mv -- "${new_jar}" "${current_jar}"
    echo "${RELEASE}" >"${version_file}"
    msg_ok "Updated ${APP} to v${RELEASE}"

    msg_info "Starting ${APP}"
    systemctl start apache-tika
    msg_ok "Started ${APP}"

    msg_info "Cleaning Up"
    rm -f -- "${previous_jar}"
    msg_ok "Cleanup Completed"
    msg_ok "Updated Successfully"
  else
    msg_ok "No update required. ${APP} is already at v${RELEASE}"
  fi
  exit
}
# Run the container build. start, build_container and description are not
# defined in this file; presumably they are provided by the sourced build.func.
start
build_container
description
# Final status output: success message plus the URL of the service on port
# 9998. The colour/icon variables (CREATING, GN, CL, INFO, YW, TAB, GATEWAY,
# BGN) and IP are not set in this file - presumably set by the framework.
msg_ok "Completed Successfully!\n"
echo -e "${CREATING}${GN}${APP} setup has been successfully initialized!${CL}"
echo -e "${INFO}${YW} Access it using the following URL:${CL}"
echo -e "${TAB}${GATEWAY}${BGN}http://${IP}:9998${CL}"

View File

@ -0,0 +1,78 @@
#!/usr/bin/env bash

# Copyright (c) 2021-2025 community-scripts ORG
# Author: Andy Grunwald (andygrunwald)
# License: MIT | https://github.com/community-scripts/ProxmoxVE/raw/main/LICENSE
# Source: https://github.com/apache/tika/

# Installs the Apache Tika server: OCR/font/GDAL dependencies, a headless
# OpenJDK 17 runtime, the newest tika-server-standard jar under
# /opt/apache-tika, and a systemd unit that serves it on 0.0.0.0:9998.
# The helper functions (color, msg_info, msg_ok, ...) and the STD and
# APPLICATION variables are not defined here; they are expected to come from
# the function library passed in via FUNCTIONS_FILE_PATH.

source /dev/stdin <<< "$FUNCTIONS_FILE_PATH"
color
verb_ip6
catch_errors
setting_up_container
network_check
update_os

msg_info "Installing Dependencies"
$STD apt-get install -y \
  curl \
  sudo \
  mc \
  software-properties-common \
  gdal-bin \
  tesseract-ocr \
  tesseract-ocr-eng \
  tesseract-ocr-ita \
  tesseract-ocr-fra \
  tesseract-ocr-spa \
  tesseract-ocr-deu
# Pre-accept the Microsoft core fonts EULA so the installer can run
# unattended. $STD goes on the consumer, not on echo: if $STD silences
# stdout (presumably it does in non-verbose mode - confirm in the function
# library), prefixing echo would discard the preseed line before it reaches
# debconf-set-selections.
echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | $STD debconf-set-selections
# NOTE(review): ttf-mscorefonts-installer may require a non-default APT
# component on Debian - confirm it is installable from the template's sources.
$STD apt-get install -y \
  xfonts-utils \
  fonts-freefont-ttf \
  fonts-liberation \
  ttf-mscorefonts-installer \
  cabextract
msg_ok "Installed Dependencies"

msg_info "Setup OpenJDK"
$STD apt-get install -y \
  openjdk-17-jre-headless
msg_ok "Setup OpenJDK"

msg_info "Installing Apache Tika"
mkdir -p /opt/apache-tika
cd /opt/apache-tika
# Pick the highest x.y.z directory listed on the Apache download mirror.
RELEASE="$(wget -qO- https://dlcdn.apache.org/tika/ | grep -oP '(?<=href=")[0-9]+\.[0-9]+\.[0-9]+(?=/")' | sort -V | tail -n1)"
# Abort with a clear message instead of downloading from a malformed URL and
# recording an empty version when the lookup produced nothing.
: "${RELEASE:?Unable to determine the latest Apache Tika release}"
# -O pins the output name so a stale file cannot cause a ".1" suffixed save.
wget -q -O "tika-server-standard-${RELEASE}.jar" "https://dlcdn.apache.org/tika/${RELEASE}/tika-server-standard-${RELEASE}.jar"
mv -- "tika-server-standard-${RELEASE}.jar" tika-server-standard.jar
# Record the installed version for later update checks.
echo "${RELEASE}" >"/opt/${APPLICATION}_version.txt"
msg_ok "Installed Apache Tika"

msg_info "Creating Service"
# The heredoc delimiter is unquoted, so \$MAINPID is escaped to reach the
# unit file literally for systemd to expand.
cat <<EOF >/etc/systemd/system/apache-tika.service
[Unit]
Description=Apache Tika
Documentation=https://tika.apache.org/
After=syslog.target network.target
[Service]
User=root
Restart=always
Type=simple
ExecStart=java -jar /opt/apache-tika/tika-server-standard.jar --host 0.0.0.0 --port 9998
ExecReload=/bin/kill -HUP \$MAINPID
[Install]
WantedBy=multi-user.target
EOF
systemctl enable -q --now apache-tika
msg_ok "Created Service"

motd_ssh
customize

msg_info "Cleaning up"
$STD apt-get -y autoremove
$STD apt-get -y autoclean
msg_ok "Cleaned"

34
json/apache-tika.json Normal file
View File

@ -0,0 +1,34 @@
{
"name": "Apache Tika",
"slug": "apache-tika",
"categories": [
12
],
"date_created": "2025-02-05",
"type": "ct",
"updateable": true,
"privileged": false,
"interface_port": 9998,
"documentation": null,
"website": "https://tika.apache.org/",
"logo": "https://tika.apache.org/tika.png",
"description": "The Apache Tika™ toolkit detects and extracts metadata and text from over a thousand different file types (such as PPT, XLS, and PDF). All of these file types can be parsed through a single interface, making Tika useful for search engine indexing, content analysis, translation, and much more.",
"install_methods": [
{
"type": "default",
"script": "ct/apache-tika.sh",
"resources": {
"cpu": 1,
"ram": 2048,
"hdd": 10,
"os": "debian",
"version": "12"
}
}
],
"default_credentials": {
"username": null,
"password": null
},
"notes": []
}