FSCrawler
Get Started with Apps!
Keywords: index, crawler
Train: Community
Home Page: https://fscrawler.readthedocs.io
Added: 2024-08-06
Last Updated: 2025-04-23
FSCrawler is a crawler that helps to index binary documents such as PDF, Open Office, MS Office.
Run as Context: FSCrawler runs as root user.
Group: 0 / root
User: 0 / root
App Metadata (Raw File)
{
"1.1.11": {
"healthy": true,
"supported": true,
"healthy_error": null,
"location": "/__w/apps/apps/trains/community/fscrawler/1.1.11",
"last_update": "2025-04-23 17:40:39",
"required_features": [],
"human_version": "2.10-SNAPSHOT-ocr-es7_1.1.11",
"version": "1.1.11",
"app_metadata": {
"app_version": "2.10-SNAPSHOT-ocr-es7",
"capabilities": [],
"categories": [
"productivity"
],
"changelog_url": "https://fscrawler.readthedocs.io/en/latest/release/index.html",
"date_added": "2024-08-06",
"description": "FSCrawler is a crawler that helps to index binary documents such as PDF, Open Office, MS Office.",
"home": "https://fscrawler.readthedocs.io",
"host_mounts": [],
"icon": "https://media.sys.truenas.net/apps/fscrawler/icons/icon.svg",
"keywords": [
"index",
"crawler"
],
"lib_version": "2.1.16",
"lib_version_hash": "dac15686f882b9ce65b8549a3d5c0ed7bafe2df7a9028880d1a99b0ff4af1eff",
"maintainers": [
{
"email": "dev@ixsystems.com",
"name": "truenas",
"url": "https://www.truenas.com/"
}
],
"name": "fscrawler",
"run_as_context": [
{
"description": "FSCrawler runs as root user.",
"gid": 0,
"group_name": "root",
"uid": 0,
"user_name": "root"
}
],
"screenshots": [],
"sources": [
"https://github.com/dadoonet/fscrawler",
"https://hub.docker.com/r/dadoonet/fscrawler",
"https://fscrawler.readthedocs.io/"
],
"title": "FSCrawler",
"train": "community",
"version": "1.1.11"
},
"schema": {
"groups": [
{
"name": "FSCrawler Configuration",
"description": "Configure FSCrawler"
},
{
"name": "Network Configuration",
"description": "Configure Network for FSCrawler"
},
{
"name": "Storage Configuration",
"description": "Configure Storage for FSCrawler"
},
{
"name": "Labels Configuration",
"description": "Configure Labels for FSCrawler"
},
{
"name": "Resources Configuration",
"description": "Configure Resources for FSCrawler"
}
],
"questions": [
{
"variable": "TZ",
"group": "FSCrawler Configuration",
"label": "Timezone",
"schema": {
"type": "string",
"default": "Etc/UTC",
"required": true,
"$ref": [
"definitions/timezone"
]
}
},
{
"variable": "fscrawler",
"label": "",
"group": "FSCrawler Configuration",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "image_selector",
"label": "Image",
"description": "The image to use for FSCrawler.</br>\nImages with OCR support are a lot larger than images without OCR support.</br>\nApproximate image sizes:</br>\n- With OCR Support: 1.2GB</br>\n- Without OCR Support: 0.5GB\n",
"schema": {
"type": "string",
"default": "image",
"required": true,
"enum": [
{
"value": "image",
"description": "With OCR Support - Elasticsearch 7 and 8"
},
{
"value": "no_ocr_image",
"description": "Without OCR Support - Elasticsearch 7 and 8"
}
]
}
},
{
"variable": "job_name",
"label": "Job Name",
"description": "The name of the FSCrawler job to run. </br>\nA _settings.yaml file in the directory named after the job name will have to be manually created.\n",
"schema": {
"type": "string",
"default": "",
"required": true
}
},
{
"variable": "loop",
"label": "Loop",
"description": "The number of times to run the job.</br>\nhttps://fscrawler.readthedocs.io/en/latest/admin/cli-options.html#loop </br>\n-1 means run forever. </br>\n0 means never run. </br>\n",
"schema": {
"type": "int",
"default": -1,
"required": true,
"min": -1
}
},
{
"variable": "restart",
"label": "Restart",
"description": "Restart the job from the beginning.</br>\nhttps://fscrawler.readthedocs.io/en/latest/admin/cli-options.html#restart\n",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "additional_envs",
"label": "Additional Environment Variables",
"description": "Configure additional environment variables for FSCrawler.",
"schema": {
"type": "list",
"default": [],
"items": [
{
"variable": "env",
"label": "Environment Variable",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "name",
"label": "Name",
"schema": {
"type": "string",
"required": true
}
},
{
"variable": "value",
"label": "Value",
"schema": {
"type": "string",
"required": true
}
}
]
}
}
]
}
}
]
}
},
{
"variable": "network",
"label": "",
"group": "Network Configuration",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "enable_rest",
"label": "Enable REST API Service",
"description": "Enable REST API Service for FSCrawler.</br>\nhttps://fscrawler.readthedocs.io/en/latest/admin/fs/rest.html</br>\nAdditional configuration is needed in the job file. Check the Notes card\nafter installation for more information.\n",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "rest_port",
"label": "REST Port",
"description": "The port for FSCrawler REST API",
"schema": {
"type": "int",
"show_if": [
[
"enable_rest",
"=",
true
]
],
"default": 30084,
"required": true,
"$ref": [
"definitions/port"
]
}
},
{
"variable": "host_network",
"label": "Host Network",
"description": "Bind to the host network. It's recommended to keep this disabled.\n",
"schema": {
"type": "boolean",
"default": false
}
}
]
}
},
{
"variable": "storage",
"label": "",
"group": "Storage Configuration",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "jobs",
"label": "FSCrawler Jobs Storage",
"description": "The path to store FSCrawler Jobs.",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "type",
"label": "Type",
"description": "ixVolume: Is a dataset created automatically by the system.</br>\nHost Path: Is a path that already exists on the system.\n",
"schema": {
"type": "string",
"required": true,
"immutable": true,
"default": "ix_volume",
"enum": [
{
"value": "host_path",
"description": "Host Path (Path that already exists on the system)"
},
{
"value": "ix_volume",
"description": "ixVolume (Dataset created automatically by the system)"
}
]
}
},
{
"variable": "ix_volume_config",
"label": "ixVolume Configuration",
"description": "The configuration for the ixVolume dataset.",
"schema": {
"type": "dict",
"show_if": [
[
"type",
"=",
"ix_volume"
]
],
"$ref": [
"normalize/ix_volume"
],
"attrs": [
{
"variable": "acl_enable",
"label": "Enable ACL",
"description": "Enable ACL for the storage.",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "dataset_name",
"label": "Dataset Name",
"description": "The name of the dataset to use for storage.",
"schema": {
"type": "string",
"required": true,
"immutable": true,
"hidden": true,
"default": "jobs"
}
},
{
"variable": "acl_entries",
"label": "ACL Configuration",
"schema": {
"type": "dict",
"show_if": [
[
"acl_enable",
"=",
true
]
],
"attrs": []
}
}
]
}
},
{
"variable": "host_path_config",
"label": "Host Path Configuration",
"schema": {
"type": "dict",
"show_if": [
[
"type",
"=",
"host_path"
]
],
"attrs": [
{
"variable": "acl_enable",
"label": "Enable ACL",
"description": "Enable ACL for the storage.",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "acl",
"label": "ACL Configuration",
"schema": {
"type": "dict",
"show_if": [
[
"acl_enable",
"=",
true
]
],
"attrs": [],
"$ref": [
"normalize/acl"
]
}
},
{
"variable": "path",
"label": "Host Path",
"description": "The host path to use for storage.",
"schema": {
"type": "hostpath",
"show_if": [
[
"acl_enable",
"=",
false
]
],
"required": true
}
}
]
}
}
]
}
},
{
"variable": "additional_storage",
"label": "Additional Storage",
"description": "Additional storage for FSCrawler.",
"schema": {
"type": "list",
"default": [],
"items": [
{
"variable": "storageEntry",
"label": "Storage Entry",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "type",
"label": "Type",
"description": "ixVolume: Is a dataset created automatically by the system.</br>\nHost Path: Is a path that already exists on the system.</br>\nSMB Share: Is a SMB share that is mounted as a volume.\n",
"schema": {
"type": "string",
"required": true,
"default": "ix_volume",
"immutable": true,
"enum": [
{
"value": "host_path",
"description": "Host Path (Path that already exists on the system)"
},
{
"value": "ix_volume",
"description": "ixVolume (Dataset created automatically by the system)"
},
{
"value": "cifs",
"description": "SMB/CIFS Share (Mounts a volume to a SMB share)"
}
]
}
},
{
"variable": "read_only",
"label": "Read Only",
"description": "Mount the volume as read only.",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "mount_path",
"label": "Mount Path",
"description": "The path inside the container to mount the storage.",
"schema": {
"type": "path",
"required": true
}
},
{
"variable": "host_path_config",
"label": "Host Path Configuration",
"schema": {
"type": "dict",
"show_if": [
[
"type",
"=",
"host_path"
]
],
"attrs": [
{
"variable": "acl_enable",
"label": "Enable ACL",
"description": "Enable ACL for the storage.",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "acl",
"label": "ACL Configuration",
"schema": {
"type": "dict",
"show_if": [
[
"acl_enable",
"=",
true
]
],
"attrs": [],
"$ref": [
"normalize/acl"
]
}
},
{
"variable": "path",
"label": "Host Path",
"description": "The host path to use for storage.",
"schema": {
"type": "hostpath",
"show_if": [
[
"acl_enable",
"=",
false
]
],
"required": true
}
}
]
}
},
{
"variable": "ix_volume_config",
"label": "ixVolume Configuration",
"description": "The configuration for the ixVolume dataset.",
"schema": {
"type": "dict",
"show_if": [
[
"type",
"=",
"ix_volume"
]
],
"$ref": [
"normalize/ix_volume"
],
"attrs": [
{
"variable": "acl_enable",
"label": "Enable ACL",
"description": "Enable ACL for the storage.",
"schema": {
"type": "boolean",
"default": false
}
},
{
"variable": "dataset_name",
"label": "Dataset Name",
"description": "The name of the dataset to use for storage.",
"schema": {
"type": "string",
"required": true,
"immutable": true,
"default": "storage_entry"
}
},
{
"variable": "acl_entries",
"label": "ACL Configuration",
"schema": {
"type": "dict",
"show_if": [
[
"acl_enable",
"=",
true
]
],
"attrs": []
}
}
]
}
},
{
"variable": "cifs_config",
"label": "SMB Configuration",
"description": "The configuration for the SMB dataset.",
"schema": {
"type": "dict",
"show_if": [
[
"type",
"=",
"cifs"
]
],
"attrs": [
{
"variable": "server",
"label": "Server",
"description": "The server to mount the SMB share.",
"schema": {
"type": "string",
"required": true
}
},
{
"variable": "path",
"label": "Path",
"description": "The path to mount the SMB share.",
"schema": {
"type": "string",
"required": true
}
},
{
"variable": "username",
"label": "Username",
"description": "The username to use for the SMB share.",
"schema": {
"type": "string",
"required": true
}
},
{
"variable": "password",
"label": "Password",
"description": "The password to use for the SMB share.",
"schema": {
"type": "string",
"required": true,
"private": true
}
},
{
"variable": "domain",
"label": "Domain",
"description": "The domain to use for the SMB share.",
"schema": {
"type": "string"
}
}
]
}
}
]
}
}
]
}
}
]
}
},
{
"variable": "labels",
"label": "",
"group": "Labels Configuration",
"schema": {
"type": "list",
"default": [],
"items": [
{
"variable": "label",
"label": "Label",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "key",
"label": "Key",
"schema": {
"type": "string",
"required": true
}
},
{
"variable": "value",
"label": "Value",
"schema": {
"type": "string",
"required": true
}
},
{
"variable": "containers",
"label": "Containers",
"description": "Containers where the label should be applied",
"schema": {
"type": "list",
"items": [
{
"variable": "container",
"label": "Container",
"schema": {
"type": "string",
"required": true,
"enum": [
{
"value": "fscrawler",
"description": "fscrawler"
}
]
}
}
]
}
}
]
}
}
]
}
},
{
"variable": "resources",
"label": "",
"group": "Resources Configuration",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "limits",
"label": "Limits",
"schema": {
"type": "dict",
"attrs": [
{
"variable": "cpus",
"label": "CPUs",
"description": "CPUs limit for FSCrawler.",
"schema": {
"type": "int",
"default": 2,
"required": true
}
},
{
"variable": "memory",
"label": "Memory (in MB)",
"description": "Memory limit for FSCrawler.",
"schema": {
"type": "int",
"default": 4096,
"required": true
}
}
]
}
}
]
}
}
]
},
"readme": "<h1>FSCrawler</h1> <p><a href=\"https://fscrawler.readthedocs.io/\">FSCrawler</a> is a crawler that helps to index binary documents such as PDF, Open Office, MS Office.</p>",
"changelog": null,
"chart_metadata": {
"app_version": "2.10-SNAPSHOT-ocr-es7",
"capabilities": [],
"categories": [
"productivity"
],
"changelog_url": "https://fscrawler.readthedocs.io/en/latest/release/index.html",
"date_added": "2024-08-06",
"description": "FSCrawler is a crawler that helps to index binary documents such as PDF, Open Office, MS Office.",
"home": "https://fscrawler.readthedocs.io",
"host_mounts": [],
"icon": "https://media.sys.truenas.net/apps/fscrawler/icons/icon.svg",
"keywords": [
"index",
"crawler"
],
"lib_version": "2.1.16",
"lib_version_hash": "dac15686f882b9ce65b8549a3d5c0ed7bafe2df7a9028880d1a99b0ff4af1eff",
"maintainers": [
{
"email": "dev@ixsystems.com",
"name": "truenas",
"url": "https://www.truenas.com/"
}
],
"name": "fscrawler",
"run_as_context": [
{
"description": "FSCrawler runs as root user.",
"gid": 0,
"group_name": "root",
"uid": 0,
"user_name": "root"
}
],
"screenshots": [],
"sources": [
"https://github.com/dadoonet/fscrawler",
"https://hub.docker.com/r/dadoonet/fscrawler",
"https://fscrawler.readthedocs.io/"
],
"title": "FSCrawler",
"train": "community",
"version": "1.1.11"
}
}
}
Support, maintenance, and documentation for applications within the Community catalog are handled by the TrueNAS community. The TrueNAS Applications Portal hosts but does not validate or maintain any linked resources associated with this app.
There currently aren’t any resources available for this application!
Please help the TrueNAS community create content or discuss this application in the TrueNAS Community forum.