{
  "_id": "69df45efc5259ad4cd8de9d2",
  "Package": "rtika",
  "Type": "Package",
  "Title": "R Interface to 'Apache Tika'",
  "Version": "3.2.3",
  "Authors@R": "c(\nperson(given = \"Sasha\", family = \"Goodman\", email=\"goodmansasha@gmail.com\", role=c(\"aut\",\"cre\") ),\nperson(given = \"The Apache Software Foundation\", role=c(\"aut\",\"cph\") ),\nperson(given = \"Julia\", family = \"Silge\", role = c(\"rev\"), comment = \"Reviewed the package for rOpenSci, see https://github.com/ropensci/software-review/issues/191/\"),\nperson(given = \"David\",family = \"Gohel\", role = c(\"rev\"), comment = \"Reviewed the package for rOpenSci, see https://github.com/ropensci/software-review/issues/191/\")\n)",
  "Maintainer": "Sasha Goodman <goodmansasha@gmail.com>",
  "License": "Apache License 2.0 | file LICENSE",
  "SystemRequirements": "Java (>=11)",
  "Description": "Extract text or metadata from over a thousand file types,\nusing Apache Tika <https://tika.apache.org/>. Get either plain\ntext or structured XHTML content.",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.3",
  "URL": "https://docs.ropensci.org/rtika/,\nhttps://github.com/ropensci/rtika/",
  "BugReports": "https://github.com/ropensci/rtika/issues/",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "default-jdk libssl-dev",
  "Repository": "https://ropensci.r-universe.dev",
  "Date/Publication": "2025-10-12 13:59:05 UTC",
  "RemoteUrl": "https://github.com/ropensci/rtika",
  "RemoteRef": "master",
  "RemoteSha": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-04-15 07:58:59 UTC",
    "User": "root"
  },
  "Author": "Sasha Goodman [aut, cre],\nThe Apache Software Foundation [aut, cph],\nJulia Silge [rev] (Reviewed the package for rOpenSci, see\nhttps://github.com/ropensci/software-review/issues/191/),\nDavid Gohel [rev] (Reviewed the package for rOpenSci, see\nhttps://github.com/ropensci/software-review/issues/191/)",
  "MD5sum": "e53a7a1a7a6cca9a12ecb8f0a4fe0452",
  "_user": "ropensci",
  "_type": "src",
  "_file": "rtika_3.2.3.tar.gz",
  "_fileid": "1c4f0c413e3e3333a2ffd069bd60d8aa3fade7caf27da5e845bb834e67144332",
  "_filesize": 838383,
  "_sha256": "1c4f0c413e3e3333a2ffd069bd60d8aa3fade7caf27da5e845bb834e67144332",
  "_created": "2026-04-15T07:58:59.000Z",
  "_published": "2026-04-15T08:01:51.040Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 71412993032,
      "time": 127,
      "config": "linux-devel-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6445688264"
    },
    {
      "job": 71412993023,
      "time": 133,
      "config": "linux-release-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6445689727"
    },
    {
      "job": 71412993002,
      "time": 103,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "ERROR",
      "artifact": "6445681224"
    },
    {
      "job": 71412993008,
      "time": 113,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6445684493"
    },
    {
      "job": 71412629993,
      "time": 153,
      "config": "pkgdown",
      "r": "4.5.2",
      "check": "OK",
      "artifact": "6445650606"
    },
    {
      "job": 71412629986,
      "time": 164,
      "config": "source",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6445653544"
    },
    {
      "job": 71412993022,
      "time": 130,
      "config": "wasm-release",
      "r": "4.5.1",
      "check": "OK",
      "artifact": "6445688593"
    },
    {
      "job": 71412993101,
      "time": 82,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6445676138"
    },
    {
      "job": 71412993043,
      "time": 71,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6445672609"
    },
    {
      "job": 71412993037,
      "time": 113,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6445684355"
    }
  ],
  "_buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/ropensci/rtika",
  "_commit": {
    "id": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
    "author": "Sasha Goodman Chase <4193801+soshsquatch@users.noreply.github.com>",
    "committer": "Sasha Goodman Chase <4193801+soshsquatch@users.noreply.github.com>",
    "message": "fixed tabulapdf link\n\nsmall fix, refreshed docs with pkgdown link to tabulapdf\n",
    "time": 1760277545
  },
  "_maintainer": {
    "name": "Sasha Goodman",
    "email": "goodmansasha@gmail.com",
    "login": "soshsquatch",
    "uuid": 4193801
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "curl",
      "role": "Imports"
    },
    {
      "package": "sys",
      "version": ">= 2.1",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "digest",
      "role": "Imports"
    },
    {
      "package": "backports",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Suggests"
    },
    {
      "package": "xml2",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "magrittr",
      "role": "Suggests"
    }
  ],
  "_owner": "ropensci",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-41",
      "n": 5
    }
  ],
  "_tags": [],
  "_topics": [
    "extract-metadata",
    "extract-text",
    "java",
    "parse",
    "pdf-files",
    "peer-reviewed",
    "tesseract",
    "tika"
  ],
  "_stars": 55,
  "_contributors": [
    {
      "user": "soshsquatch",
      "count": 158,
      "uuid": 4193801
    },
    {
      "user": "jeroen",
      "count": 5,
      "uuid": 216319
    },
    {
      "user": "noamross",
      "count": 4,
      "uuid": 571752
    },
    {
      "user": "juliasilge",
      "count": 1,
      "uuid": 12505835
    }
  ],
  "_userbio": {
    "uuid": 1200269,
    "type": "organization",
    "name": "rOpenSci",
    "description": "Tools and R Packages for Open Science"
  },
  "_downloads": {
    "count": 298,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/rtika"
  },
  "_devurl": "https://github.com/ropensci/rtika",
  "_pkgdown": "https://docs.ropensci.org/rtika/",
  "_searchresults": 12,
  "_metadata": {
    "review": {
      "id": 191,
      "status": "reviewed",
      "version": "0.1.2",
      "organization": "rOpenSci Software Review",
      "url": "https://github.com/ropensci/software-review/issues/191"
    },
    "ropensci_category": "literature"
  },
  "_rbuild": "4.5.3",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/rtika.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ropensci/rtika",
  "_realowner": "ropensci",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.8",
      "date": "2018-05-02"
    },
    {
      "version": "1.1.19",
      "date": "2018-10-05"
    },
    {
      "version": "1.19.1",
      "date": "2018-11-15"
    },
    {
      "version": "1.20",
      "date": "2019-03-04"
    },
    {
      "version": "1.21",
      "date": "2019-06-22"
    },
    {
      "version": "1.22",
      "date": "2019-08-02"
    },
    {
      "version": "1.23",
      "date": "2019-12-13"
    },
    {
      "version": "1.24.1",
      "date": "2020-04-25"
    },
    {
      "version": "2.0.0",
      "date": "2021-08-06"
    },
    {
      "version": "2.4.1",
      "date": "2022-09-26"
    },
    {
      "version": "2.7.0",
      "date": "2023-05-05"
    },
    {
      "version": "3.2.3",
      "date": "2025-10-12"
    }
  ],
  "_exports": [
    "install_tika",
    "java",
    "tika",
    "tika_check",
    "tika_fetch",
    "tika_html",
    "tika_jar",
    "tika_json",
    "tika_json_text",
    "tika_text",
    "tika_xml"
  ],
  "_help": [
    {
      "page": "install_tika",
      "title": "Install or Update the Apache Tika 'jar'",
      "topics": [
        "install_tika"
      ]
    },
    {
      "page": "java",
      "title": "System Command to Run Java",
      "topics": [
        "java"
      ]
    },
    {
      "page": "tika",
      "title": "Main R Interface to 'Apache Tika'",
      "topics": [
        "tika"
      ]
    },
    {
      "page": "tika_check",
      "title": "Check Tika against a checksum",
      "topics": [
        "tika_check"
      ]
    },
    {
      "page": "tika_fetch",
      "title": "Fetch Files with the Content-Type Preserved in the File Extension",
      "topics": [
        "tika_fetch"
      ]
    },
    {
      "page": "tika_html",
      "title": "Get Structured XHTML",
      "topics": [
        "tika_html"
      ]
    },
    {
      "page": "tika_jar",
      "title": "Path to Apache Tika",
      "topics": [
        "tika_jar"
      ]
    },
    {
      "page": "tika_json",
      "title": "Get json Metadata and XHTML Content",
      "topics": [
        "tika_json"
      ]
    },
    {
      "page": "tika_json_text",
      "title": "Get json Metadata and Plain Text Content",
      "topics": [
        "tika_json_text"
      ]
    },
    {
      "page": "tika_text",
      "title": "Get Plain Text",
      "topics": [
        "tika_text"
      ]
    },
    {
      "page": "tika_xml",
      "title": "Get a Structured XHTML Rendition",
      "topics": [
        "tika_xml"
      ]
    }
  ],
  "_readme": "https://github.com/ropensci/rtika/raw/master/README.md",
  "_rundeps": [
    "backports",
    "curl",
    "digest",
    "sys"
  ],
  "_vignettes": [
    {
      "source": "rtika_introduction.Rmd",
      "filename": "rtika_introduction.html",
      "title": "Introduction to rtika",
      "author": "Sasha Goodman",
      "engine": "knitr::rmarkdown",
      "headings": [
        "A Digital Babel Fish",
        "Extract Plain Text",
        "Preserve Content-Type when Downloading",
        "Settings for Big Datasets",
        "Get a Structured XHTML Rendition",
        "Access Metadata in the XHTML",
        "Get Metadata in JSON Format",
        "Get Metadata from \"Container\" Documents",
        "Extending rtika",
        "References"
      ],
      "created": "2018-03-02 03:09:08",
      "modified": "2019-08-02 05:39:32",
      "commits": 19
    }
  ],
  "_score": 5.99563519459755,
  "_indexed": true,
  "_nocasepkg": "rtika",
  "_universes": [
    "ropensci",
    "soshsquatch"
  ],
  "_binaries": [
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "3.2.3",
      "date": "2026-04-15T08:01:06.000Z",
      "distro": "noble",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "425345c8d566fcde9588f09e909151e4052a3c82ca1d796719214e31ef2f8359",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.5.3",
      "os": "linux",
      "version": "3.2.3",
      "date": "2026-04-15T08:01:11.000Z",
      "distro": "noble",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "8a7c86a94a06167217a5ef8384631a6f4b3168d4ddbdcd5cd070bd3fc16b2f19",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "3.2.3",
      "date": "2026-04-15T08:00:43.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "98d9fee0456deecc31b7e8792fb8b20fb9680d5fb7afd5afa906a0520bb53856",
      "status": "failure",
      "check": "ERROR",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "3.2.3",
      "date": "2026-04-15T08:00:52.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "4b327c9f70e272d6f6dfcfd75a8a3d2dbedd842b5b7a47a6fea4b6ed687328b7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.5.1",
      "os": "wasm",
      "version": "3.2.3",
      "date": "2026-04-15T08:01:21.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "432490abbe6aa7349e0f2225328e9106318bd07c84b42ac0d1fc19ab1b80dc42",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-04-15T08:00:17.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "b4d8453c3ca7fb23f538d6fad45ef3c1ed362f49a3a68e1306069f33ecc81def",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-04-15T08:00:02.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "044517154c95f36c990b3f02b50f81f82984bee9cf54a97f25ce2b71896ed854",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-04-15T08:00:43.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "ee76f693df3e89fcfa456de0e77a464c0924074d64b7070923a423e4a4672757",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24443080958"
    }
  ]
}