{
  "_id": "6a2e6db93efcd9bda4312c45",
  "Package": "rtika",
  "Type": "Package",
  "Title": "R Interface to 'Apache Tika'",
  "Version": "3.2.3",
  "Authors@R": "c(\nperson(given = \"Sasha\", family = \"Goodman\", email=\"goodmansasha@gmail.com\", role=c(\"aut\",\"cre\") ),\nperson(given = \"The Apache Software Foundation\", role=c(\"aut\",\"cph\") ),\nperson(given = \"Julia\", family = \"Silge\", role = c(\"rev\"), comment = \"Reviewed the package for rOpenSci, see https://github.com/ropensci/software-review/issues/191/\"),\nperson(given = \"David\",family = \"Gohel\", role = c(\"rev\"), comment = \"Reviewed the package for rOpenSci, see https://github.com/ropensci/software-review/issues/191/\")\n)",
  "Maintainer": "Sasha Goodman <goodmansasha@gmail.com>",
  "License": "Apache License 2.0 | file LICENSE",
  "SystemRequirements": "Java (>=11)",
  "Description": "Extract text or metadata from over a thousand file types,\nusing Apache Tika <https://tika.apache.org/>. Get either plain\ntext or structured XHTML content.",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.3",
  "URL": "https://docs.ropensci.org/rtika/,\nhttps://github.com/ropensci/rtika/",
  "BugReports": "https://github.com/ropensci/rtika/issues/",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "default-jdk libssl-dev",
  "Repository": "https://ropensci.r-universe.dev",
  "Date/Publication": "2025-10-12 13:59:05 UTC",
  "RemoteUrl": "https://github.com/ropensci/rtika",
  "RemoteRef": "master",
  "RemoteSha": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-06-14 08:57:42 UTC",
    "User": "root"
  },
  "Author": "Sasha Goodman [aut, cre],\nThe Apache Software Foundation [aut, cph],\nJulia Silge [rev] (Reviewed the package for rOpenSci, see\nhttps://github.com/ropensci/software-review/issues/191/),\nDavid Gohel [rev] (Reviewed the package for rOpenSci, see\nhttps://github.com/ropensci/software-review/issues/191/)",
  "MD5sum": "678a123f533d69c1dd14d026256c804f",
  "_user": "ropensci",
  "_type": "src",
  "_file": "rtika_3.2.3.tar.gz",
  "_fileid": "785e42e9d168c34785d0efb87408f3b630da04925c4e91baa5f1f11c03f79dd8",
  "_filesize": 837218,
  "_sha256": "785e42e9d168c34785d0efb87408f3b630da04925c4e91baa5f1f11c03f79dd8",
  "_created": "2026-06-14T08:57:42.000Z",
  "_published": "2026-06-14T09:00:41.163Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 81264313341,
      "time": 129,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7619408492"
    },
    {
      "job": 81264313335,
      "time": 134,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7619408839"
    },
    {
      "job": 81264313359,
      "time": 81,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7619402662"
    },
    {
      "job": 81264313340,
      "time": 77,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7619402243"
    },
    {
      "job": 81264117832,
      "time": 154,
      "config": "pkgdown",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7619387921"
    },
    {
      "job": 81264117833,
      "time": 190,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7619392377"
    },
    {
      "job": 81264313328,
      "time": 109,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7619405998"
    },
    {
      "job": 81264313334,
      "time": 95,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7619404462"
    },
    {
      "job": 81264313337,
      "time": 79,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7619402382"
    },
    {
      "job": 81264313338,
      "time": 88,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7619403629"
    }
  ],
  "_buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/ropensci/rtika",
  "_commit": {
    "id": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
    "author": "Sasha Goodman Chase <4193801+soshsquatch@users.noreply.github.com>",
    "committer": "Sasha Goodman Chase <4193801+soshsquatch@users.noreply.github.com>",
    "message": "fixed tabulapdf link\n\nsmall fix, refreshed docs with pkgdown link to tabulapdf\n",
    "time": 1760277545
  },
  "_maintainer": {
    "name": "Sasha Goodman",
    "email": "goodmansasha@gmail.com",
    "login": "soshsquatch",
    "description": "Computational social science. ",
    "uuid": 4193801
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "curl",
      "role": "Imports"
    },
    {
      "package": "sys",
      "version": ">= 2.1",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "digest",
      "role": "Imports"
    },
    {
      "package": "backports",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Suggests"
    },
    {
      "package": "xml2",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "magrittr",
      "role": "Suggests"
    }
  ],
  "_owner": "ropensci",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-41",
      "n": 5
    }
  ],
  "_tags": [],
  "_topics": [
    "extract-metadata",
    "extract-text",
    "java",
    "parse",
    "pdf-files",
    "peer-reviewed",
    "tesseract",
    "tika"
  ],
  "_stars": 55,
  "_contributors": [
    {
      "user": "soshsquatch",
      "count": 158,
      "uuid": 4193801
    },
    {
      "user": "jeroen",
      "count": 5,
      "uuid": 216319
    },
    {
      "user": "noamross",
      "count": 4,
      "uuid": 571752
    },
    {
      "user": "juliasilge",
      "count": 1,
      "uuid": 12505835
    }
  ],
  "_userbio": {
    "uuid": 1200269,
    "type": "organization",
    "name": "rOpenSci",
    "followers": 1098,
    "description": "Tools and R Packages for Open Science"
  },
  "_downloads": {
    "count": 229,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/rtika"
  },
  "_devurl": "https://github.com/ropensci/rtika",
  "_pkgdown": "https://docs.ropensci.org/rtika/",
  "_searchresults": 13,
  "_metadata": {
    "review": {
      "id": 191,
      "status": "reviewed",
      "version": "0.1.2",
      "organization": "rOpenSci Software Review",
      "url": "https://github.com/ropensci/software-review/issues/191"
    },
    "ropensci_category": "literature"
  },
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/rtika.html",
    "LICENSE",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ropensci/rtika",
  "_realowner": "ropensci",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.8",
      "date": "2018-05-02"
    },
    {
      "version": "1.1.19",
      "date": "2018-10-05"
    },
    {
      "version": "1.19.1",
      "date": "2018-11-15"
    },
    {
      "version": "1.20",
      "date": "2019-03-04"
    },
    {
      "version": "1.21",
      "date": "2019-06-22"
    },
    {
      "version": "1.22",
      "date": "2019-08-02"
    },
    {
      "version": "1.23",
      "date": "2019-12-13"
    },
    {
      "version": "1.24.1",
      "date": "2020-04-25"
    },
    {
      "version": "2.0.0",
      "date": "2021-08-06"
    },
    {
      "version": "2.4.1",
      "date": "2022-09-26"
    },
    {
      "version": "2.7.0",
      "date": "2023-05-05"
    },
    {
      "version": "3.2.3",
      "date": "2025-10-12"
    }
  ],
  "_exports": [
    "install_tika",
    "java",
    "tika",
    "tika_check",
    "tika_fetch",
    "tika_html",
    "tika_jar",
    "tika_json",
    "tika_json_text",
    "tika_text",
    "tika_xml"
  ],
  "_help": [
    {
      "page": "install_tika",
      "title": "Install or Update the Apache Tika 'jar'",
      "topics": [
        "install_tika"
      ]
    },
    {
      "page": "java",
      "title": "System Command to Run Java",
      "topics": [
        "java"
      ]
    },
    {
      "page": "tika",
      "title": "Main R Interface to 'Apache Tika'",
      "topics": [
        "tika"
      ]
    },
    {
      "page": "tika_check",
      "title": "Check Tika against a checksum",
      "topics": [
        "tika_check"
      ]
    },
    {
      "page": "tika_fetch",
      "title": "Fetch Files with the Content-Type Preserved in the File Extension",
      "topics": [
        "tika_fetch"
      ]
    },
    {
      "page": "tika_html",
      "title": "Get Structured XHTML",
      "topics": [
        "tika_html"
      ]
    },
    {
      "page": "tika_jar",
      "title": "Path to Apache Tika",
      "topics": [
        "tika_jar"
      ]
    },
    {
      "page": "tika_json",
      "title": "Get json Metadata and XHTML Content",
      "topics": [
        "tika_json"
      ]
    },
    {
      "page": "tika_json_text",
      "title": "Get json Metadata and Plain Text Content",
      "topics": [
        "tika_json_text"
      ]
    },
    {
      "page": "tika_text",
      "title": "Get Plain Text",
      "topics": [
        "tika_text"
      ]
    },
    {
      "page": "tika_xml",
      "title": "Get a Structured XHTML Rendition",
      "topics": [
        "tika_xml"
      ]
    }
  ],
  "_readme": "https://github.com/ropensci/rtika/raw/master/README.md",
  "_rundeps": [
    "backports",
    "curl",
    "digest",
    "sys"
  ],
  "_vignettes": [
    {
      "source": "rtika_introduction.Rmd",
      "filename": "rtika_introduction.html",
      "title": "Introduction to rtika",
      "author": "Sasha Goodman",
      "engine": "knitr::rmarkdown",
      "headings": [
        "A Digital Babel Fish",
        "Extract Plain Text",
        "Preserve Content-Type when Downloading",
        "Settings for Big Datasets",
        "Get a Structured XHTML Rendition",
        "Access Metadata in the XHTML",
        "Get Metadata in JSON Format",
        "Get Metadata from \"Container\" Documents",
        "Extending rtika",
        "References"
      ],
      "created": "2018-03-02 03:09:08",
      "modified": "2019-08-02 05:39:32",
      "commits": 19
    }
  ],
  "_score": 6.030397300856762,
  "_indexed": true,
  "_nocasepkg": "rtika",
  "_universes": [
    "ropensci",
    "soshsquatch"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:58.000Z",
      "distro": "noble",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "1af5adca75413c65f8c724ec3e61329267f4ec4f21a1f33e01beee99dcdab3d7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:59.000Z",
      "distro": "noble",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "23c45e45a4f0c6fe8e600bf55ffffcb86d2d8234d4b157af7927c1c8cc79d86f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:10.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "1a57f6ad66134f1a2707c7d227ab341d3da9cab352d365761a480d2946ad388f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:07.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "c392cd408e269063cb817c373c94d2ba509ddf90f464e60b6df8f07d3047d6cc",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:48.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "454e5e8965a1806ce1f4dc4984b6480d5f5051f73ca649b7fd3b9d64644b5c72",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:13.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "26b0dfaddb8faa8f3d6bf9f14361c28ff6facf3b470b4044a7ce1f28440c5baf",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-06-14T08:58:59.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "e680a781a61c40942a10874f323e23b4e530ea1f479cb919969d78620a3616b5",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-06-14T08:59:07.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "f1a6b93edf3a5f95334ae9eddfb9a64e67221b09d50437e9ecee6d4604aa17ca",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/27493844660"
    }
  ]
}