{
  "_id": "6a44ceab6ff6f2357551ba70",
  "Type": "Package",
  "Package": "pangoling",
  "Title": "Access to Large Language Model Predictions",
  "Version": "1.0.3",
  "Authors@R": "c(\nperson(\"Bruno\", \"Nicenboim\", , \"b.nicenboim@tilburguniversity.edu\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0002-5176-3943\")),\nperson(\"Chris\", \"Emmerly\", role = \"ctb\"),\nperson(\"Giovanni\", \"Cassani\", role = \"ctb\"),\nperson(\"Lisa\", \"Levinson\", role = \"rev\"),\nperson(\"Utku\", \"Turk\", role = \"rev\")\n)",
  "Description": "Provides access to word predictability estimates using\nlarge language models (LLMs) based on 'transformer'\narchitectures via integration with the 'Hugging Face' ecosystem\n<https://huggingface.co/>. The package interfaces with\npre-trained neural networks and supports both\ncausal/auto-regressive LLMs (e.g., 'GPT-2') and\nmasked/bidirectional LLMs (e.g., 'BERT') to compute the\nprobability of words, phrases, or tokens given their linguistic\ncontext. For details on GPT-2 and causal models, see Radford et\nal. (2019)\n<https://storage.prod.researchhub.com/uploads/papers/2020/06/01/language-models.pdf>,\nfor details on BERT and masked models, see Devlin et al. (2019)\n<doi:10.48550/arXiv.1810.04805>. By enabling a straightforward\nestimation of word predictability, the package facilitates\nresearch in psycholinguistics, computational linguistics, and\nnatural language processing (NLP).",
  "License": "MIT + file LICENSE",
  "URL": "https://docs.ropensci.org/pangoling/,\nhttps://github.com/ropensci/pangoling",
  "BugReports": "https://github.com/ropensci/pangoling/issues",
  "Config/testthat/edition": "3",
  "Encoding": "UTF-8",
  "Language": "en-US",
  "LazyData": "true",
  "Roxygen": "list(markdown = TRUE)",
  "RoxygenNote": "7.3.1",
  "StagedInstall": "yes",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "libpng-dev python3",
  "Repository": "https://ropensci.r-universe.dev",
  "Date/Publication": "2026-01-13 15:59:45 UTC",
  "RemoteUrl": "https://github.com/ropensci/pangoling",
  "RemoteRef": "main",
  "RemoteSha": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-07-01 08:16:06 UTC",
    "User": "root"
  },
  "Author": "Bruno Nicenboim [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-5176-3943>),\nChris Emmerly [ctb],\nGiovanni Cassani [ctb],\nLisa Levinson [rev],\nUtku Turk [rev]",
  "Maintainer": "Bruno Nicenboim <b.nicenboim@tilburguniversity.edu>",
  "_user": "ropensci",
  "_type": "src",
  "_file": "pangoling_1.0.3.tar.gz",
  "_fileid": "https://r2.ropensci.org/dd785654033426e9214ce0e4d82352aeabd2e5fa62bf1016228b97540da4ce90",
  "_filesize": 999302,
  "_sha256": "dd785654033426e9214ce0e4d82352aeabd2e5fa62bf1016228b97540da4ce90",
  "_expires": "2026-10-09T08:24:10.000Z",
  "_created": "2026-07-01T08:16:06.000Z",
  "_published": "2026-07-01T08:24:11.185Z",
  "_jobs": [
    {
      "job": 84486391140,
      "time": 190,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "8004099099"
    },
    {
      "job": 84486391150,
      "time": 189,
      "config": "linux-release-x86_64",
      "r": "4.6.1",
      "check": "OK",
      "artifact": "8004098517"
    },
    {
      "job": 84486391173,
      "time": 137,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "8004137397"
    },
    {
      "job": 84486391135,
      "time": 187,
      "config": "macos-release-arm64",
      "r": "4.6.1",
      "check": "OK",
      "artifact": "8004153640"
    },
    {
      "job": 84485695541,
      "time": 163,
      "config": "pkgdown",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "8004001468"
    },
    {
      "job": 84485695561,
      "time": 232,
      "config": "source",
      "r": "4.6.1",
      "check": "OK",
      "artifact": "8004026143"
    },
    {
      "job": 84486391146,
      "time": 188,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "8004098264"
    },
    {
      "job": 84486391175,
      "time": 202,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "8004102231"
    },
    {
      "job": 84486391182,
      "time": 195,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "8004099448"
    },
    {
      "job": 84486391156,
      "time": 212,
      "config": "windows-release",
      "r": "4.6.1",
      "check": "OK",
      "artifact": "8004105143"
    }
  ],
  "_host": "GitHub-Actions",
  "_buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880",
  "_status": "success",
  "_upstream": "https://github.com/ropensci/pangoling",
  "_commit": {
    "id": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
    "author": "Bruno Nicenboim <bruno.nicenboim@gmail.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "fix DOI",
    "time": 1768319985
  },
  "_maintainer": {
    "name": "Bruno Nicenboim",
    "email": "b.nicenboim@tilburguniversity.edu",
    "login": "bnicenboim",
    "orcid": "0000-0002-5176-3943",
    "twitter": "@bruno_nicenboim",
    "description": "",
    "uuid": 5982330
  },
  "_distro": "resolute",
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "cachem",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "memoise",
      "role": "Imports"
    },
    {
      "package": "reticulate",
      "role": "Imports"
    },
    {
      "package": "rstudioapi",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "tidyselect",
      "role": "Imports"
    },
    {
      "package": "tidytable",
      "version": ">= 0.7.2",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "brms",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "parallel",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "spelling",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "tictoc",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    }
  ],
  "_owner": "ropensci",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2026-03",
      "n": 1
    }
  ],
  "_tags": [],
  "_topics": [
    "nlp",
    "psycholinguistics",
    "transformers"
  ],
  "_stars": 12,
  "_contributors": [
    {
      "user": "bnicenboim",
      "count": 269,
      "uuid": 5982330
    }
  ],
  "_userbio": {
    "uuid": 1200269,
    "type": "organization",
    "name": "rOpenSci",
    "followers": 1106,
    "description": "Tools and R Packages for Open Science"
  },
  "_downloads": {
    "count": 568,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/pangoling"
  },
  "_devurl": "https://github.com/ropensci/pangoling",
  "_pkgdown": "https://docs.ropensci.org/pangoling/",
  "_searchresults": 16,
  "_metadata": {
    "review": {
      "id": 575,
      "status": "reviewed",
      "version": "0.0.0.9005",
      "organization": "rOpenSci Software Review",
      "url": "https://github.com/ropensci/software-review/issues/575"
    },
    "ropensci_category": "scalereprod"
  },
  "_rbuild": "4.6.1",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/pangoling.html",
    "extra/readme.html",
    "extra/readme.md",
    "LICENSE",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ropensci/pangoling",
  "_realowner": "ropensci",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.3",
      "date": "2025-04-07"
    }
  ],
  "_exports": [
    "causal_config",
    "causal_lp",
    "causal_lp_mats",
    "causal_next_tokens_pred_tbl",
    "causal_next_tokens_tbl",
    "causal_pred_mats",
    "causal_preload",
    "causal_targets_pred",
    "causal_tokens_lp_tbl",
    "causal_tokens_pred_lst",
    "causal_words_pred",
    "install_py_pangoling",
    "installed_py_pangoling",
    "masked_config",
    "masked_lp",
    "masked_preload",
    "masked_targets_pred",
    "masked_tokens_pred_tbl",
    "masked_tokens_tbl",
    "ntokens",
    "perplexity_calc",
    "set_cache_folder",
    "tokenize_lst",
    "transformer_vocab"
  ],
  "_datasets": [
    {
      "name": "df_jaeger14",
      "title": "Self-Paced Reading Dataset on Chinese Relative Clauses",
      "object": "df_jaeger14",
      "class": [
        "tidytable",
        "tbl",
        "data.table",
        "data.frame"
      ],
      "fields": [
        "subject",
        "item",
        "cond",
        "word",
        "wordn",
        "rt",
        "region",
        "question",
        "accuracy",
        "correct_answer",
        "question_type",
        "experiment",
        "list",
        "sentence"
      ],
      "rows": 8624,
      "table": true,
      "tojson": true
    },
    {
      "name": "df_sent",
      "title": "Example dataset: Two word-by-word sentences",
      "object": "df_sent",
      "class": [
        "tidytable",
        "tbl",
        "data.table",
        "data.frame"
      ],
      "fields": [
        "sent_n",
        "word"
      ],
      "rows": 15,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "causal_config",
      "title": "Returns the configuration of a causal model",
      "concept": [
        "causal model helper functions"
      ],
      "topics": [
        "causal_config"
      ]
    },
    {
      "page": "causal_next_tokens_pred_tbl",
      "title": "Generate next tokens after a context and their predictability using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_next_tokens_pred_tbl"
      ]
    },
    {
      "page": "causal_pred_mats",
      "title": "Generate a list of predictability matrices using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_pred_mats"
      ]
    },
    {
      "page": "causal_preload",
      "title": "Preloads a causal language model",
      "concept": [
        "causal model helper functions"
      ],
      "topics": [
        "causal_preload"
      ]
    },
    {
      "page": "causal_predictability",
      "title": "Compute predictability using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_targets_pred",
        "causal_tokens_pred_lst",
        "causal_words_pred"
      ]
    },
    {
      "page": "df_jaeger14",
      "title": "Self-Paced Reading Dataset on Chinese Relative Clauses",
      "concept": [
        "datasets"
      ],
      "topics": [
        "df_jaeger14"
      ]
    },
    {
      "page": "df_sent",
      "title": "Example dataset: Two word-by-word sentences",
      "concept": [
        "datasets"
      ],
      "topics": [
        "df_sent"
      ]
    },
    {
      "page": "install_py_pangoling",
      "title": "Install the Python packages needed for 'pangoling'",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "install_py_pangoling"
      ]
    },
    {
      "page": "installed_py_pangoling",
      "title": "Check if the required Python dependencies for 'pangoling' are installed",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "installed_py_pangoling"
      ]
    },
    {
      "page": "masked_config",
      "title": "Returns the configuration of a masked model",
      "concept": [
        "masked model helper functions"
      ],
      "topics": [
        "masked_config"
      ]
    },
    {
      "page": "masked_preload",
      "title": "Preloads a masked language model",
      "concept": [
        "masked model helper functions"
      ],
      "topics": [
        "masked_preload"
      ]
    },
    {
      "page": "masked_targets_pred",
      "title": "Get the predictability of a target word (or phrase) given a left and right context",
      "concept": [
        "masked model functions"
      ],
      "topics": [
        "masked_targets_pred"
      ]
    },
    {
      "page": "masked_tokens_pred_tbl",
      "title": "Get the possible tokens and their log probabilities for each mask in a sentence",
      "concept": [
        "masked model functions"
      ],
      "topics": [
        "masked_tokens_pred_tbl"
      ]
    },
    {
      "page": "ntokens",
      "title": "The number of tokens in a string or vector of strings",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "ntokens"
      ]
    },
    {
      "page": "perplexity_calc",
      "title": "Calculates perplexity",
      "concept": [
        "general functions"
      ],
      "topics": [
        "perplexity_calc"
      ]
    },
    {
      "page": "set_cache_folder",
      "title": "Set cache folder for HuggingFace transformers",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "set_cache_folder"
      ]
    },
    {
      "page": "tokenize_lst",
      "title": "Tokenize an input",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "tokenize_lst"
      ]
    },
    {
      "page": "transformer_vocab",
      "title": "Returns the vocabulary of a model",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "transformer_vocab"
      ]
    }
  ],
  "_pkglogo": "https://github.com/ropensci/pangoling/raw/main/man/figures/logo.png",
  "_readme": "https://github.com/ropensci/pangoling/raw/main/README.md",
  "_rundeps": [
    "cachem",
    "cli",
    "data.table",
    "fastmap",
    "glue",
    "here",
    "jsonlite",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "memoise",
    "pillar",
    "png",
    "rappdirs",
    "Rcpp",
    "RcppTOML",
    "reticulate",
    "rlang",
    "rprojroot",
    "rstudioapi",
    "tidyselect",
    "tidytable",
    "utf8",
    "vctrs",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "troubleshooting.Rmd",
      "filename": "troubleshooting.html",
      "title": "Troubleshooting the use of Python in R",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Module not found error in Rstudio",
        "HTTPSConnectionPool error"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 08:53:06",
      "commits": 1
    },
    {
      "source": "intro-bert.Rmd",
      "filename": "intro-bert.html",
      "title": "Using a Bert model to get the predictability of words in their context",
      "engine": "knitr::rmarkdown",
      "headings": [
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 08:53:06",
      "commits": 1
    },
    {
      "source": "intro-gpt2.Rmd",
      "filename": "intro-gpt2.html",
      "title": "Using a GPT2 transformer model to get word predictability",
      "engine": "knitr::rmarkdown",
      "headings": [
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 14:01:55",
      "commits": 2
    },
    {
      "source": "example.Rmd",
      "filename": "example.html",
      "title": "Worked-out example: Surprisal from a causal (GPT) model as a cognitive processing bottleneck in reading",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Data analysis",
        "0. Preprocessing",
        "1. Add surprisal values to the dataset",
        "2. Analyze the dataset with a Bayesian hierarchical model",
        "Conclusion",
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 20:22:12",
      "commits": 2
    }
  ],
  "_score": 5.584331224367531,
  "_indexed": true,
  "_nocasepkg": "pangoling",
  "_universes": [
    "ropensci",
    "bnicenboim"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-07-01T08:19:12.000Z",
      "distro": "resolute",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/7deaf1b8c481bad62b4d572fae68a7363e7b96d54a430ddb820cfd7a4a7b8803",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.6.1",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-07-01T08:19:11.000Z",
      "distro": "resolute",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/9b4c57abb17cd1009a7c473fc9e266db73c85252cc4dace71a5ec4f60e47d4a8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-07-01T08:20:52.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/0a5db1dca203f0fc949721e7581381abbdef06b78214cbb638b900e1c60e935e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.6.1",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-07-01T08:21:20.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/59154670b6bb96eadb7e1af25e084c43e0f6703ddc86bea82a448fcdd94cb1a2",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.0.3",
      "date": "2026-07-01T08:19:49.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/03cd34a6ad2f385d0310b75c40a3c1535cc0f32ae1205cc80fa83bef7924b5aa",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-07-01T08:17:52.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/17720b2167d7f9f5cc8579f0dced76264bdd564be431d70081f2ff6a0498c8e1",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-07-01T08:17:50.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/18de7b9c5f285957b2aff0b4df4f8a854c132df6de29345b61a9abb8d6d33cd1",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    },
    {
      "r": "4.6.1",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-07-01T08:17:58.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "https://r2.ropensci.org/5f859c92efd1ee8a59e31dbd63f68f05740093f6fab3be530c662fad274f68c3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/28503336880"
    }
  ]
}