{
  "_id": "69dcaa86c5259ad4cd886389",
  "Type": "Package",
  "Package": "pangoling",
  "Title": "Access to Large Language Model Predictions",
  "Version": "1.0.3",
  "Authors@R": "c(\nperson(\"Bruno\", \"Nicenboim\", , \"b.nicenboim@tilburguniversity.edu\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0002-5176-3943\")),\nperson(\"Chris\", \"Emmerly\", role = \"ctb\"),\nperson(\"Giovanni\", \"Cassani\", role = \"ctb\"),\nperson(\"Lisa\", \"Levinson\", role = \"rev\"),\nperson(\"Utku\", \"Turk\", role = \"rev\")\n)",
  "Description": "Provides access to word predictability estimates using\nlarge language models (LLMs) based on 'transformer'\narchitectures via integration with the 'Hugging Face' ecosystem\n<https://huggingface.co/>. The package interfaces with\npre-trained neural networks and supports both\ncausal/auto-regressive LLMs (e.g., 'GPT-2') and\nmasked/bidirectional LLMs (e.g., 'BERT') to compute the\nprobability of words, phrases, or tokens given their linguistic\ncontext. For details on GPT-2 and causal models, see Radford et\nal. (2019)\n<https://storage.prod.researchhub.com/uploads/papers/2020/06/01/language-models.pdf>,\nfor details on BERT and masked models, see Devlin et al. (2019)\n<doi:10.48550/arXiv.1810.04805>. By enabling a straightforward\nestimation of word predictability, the package facilitates\nresearch in psycholinguistics, computational linguistics, and\nnatural language processing (NLP).",
  "License": "MIT + file LICENSE",
  "URL": "https://docs.ropensci.org/pangoling/,\nhttps://github.com/ropensci/pangoling",
  "BugReports": "https://github.com/ropensci/pangoling/issues",
  "Config/testthat/edition": "3",
  "Encoding": "UTF-8",
  "Language": "en-US",
  "LazyData": "true",
  "Roxygen": "list(markdown = TRUE)",
  "RoxygenNote": "7.3.1",
  "StagedInstall": "yes",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "libpng-dev python3",
  "Repository": "https://ropensci.r-universe.dev",
  "Date/Publication": "2026-01-13 15:59:45 UTC",
  "RemoteUrl": "https://github.com/ropensci/pangoling",
  "RemoteRef": "main",
  "RemoteSha": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-04-13 08:29:44 UTC",
    "User": "root"
  },
  "Author": "Bruno Nicenboim [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-5176-3943>),\nChris Emmerly [ctb],\nGiovanni Cassani [ctb],\nLisa Levinson [rev],\nUtku Turk [rev]",
  "Maintainer": "Bruno Nicenboim <b.nicenboim@tilburguniversity.edu>",
  "MD5sum": "80648418fb1409a9b21dcff6d8fdab1e",
  "_user": "ropensci",
  "_type": "src",
  "_file": "pangoling_1.0.3.tar.gz",
  "_fileid": "3ff166e98ecf64f5dd069bcf41fc07672959d0f71e51716578bb575b3cdb2dbe",
  "_filesize": 1000046,
  "_sha256": "3ff166e98ecf64f5dd069bcf41fc07672959d0f71e51716578bb575b3cdb2dbe",
  "_created": "2026-04-13T08:29:44.000Z",
  "_published": "2026-04-13T08:34:14.495Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 71044831168,
      "time": 200,
      "config": "linux-devel-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6401388321"
    },
    {
      "job": 71044831150,
      "time": 198,
      "config": "linux-release-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6401387670"
    },
    {
      "job": 71044831215,
      "time": 122,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6401377328"
    },
    {
      "job": 71044831193,
      "time": 163,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6401389572"
    },
    {
      "job": 71044294903,
      "time": 196,
      "config": "pkgdown",
      "r": "4.5.2",
      "check": "OK",
      "artifact": "6401322114"
    },
    {
      "job": 71044294894,
      "time": 242,
      "config": "source",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6401334480"
    },
    {
      "job": 71044831135,
      "time": 153,
      "config": "wasm-release",
      "r": "4.5.1",
      "check": "OK",
      "artifact": "6401375727"
    },
    {
      "job": 71044831172,
      "time": 215,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6401391979"
    },
    {
      "job": 71044831212,
      "time": 225,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6401394779"
    },
    {
      "job": 71044831179,
      "time": 205,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6401389546"
    }
  ],
  "_buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/ropensci/pangoling",
  "_commit": {
    "id": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
    "author": "Bruno Nicenboim <bruno.nicenboim@gmail.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "fix DOI",
    "time": 1768319985
  },
  "_maintainer": {
    "name": "Bruno Nicenboim",
    "email": "b.nicenboim@tilburguniversity.edu",
    "login": "bnicenboim",
    "orcid": "0000-0002-5176-3943",
    "twitter": "@bruno_nicenboim",
    "uuid": 5982330
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "cachem",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "memoise",
      "role": "Imports"
    },
    {
      "package": "reticulate",
      "role": "Imports"
    },
    {
      "package": "rstudioapi",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "tidyselect",
      "role": "Imports"
    },
    {
      "package": "tidytable",
      "version": ">= 0.7.2",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "brms",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "parallel",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "spelling",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "tictoc",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    }
  ],
  "_owner": "ropensci",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-15",
      "n": 2
    },
    {
      "week": "2026-03",
      "n": 1
    }
  ],
  "_tags": [],
  "_topics": [
    "nlp",
    "psycholinguistics",
    "transformers"
  ],
  "_stars": 12,
  "_contributors": [
    {
      "user": "bnicenboim",
      "count": 269,
      "uuid": 5982330
    }
  ],
  "_userbio": {
    "uuid": 1200269,
    "type": "organization",
    "name": "rOpenSci",
    "description": "Tools and R Packages for Open Science"
  },
  "_downloads": {
    "count": 678,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/pangoling"
  },
  "_devurl": "https://github.com/ropensci/pangoling",
  "_pkgdown": "https://docs.ropensci.org/pangoling/",
  "_searchresults": 16,
  "_metadata": {
    "review": {
      "id": 575,
      "status": "reviewed",
      "version": "0.0.0.9005",
      "organization": "rOpenSci Software Review",
      "url": "https://github.com/ropensci/software-review/issues/575"
    },
    "ropensci_category": "scalereprod"
  },
  "_rbuild": "4.5.3",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/pangoling.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ropensci/pangoling",
  "_realowner": "ropensci",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.3",
      "date": "2025-04-07"
    }
  ],
  "_exports": [
    "causal_config",
    "causal_lp",
    "causal_lp_mats",
    "causal_next_tokens_pred_tbl",
    "causal_next_tokens_tbl",
    "causal_pred_mats",
    "causal_preload",
    "causal_targets_pred",
    "causal_tokens_lp_tbl",
    "causal_tokens_pred_lst",
    "causal_words_pred",
    "install_py_pangoling",
    "installed_py_pangoling",
    "masked_config",
    "masked_lp",
    "masked_preload",
    "masked_targets_pred",
    "masked_tokens_pred_tbl",
    "masked_tokens_tbl",
    "ntokens",
    "perplexity_calc",
    "set_cache_folder",
    "tokenize_lst",
    "transformer_vocab"
  ],
  "_datasets": [
    {
      "name": "df_jaeger14",
      "title": "Self-Paced Reading Dataset on Chinese Relative Clauses",
      "object": "df_jaeger14",
      "class": [
        "tidytable",
        "tbl",
        "data.table",
        "data.frame"
      ],
      "fields": [
        "subject",
        "item",
        "cond",
        "word",
        "wordn",
        "rt",
        "region",
        "question",
        "accuracy",
        "correct_answer",
        "question_type",
        "experiment",
        "list",
        "sentence"
      ],
      "rows": 8624,
      "table": true,
      "tojson": true
    },
    {
      "name": "df_sent",
      "title": "Example dataset: Two word-by-word sentences",
      "object": "df_sent",
      "class": [
        "tidytable",
        "tbl",
        "data.table",
        "data.frame"
      ],
      "fields": [
        "sent_n",
        "word"
      ],
      "rows": 15,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "causal_config",
      "title": "Returns the configuration of a causal model",
      "concept": [
        "causal model helper functions"
      ],
      "topics": [
        "causal_config"
      ]
    },
    {
      "page": "causal_next_tokens_pred_tbl",
      "title": "Generate next tokens after a context and their predictability using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_next_tokens_pred_tbl"
      ]
    },
    {
      "page": "causal_pred_mats",
      "title": "Generate a list of predictability matrices using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_pred_mats"
      ]
    },
    {
      "page": "causal_preload",
      "title": "Preloads a causal language model",
      "concept": [
        "causal model helper functions"
      ],
      "topics": [
        "causal_preload"
      ]
    },
    {
      "page": "causal_predictability",
      "title": "Compute predictability using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_targets_pred",
        "causal_tokens_pred_lst",
        "causal_words_pred"
      ]
    },
    {
      "page": "df_jaeger14",
      "title": "Self-Paced Reading Dataset on Chinese Relative Clauses",
      "concept": [
        "datasets"
      ],
      "topics": [
        "df_jaeger14"
      ]
    },
    {
      "page": "df_sent",
      "title": "Example dataset: Two word-by-word sentences",
      "concept": [
        "datasets"
      ],
      "topics": [
        "df_sent"
      ]
    },
    {
      "page": "install_py_pangoling",
      "title": "Install the Python packages needed for 'pangoling'",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "install_py_pangoling"
      ]
    },
    {
      "page": "installed_py_pangoling",
      "title": "Check if the required Python dependencies for 'pangoling' are installed",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "installed_py_pangoling"
      ]
    },
    {
      "page": "masked_config",
      "title": "Returns the configuration of a masked model",
      "concept": [
        "masked model helper functions"
      ],
      "topics": [
        "masked_config"
      ]
    },
    {
      "page": "masked_preload",
      "title": "Preloads a masked language model",
      "concept": [
        "masked model helper functions"
      ],
      "topics": [
        "masked_preload"
      ]
    },
    {
      "page": "masked_targets_pred",
      "title": "Get the predictability of a target word (or phrase) given a left and right context",
      "concept": [
        "masked model functions"
      ],
      "topics": [
        "masked_targets_pred"
      ]
    },
    {
      "page": "masked_tokens_pred_tbl",
      "title": "Get the possible tokens and their log probabilities for each mask in a sentence",
      "concept": [
        "masked model functions"
      ],
      "topics": [
        "masked_tokens_pred_tbl"
      ]
    },
    {
      "page": "ntokens",
      "title": "The number of tokens in a string or vector of strings",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "ntokens"
      ]
    },
    {
      "page": "perplexity_calc",
      "title": "Calculates perplexity",
      "concept": [
        "general functions"
      ],
      "topics": [
        "perplexity_calc"
      ]
    },
    {
      "page": "set_cache_folder",
      "title": "Set cache folder for HuggingFace transformers",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "set_cache_folder"
      ]
    },
    {
      "page": "tokenize_lst",
      "title": "Tokenize an input",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "tokenize_lst"
      ]
    },
    {
      "page": "transformer_vocab",
      "title": "Returns the vocabulary of a model",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "transformer_vocab"
      ]
    }
  ],
  "_pkglogo": "https://github.com/ropensci/pangoling/raw/main/man/figures/logo.png",
  "_readme": "https://github.com/ropensci/pangoling/raw/main/README.md",
  "_rundeps": [
    "cachem",
    "cli",
    "data.table",
    "fastmap",
    "glue",
    "here",
    "jsonlite",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "memoise",
    "pillar",
    "png",
    "rappdirs",
    "Rcpp",
    "RcppTOML",
    "reticulate",
    "rlang",
    "rprojroot",
    "rstudioapi",
    "tidyselect",
    "tidytable",
    "utf8",
    "vctrs",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "troubleshooting.Rmd",
      "filename": "troubleshooting.html",
      "title": "Troubleshooting the use of Python in R",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Module not found error in Rstudio",
        "HTTPSConnectionPool error"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 08:53:06",
      "commits": 1
    },
    {
      "source": "intro-bert.Rmd",
      "filename": "intro-bert.html",
      "title": "Using a Bert model to get the predictability of words in their context",
      "engine": "knitr::rmarkdown",
      "headings": [
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 08:53:06",
      "commits": 1
    },
    {
      "source": "intro-gpt2.Rmd",
      "filename": "intro-gpt2.html",
      "title": "Using a GPT2 transformer model to get word predictability",
      "engine": "knitr::rmarkdown",
      "headings": [
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 14:01:55",
      "commits": 2
    },
    {
      "source": "example.Rmd",
      "filename": "example.html",
      "title": "Worked-out example: Surprisal from a causal (GPT) model as a cognitive processing bottleneck in reading",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Data analysis",
        "0. Preprocessing",
        "1. Add surprisal values to the dataset",
        "2. Analyze the dataset with a Bayesian hierarchical model",
        "Conclusion",
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 20:22:12",
      "commits": 2
    }
  ],
  "_score": 5.885361220031513,
  "_indexed": true,
  "_nocasepkg": "pangoling",
  "_universes": [
    "ropensci",
    "bnicenboim"
  ],
  "_binaries": [
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-04-13T08:32:44.000Z",
      "distro": "noble",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "45a02e9568683c5706a8f094f98c5966958fb5e8281f0033171014dd923b7648",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.5.3",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-04-13T08:32:40.000Z",
      "distro": "noble",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "02508fdc74695a2642b8d0b5b60c1995af0f9ab110c2c293d4267d8821728a8e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-04-13T08:32:12.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "e85093c579a50588a97d01d5fa93699f485e69988adf8dabeff0ce3892d5d1c7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-04-13T08:32:49.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "eff38ca556ab1913118c43350f7e7f8d6424337cd003907ec0cdeb9eb7620663",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.5.1",
      "os": "wasm",
      "version": "1.0.3",
      "date": "2026-04-13T08:32:37.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "c2f7a8c1cd4ad4b3dfe5edffa9cf18c3071efc6164031b022f5338df36b5f959",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-04-13T08:31:32.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "a0a3bc807e455efb7e11a97eee3ae4eb25869f33ec97e93122024ae9ae461ea4",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-04-13T08:31:27.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "f6218a0fd735884a6e43912f21fa2cd9bf08644809bd70efcd93791bb600548f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-04-13T08:31:25.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "950c99dae8c7d5f0f5b16e3ddca203e1284e14023fd6949eee110c19089a5750",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/24333389209"
    }
  ]
}