author    Aditya <bluenerd@protonmail.com>  2025-01-13 12:44:28 +0530
committer Aditya <bluenerd@protonmail.com>  2025-01-13 12:44:28 +0530
commit    6108457219d98a54c93b10ee3f91fde23df82cc5 (patch)
tree      c53b0b4c605308503280d91c225de078b96fb955
add files
-rw-r--r--  .envrc        1
-rw-r--r--  flake.lock  330
-rw-r--r--  flake.nix    54
-rw-r--r--  main.tex    252
-rw-r--r--  refs.bib   2016
5 files changed, 2653 insertions, 0 deletions
diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..3550a30
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+use flake
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..def2532
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,330 @@
+{
+ "nodes": {
+ "devshell": {
+ "inputs": {
+ "nixpkgs": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1722113426,
+ "narHash": "sha256-Yo/3loq572A8Su6aY5GP56knpuKYRvM2a1meP9oJZCw=",
+ "owner": "numtide",
+ "repo": "devshell",
+ "rev": "67cce7359e4cd3c45296fb4aaf6a19e2a9c757ae",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "devshell",
+ "type": "github"
+ }
+ },
+ "flake-compat": {
+ "locked": {
+ "lastModified": 1696426674,
+ "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
+ "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+ "revCount": 57,
+ "type": "tarball",
+ "url": "https://api.flakehub.com/f/pinned/edolstra/flake-compat/1.0.1/018afb31-abd1-7bff-a5e4-cff7e18efb7a/source.tar.gz"
+ },
+ "original": {
+ "type": "tarball",
+ "url": "https://flakehub.com/f/edolstra/flake-compat/1.tar.gz"
+ }
+ },
+ "flake-parts": {
+ "inputs": {
+ "nixpkgs-lib": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1722555600,
+ "narHash": "sha256-XOQkdLafnb/p9ij77byFQjDf5m5QYl9b2REiVClC+x4=",
+ "owner": "hercules-ci",
+ "repo": "flake-parts",
+ "rev": "8471fe90ad337a8074e957b69ca4d0089218391d",
+ "type": "github"
+ },
+ "original": {
+ "owner": "hercules-ci",
+ "repo": "flake-parts",
+ "type": "github"
+ }
+ },
+ "flake-utils": {
+ "inputs": {
+ "systems": "systems"
+ },
+ "locked": {
+ "lastModified": 1687709756,
+ "narHash": "sha256-Y5wKlQSkgEK2weWdOu4J3riRd+kV/VCgHsqLNTTWQ/0=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "dbabf0ca0c0c4bce6ea5eaf65af5cb694d2082c7",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "flake-utils_2": {
+ "inputs": {
+ "systems": "systems_2"
+ },
+ "locked": {
+ "lastModified": 1710146030,
+ "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "git-hooks": {
+ "inputs": {
+ "flake-compat": [
+ "nixvim",
+ "flake-compat"
+ ],
+ "gitignore": "gitignore",
+ "nixpkgs": [
+ "nixvim",
+ "nixpkgs"
+ ],
+ "nixpkgs-stable": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1724857454,
+ "narHash": "sha256-Qyl9Q4QMTLZnnBb/8OuQ9LSkzWjBU1T5l5zIzTxkkhk=",
+ "owner": "cachix",
+ "repo": "git-hooks.nix",
+ "rev": "4509ca64f1084e73bc7a721b20c669a8d4c5ebe6",
+ "type": "github"
+ },
+ "original": {
+ "owner": "cachix",
+ "repo": "git-hooks.nix",
+ "type": "github"
+ }
+ },
+ "gitignore": {
+ "inputs": {
+ "nixpkgs": [
+ "nixvim",
+ "git-hooks",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1709087332,
+ "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=",
+ "owner": "hercules-ci",
+ "repo": "gitignore.nix",
+ "rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
+ "type": "github"
+ },
+ "original": {
+ "owner": "hercules-ci",
+ "repo": "gitignore.nix",
+ "type": "github"
+ }
+ },
+ "home-manager": {
+ "inputs": {
+ "nixpkgs": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1724435763,
+ "narHash": "sha256-UNky3lJNGQtUEXT2OY8gMxejakSWPTfWKvpFkpFlAfM=",
+ "owner": "nix-community",
+ "repo": "home-manager",
+ "rev": "c2cd2a52e02f1dfa1c88f95abeb89298d46023be",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-community",
+ "repo": "home-manager",
+ "type": "github"
+ }
+ },
+ "nix-darwin": {
+ "inputs": {
+ "nixpkgs": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1724561770,
+ "narHash": "sha256-zv8C9RNa86CIpyHwPIVO/k+5TfM8ZbjGwOOpTe1grls=",
+ "owner": "lnl7",
+ "repo": "nix-darwin",
+ "rev": "ac5694a0b855a981e81b4d9f14052e3ff46ca39e",
+ "type": "github"
+ },
+ "original": {
+ "owner": "lnl7",
+ "repo": "nix-darwin",
+ "type": "github"
+ }
+ },
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1709961763,
+ "narHash": "sha256-6H95HGJHhEZtyYA3rIQpvamMKAGoa8Yh2rFV29QnuGw=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "3030f185ba6a4bf4f18b87f345f104e6a6961f34",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixos-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "nixpkgs_2": {
+ "locked": {
+ "lastModified": 1724819573,
+ "narHash": "sha256-GnR7/ibgIH1vhoy8cYdmXE6iyZqKqFxQSVkFgosBh6w=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "71e91c409d1e654808b2621f28a327acfdad8dc2",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixos-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "nixvim": {
+ "inputs": {
+ "devshell": "devshell",
+ "flake-compat": "flake-compat",
+ "flake-parts": "flake-parts",
+ "git-hooks": "git-hooks",
+ "home-manager": "home-manager",
+ "nix-darwin": "nix-darwin",
+ "nixpkgs": "nixpkgs_2",
+ "nuschtosSearch": "nuschtosSearch",
+ "treefmt-nix": "treefmt-nix"
+ },
+ "locked": {
+ "lastModified": 1726148694,
+ "narHash": "sha256-bR7LFVtMjiVlO2OpmDSuLQ2XQr+h+JtVFYObAbThZSs=",
+ "owner": "nix-community",
+ "repo": "nixvim",
+ "rev": "27a0dd435dd3563f4cf9d788601fadfce8c59db6",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-community",
+ "repo": "nixvim",
+ "type": "github"
+ }
+ },
+ "nuschtosSearch": {
+ "inputs": {
+ "flake-utils": "flake-utils_2",
+ "nixpkgs": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1724584782,
+ "narHash": "sha256-7FfHv7b1jwMPSu9SPY9hdxStk8E6EeSwzqdvV69U4BM=",
+ "owner": "NuschtOS",
+ "repo": "search",
+ "rev": "5a08d691de30b6fc28d58ce71a5e420f2694e087",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NuschtOS",
+ "repo": "search",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "flake-utils": "flake-utils",
+ "nixpkgs": "nixpkgs",
+ "nixvim": "nixvim"
+ }
+ },
+ "systems": {
+ "locked": {
+ "lastModified": 1681028828,
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+ "owner": "nix-systems",
+ "repo": "default",
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-systems",
+ "repo": "default",
+ "type": "github"
+ }
+ },
+ "systems_2": {
+ "locked": {
+ "lastModified": 1681028828,
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+ "owner": "nix-systems",
+ "repo": "default",
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-systems",
+ "repo": "default",
+ "type": "github"
+ }
+ },
+ "treefmt-nix": {
+ "inputs": {
+ "nixpkgs": [
+ "nixvim",
+ "nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1724833132,
+ "narHash": "sha256-F4djBvyNRAXGusJiNYInqR6zIMI3rvlp6WiKwsRISos=",
+ "owner": "numtide",
+ "repo": "treefmt-nix",
+ "rev": "3ffd842a5f50f435d3e603312eefa4790db46af5",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "treefmt-nix",
+ "type": "github"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..23b96bd
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,54 @@
+# Taken from https://flyx.org/nix-flakes-latex/
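+# Build the PDF with `nix build` (defaultPackage below); the result is then available at ./result/main.pdf.
+# With direnv/nix-direnv installed, the accompanying .envrc ("use flake") loads this flake's environment automatically.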
+{
+ description = "LaTeX Document Demo";
+ inputs = {
+ nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+ flake-utils.url = "github:numtide/flake-utils";
+ nixvim.url = "github:nix-community/nixvim";
+ };
+ outputs = {
+ self,
+ nixpkgs,
+ nixvim,
+ flake-utils,
+ }:
+ with flake-utils.lib;
+ eachSystem allSystems (system: let
+ pkgs = nixpkgs.legacyPackages.${system};
+ tex = pkgs.texlive.combine {
+ inherit (pkgs.texlive) scheme-basic cite pgf nicematrix latex-bin latexmk titlesec listings ieeetran;
+ };
+    nvim = nixvim.legacyPackages.${system}.makeNixvim {
+ plugins.lsp.enable = true;
+ colorschemes.gruvbox.enable = true;
+ };
+ in rec {
+ packages = {
+ document = pkgs.stdenvNoCC.mkDerivation rec {
+ name = "latex-demo-document";
+ src = self;
+        buildInputs = [ pkgs.coreutils pkgs.texliveFull pkgs.pandoc pkgs.tetex pkgs.zathura pkgs.kile ];
+ phases = ["unpackPhase" "buildPhase" "installPhase"];
+ buildPhase = ''
+ runHook preBuild
+
+ export PATH="${pkgs.lib.makeBinPath buildInputs}";
+ #pandoc rpa.md -s -o rpa.tex --pdf-engine=pdflatex
+ mkdir -p .cache/texmf-var
+ env TEXMFHOME=.cache TEXMFVAR=.cache/texmf-var \
+ latexmk -f -interaction=nonstopmode -pdf -pdflatex \
+ main.tex
+ runHook postBuild
+ '';
+ installPhase = ''
+ runHook preInstall
+
+ install -m644 -D *.pdf $out/main.pdf
+
+ runHook postInstall
+ '';
+ };
+ };
+ defaultPackage = packages.document;
+ });
+}
diff --git a/main.tex b/main.tex
new file mode 100644
index 0000000..feb4841
--- /dev/null
+++ b/main.tex
@@ -0,0 +1,252 @@
+\documentclass[conference]{IEEEtran}
+\IEEEoverridecommandlockouts
+% The preceding line is only needed to identify funding in the first footnote. If that is unneeded, please comment it out.
+\usepackage{cite}
+\usepackage{amsmath,amssymb,amsfonts}
+\usepackage{algorithmic}
+\usepackage{graphicx}
+\usepackage{textcomp}
+\usepackage{xcolor}
+\usepackage{hyperref}
+
+\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
+ T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
+\begin{document}
+
+\title{A Case Study on Retrieval-Augmented Generation for AI-Generated Content}
+
+\author{\IEEEauthorblockN{Aditya Kumar}
+\IEEEauthorblockA{\textit{University Institute of Engineering} \\
+\textit{Chandigarh University}\\
+Mohali, India \\
+24mai14003@cuchd.in}
+}
+
+\maketitle
+
+\begin{abstract}
+  Improvements in model algorithms, the expansion of core models, and the availability of high-quality datasets have led to the development of Artificial Intelligence Generated Content (AIGC). Despite its noteworthy accomplishments, AIGC still confronts challenges such as keeping up with new information, managing large amounts of training and inference data, minimizing data leakage, and handling long-tail data. The paradigm known as Retrieval-Augmented Generation (RAG) has surfaced as a solution to these problems. Specifically, RAG introduces an information retrieval step that improves the generation process by obtaining pertinent objects from accessible data sources, resulting in increased robustness and accuracy. In this study, we present a thorough overview of previous attempts to integrate RAG methodologies into AIGC scenarios. To isolate the essential abstractions of the augmentation approaches for different retrievers and generators, we first categorize RAG foundations according to how the retriever augments the generator. This cohesive viewpoint covers all RAG scenarios and highlights the developments and key technologies that support possible future breakthroughs. We also summarize further RAG enhancement techniques that help with efficient RAG system deployment and engineering. From a different angle, we then survey real-world RAG applications across many modalities and tasks, providing useful references for scholars and practitioners. Finally, we discuss the shortcomings of existing RAG systems, present benchmarks for RAG, and make recommendations for future research directions.
+\end{abstract}
+\begin{IEEEkeywords}
+ Retrieval-augmented generation, AI-generated content, generative models, information retrieval.
+\end{IEEEkeywords}
+\section{Introduction}
+\subsection{Background}
+Artificial Intelligence Generated Content (AIGC) has seen a surge in attention in recent years. Large Language Models (LLMs) such as the GPT series \cite{DBLP:conf/nips/BrownMRSKDNSSAA20,DBLP:journals/corr/abs-2107-03374,DBLP:journals/corr/abs-2303-08774} and the LLaMA series \cite{LLaMA,DBLP:journals/corr/abs-2307-09288,DBLP:journals/corr/abs-2308-12950} for text and code, DALL-E \cite{DBLP:conf/icml/RameshPGGVRCS21,DBLP:journals/corr/abs-2204-06125,betker2023improving} and Stable Diffusion \cite{DBLP:conf/cvpr/RombachBLEO22} for images, and Sora \cite{openai/sora} for videos are just a few examples of carefully designed content generation tools that can produce a wide range of outputs across different modalities. The term ``AIGC'' emphasizes that the content is constructed by sophisticated generative models rather than by humans or rule-based methods. Powered by novel model algorithms, foundation models of explosive scale, and massive high-quality datasets, these generative models have demonstrated outstanding performance. In particular, image-generation tasks have moved from Generative Adversarial Networks (GANs) \cite{GAN} to Latent Diffusion Models (LDMs) \cite{DBLP:conf/cvpr/RombachBLEO22}, while sequence-to-sequence tasks have moved from Long Short-Term Memory (LSTM) networks \cite{DBLP:journals/neco/HochreiterS97} to Transformer-based models \cite{DBLP:conf/nips/VaswaniSPUJGKP17}. It is noteworthy that the scale of foundation models has grown from millions of parameters \cite{DBLP:conf/iclr/GuoRLFT0ZDSFTDC21,DBLP:journals/jmlr/RaffelSRLNMZLL20} to billions or even trillions of parameters \cite{DBLP:conf/nips/BrownMRSKDNSSAA20}, \cite{LLaMA}, \cite{Switch_transformers}. These developments are further supported by the availability of extensive, high-quality datasets \cite{DBLP:conf/nips/BrownMRSKDNSSAA20}, \cite{scalingLaw}, which provide enough training examples to fully tune the model parameters.
+
+Information retrieval is another essential application in computer science. In contrast to generation, retrieval aims to identify pertinent items that already exist in a sizable pool of resources. The most common applications of retrieval are web search engines, which are primarily concerned with document retrieval \cite{DBLP:journals/ftir/RobertsonZ09}, \cite{DBLP:conf/emnlp/KarpukhinOMLWEC20}. Currently, efficient information retrieval systems can handle billion-scale document collections \cite{DBLP:journals/tbd/JohnsonDJ21}, \cite{DBLP:conf/nips/ChenZWLLLYW21}. Beyond documents, retrieval has also been applied to many other modalities \cite{DBLP:journals/csur/DattaJLW08,radford2021learning,DBLP:conf/emnlp/FengGTDFGS0LJZ20,DBLP:conf/icassp/WuCZHBD23}.
+
+Even with major progress in generative models, AIGC still faces obstacles such as out-of-date knowledge, a lack of long-tail knowledge \cite{Adaptive-Retrieval-whennottrust}, and the risk of leaking private training data \cite{DBLP:conf/uss/CarliniTWJHLRBS21}. Retrieval-Augmented Generation (RAG) attempts to alleviate these problems with a flexible data store \cite{C-RAG}. The retrievable knowledge acts as non-parametric memory that is readily updated, may encode sensitive information, and can accommodate a large amount of long-tail knowledge. Retrieval can also reduce the cost of generation: RAG can shrink large models \cite{Atlas}, support extended contexts \cite{MemTransformer2022}, and eliminate certain generation steps \cite{REST}.
+
+The retriever receives an input query and finds pertinent data sources, and the retrieved knowledge improves the generation process through its interaction with the generator. Depending on how the retrieved results augment the generation, there are several foundational paradigms (or foundations, for short): they can serve as augmented input to the generator \cite{REALM}, \cite{2020RAG}; they can join as latent representations at a middle stage of generation \cite{FID}, \cite{RETRO}; they can contribute to the final generation results in the form of logits \cite{KNN-LM}, \cite{Efficient-KNNLM}; and they can even influence or skip certain generation steps \cite{REST}, \cite{GPTCache}. Researchers have also proposed a number of enhancements to strengthen the basic RAG procedure, including targeted improvements for particular components as well as comprehensive improvements aimed at the pipeline as a whole.
+
+Furthermore, although the idea behind RAG first surfaced in text-to-text generation \cite{2020RAG}, this method has since found use in a wide range of fields, including code \cite{DBLP:conf/emnlp/ParvezACRC21,DBLP:conf/naacl/AhmadCRC21,DBLP:conf/iclr/Zhou0XJN23}, audio \cite{DBLP:journals/corr/abs-2012-07331}, \cite{DBLP:conf/icml/HuangHY0LLYLYZ23}, images \cite{tseng2020retrievegan,sarto2022retrieval,ramos2023smallcap}, videos \cite{DBLP:journals/tomccap/ChenPLYCM23}, \cite{DBLP:journals/corr/abs-2401-00789}, 3D \cite{DBLP:journals/corr/abs-2402-02972}, \cite{DBLP:conf/iccv/ZhangGPCHLYL23}, knowledge \cite{DBLP:conf/coling/HuWSQ22,DBLP:conf/emnlp/HuangKZ21,DBLP:conf/emnlp/DasZTGPLTPM21}, and AI for science \cite{wang2022retrieval}, \cite{jin2023genegpt}. The fundamental concept and methodology of RAG are largely uniform across modalities; what changes are the details of the augmentation methods and the choice of generators and retrievers, which depend on the particular modalities and applications.
+
+Despite the field's recent rapid expansion and widening range of applications, RAG still lacks a comprehensive survey covering all of its foundations, advancements, and applications, and this gap impedes its progress. The practical relevance of research in this area is undermined by the lack of discussion of RAG foundations, which prevents RAG's full potential from being realized. Although query-based RAG for text-generation tasks has garnered most of the research interest, it is important to recognize that the other RAG foundations are equally useful and have a great deal of room for growth. In addition, without a broad overview of RAG applications, practitioners and academics tend to overlook RAG's advancements across a variety of modalities and remain unaware of its potential applications. While text generation is commonly regarded as the primary use case for RAG, RAG development in other modalities has also started to gain traction and has produced encouraging results. A number of modalities have a long history of being associated with retrieval procedures, which gives RAG in those settings its unique qualities. Motivated by this, our goal in this study is to offer a thorough survey that presents a methodical summary of RAG.
+
+\subsection{Contribution}
+This case study provides a thorough introduction to RAG, addressing its origins, improvements, uses, benchmarks, constraints, and possible future paths. We extract the fundamentals of RAG foundations, seeing applications as modifications of these principles, notwithstanding differences in retrievers and generators across modalities and workloads. The goal of this paper is to provide scholars and practitioners with recommendations and references, along with insightful information that will help advance RAG techniques and related applications. To summarize, the following is a list of contributions:
+\begin{itemize}
+  \item A thorough analysis of RAG that distills its foundational abstractions for different retrievers and generators.
+  \item An examination of the enhancements proposed in the RAG literature, outlining the strategies used to make RAG systems more effective.
+  \item A survey of existing AIGC methods that apply RAG techniques across modalities and tasks, showing how RAG adds value to existing generative models.
+  \item A discussion of RAG's limitations and promising research directions, providing insight into possible future developments.
+\end{itemize}
+
+\subsection{Related Work}
+Numerous surveys have appeared as RAG has developed, although they cover only a portion of the subject. Specifically, they either cover only a small portion of RAG techniques for specific contexts or concentrate solely on one RAG foundation. Without a thorough examination of other modalities, most existing publications concentrate on text-related RAG tasks assisted by LLMs. The survey by Li et al. \cite{DBLP:journals/corr/abs-2202-01110} provides a fundamental review of RAG and covers particular applications related to text generation tasks. Similarly, the tutorial by Asai et al. \cite{retrieval-lm-tutorial} focuses on retrieval-based language models and describes their training approaches and architectures. Meanwhile, a recent survey by Gao et al. \cite{DBLP:journals/corr/abs-2312-10997} examines RAG in the context of LLMs, with a focus on query-based RAG optimization techniques. Our work extends RAG's reach to the full AIGC ecosystem, acknowledging its expansion beyond the text domain and enabling a more thorough coverage of RAG research. Another survey, by Zhao et al. \cite{DBLP:conf/emnlp/ZhaoCWJLQDGLLJ23}, presents RAG applications across several modalities but skips the topic of RAG foundations. Another study \cite{ding2024survey} covers only a portion of the work in other modalities. Even though certain facets of RAG have been studied in previous research, a thorough overview covering the basics, improvements, and domain-specific applicability of RAG is still lacking. The goal of this paper is to close this gap by offering an organized analysis of RAG.
+
+\section{Preliminary}
+\subsection{Overview}
+The generator and the retriever are the two main modules that make up a RAG system. The retriever searches the data store for pertinent information, while the generator produces the required content. The RAG process proceeds as follows: (i) the query is first sent to the retriever, which searches for pertinent data; (ii) the original query and the retrieval results are then fed into the generator through a specific augmentation procedure; (iii) finally, the generator produces the desired result.
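+
+As a minimal illustrative sketch of this workflow (hypothetical helper interfaces, not tied to any particular system surveyed here):
+\begin{verbatim}
+def rag_answer(query, retriever, generator, k=5):
+    # (i) retrieval: fetch the k most relevant items for the query
+    docs = retriever.search(query, top_k=k)
+    # (ii) augmentation: combine query and retrieved content into one input
+    context = "\n\n".join(d.text for d in docs)
+    prompt = context + "\n\nQuestion: " + query
+    # (iii) generation: produce the final result from the augmented input
+    return generator.generate(prompt)
+\end{verbatim}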
+
+\subsection{Generator}
+The era of AIGC has begun, thanks to generative AI's outstanding performance on a wide variety of tasks. The generation module plays an essential role in the RAG system, and different generative models are used in different circumstances: for example, transformer models for text-to-text tasks, VisualGPT \cite{DBLP:conf/cvpr/ChenGY0E22} for image-to-text tasks, Stable Diffusion \cite{DBLP:conf/cvpr/RombachBLEO22} for text-to-image tasks, Codex \cite{DBLP:journals/corr/abs-2107-03374} for text-to-code tasks, and so on. Here we introduce four generators commonly used in RAG: the transformer model, LSTM, the diffusion model, and GAN.
+
+\subsubsection{Transformer Model}
+Transformer models, which combine self-attention mechanisms, feedforward networks, layer normalization modules, and residual connections, are among the best-performing models in natural language processing (NLP) \cite{EfficientTransformers}. The input is tokenized and embedded into a sequence of latent representations, and at each generation step a classification over the vocabulary is applied to these representations to construct the final output sequence.
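+The core scaled dot-product self-attention operation \cite{DBLP:conf/nips/VaswaniSPUJGKP17} underlying these models is
+\begin{equation}
+  \mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d_k}}\right) V,
+\end{equation}
+where $Q$, $K$, and $V$ are the query, key, and value projections of the input and $d_k$ is the key dimension.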
+
+\subsubsection{LSTM}
+The Recurrent Neural Network (RNN) model has a unique variant known as Long Short-Term Memory (LSTM) \cite{lstm_survey}. Cell states and gating methods are used to address the problems of exploding/vanishing gradients in long-term dependence processing. The three gates in the model—Input, Forget, and Output—filter data, while the central Cell State module stores and controls the data. It generates outputs autoregressively using the same vocabulary classification technique as transformer models.
+
+\subsubsection{Diffusion Model}
+A family of deep generative models known as diffusion models is capable of producing a wide range of realistic data samples, such as texts, photos, videos, molecules, and more \cite{yang2023diffsurvey}. In order to create fresh data from noise, diffusion models first add noise to the data gradually until it becomes random, then reverse the process. Neural networks and probabilistic modeling serve as the foundation for this procedure.
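+In a typical formulation, the forward process perturbs a sample $x_0$ over $T$ steps with Gaussian noise,
+\begin{equation}
+  q(x_t \mid x_{t-1}) = \mathcal{N}\!\bigl(x_t;\ \sqrt{1-\beta_t}\, x_{t-1},\ \beta_t I\bigr),
+\end{equation}
+where $\beta_t$ is a small noise schedule, and a neural network is trained to approximate the reverse denoising transitions $p_\theta(x_{t-1} \mid x_t)$.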
+
+\subsubsection{GAN}
+Generative Adversarial Networks (GANs) \cite{GAN} are deep learning models that can generate realistic images, audio, and other data \cite{GAN_Survey}. They consist of a generator and a discriminator, which compete through adversarial learning. The generator continuously improves its ability to generate realistic samples, while the discriminator continuously improves its ability to distinguish between true and false samples.
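+The adversarial objective of the original GAN \cite{GAN} is the minimax game
+\begin{equation}
+  \min_G \max_D \; \mathbb{E}_{x \sim p_{\mathrm{data}}}\bigl[\log D(x)\bigr] + \mathbb{E}_{z \sim p_z}\bigl[\log\bigl(1 - D(G(z))\bigr)\bigr],
+\end{equation}
+where $D$ learns to distinguish real samples from generated ones and $G$ learns to fool $D$.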
+
+\subsection{Retriever}
+Retrieval is the task of finding and returning pertinent information in response to an information need. In particular, consider data sources that can be viewed as a key-value store, where each key is associated with a value (keys and values may be identical). The goal is to use a similarity function to find the top-$k$ keys most similar to a given query and to return the associated values. Based on the similarity function, existing retrieval techniques can be divided into sparse retrieval, dense retrieval, and other categories. Commonly used sparse and dense retrieval can both be broken down into two stages: (i) each object is first encoded into a particular representation, and (ii) an index is built to organize the data source for efficient search.
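+
+Formally, given a query $q$, a similarity function $s(\cdot,\cdot)$, and a store $\{(x_i, v_i)\}_{i=1}^{N}$ of key-value pairs, the retriever returns the values whose keys obtain the $k$ largest scores:
+\begin{equation}
+  \mathcal{R}_k(q) = \bigl\{\, v_i \mid s(q, x_i) \text{ ranks among the } k \text{ largest scores} \,\bigr\}.
+\end{equation}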
+
+\subsubsection{Sparse Retriever}
+Sparse retrieval techniques are frequently employed in document retrieval, where the keys or values to be searched are documents. These methods rely on term-matching metrics such as TF-IDF \cite{DBLP:conf/sigir/RobertsonW97}, query likelihood \cite{DBLP:conf/sigir/LaffertyZ01}, and BM25 \cite{DBLP:journals/ftir/RobertsonZ09}, which analyze word statistics from texts and build inverted indices for efficient searching. BM25 in particular is a robust baseline for large-scale web search, incorporating query-token occurrences, inverse document frequency weights, and other relevant metrics. To enable efficient search, sparse retrieval typically organizes items with an inverted index: each query term looks up a list of candidate documents, which are then ranked by their statistical scores.
+
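+For reference, the BM25 \cite{DBLP:journals/ftir/RobertsonZ09} score of a document $D$ for a query $Q = \{q_1, \dots, q_n\}$ has the form
+\begin{equation}
+  \mathrm{score}(D, Q) = \sum_{i=1}^{n} \mathrm{IDF}(q_i)\,
+  \frac{f(q_i, D)\,(k_1 + 1)}{f(q_i, D) + k_1\bigl(1 - b + b\,\frac{|D|}{\mathrm{avgdl}}\bigr)},
+\end{equation}
+where $f(q_i, D)$ is the term frequency of $q_i$ in $D$, $|D|$ is the document length, $\mathrm{avgdl}$ is the average document length, and $k_1$ and $b$ are free parameters.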
+
+\subsubsection{Dense Retriever}
+Dense retrieval techniques, in contrast to sparse retrieval, represent queries and keys as dense embedding vectors and build an Approximate Nearest Neighbor (ANN) index to speed up the search; this applies across modalities. For text data, recent developments in pre-trained models (such as BERT \cite{DBLP:conf/iclr/GuoRLFT0ZDSFTDC21}) have been used to encode queries and keys separately \cite{DBLP:conf/emnlp/KarpukhinOMLWEC20}, an approach commonly referred to as Dense Passage Retrieval (DPR). Much like for text, encoding models have been proposed for code \cite{DBLP:conf/emnlp/FengGTDFGS0LJZ20}, audio \cite{DBLP:conf/icassp/HersheyCEGJMPPS17}, image \cite{radford2021learning}, video \cite{DBLP:conf/cvpr/DongLXJH0W19}, and other types of data. The similarity score between dense representations is typically computed with measures such as cosine similarity, inner product, or L2 distance.
+
+Dense retrievers are typically trained with contrastive learning, which makes positive samples more similar to the query and negative samples less similar. To further improve model quality, a number of hard-negative mining techniques \cite{DBLP:conf/iclr/XiongXLTLBAO21} have been proposed. During inference, ANN algorithms are used for efficient search. Indices developed to support ANN search include trees \cite{bentley1975multidimensional}, \cite{li2023learning}, locality-sensitive hashing \cite{datar2004locality}, neighbor graph indices (e.g., HNSW \cite{malkov2018efficient}, DiskANN \cite{jayaram2019diskann}), and combined graph and inverted indices (e.g., SPANN \cite{DBLP:conf/nips/ChenZWLLLYW21}).
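+
+A representative objective for this contrastive training is the InfoNCE loss
+\begin{equation}
+  \mathcal{L} = -\log \frac{\exp\bigl(s(q, d^{+})/\tau\bigr)}{\exp\bigl(s(q, d^{+})/\tau\bigr) + \sum_{d^{-}} \exp\bigl(s(q, d^{-})/\tau\bigr)},
+\end{equation}
+where $d^{+}$ is a positive sample for query $q$, $d^{-}$ ranges over (hard) negative samples, $s(\cdot,\cdot)$ is the embedding similarity, and $\tau$ is a temperature.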
+
+\subsubsection{Others}
+There are more techniques for obtaining pertinent objects besides sparse and dense retrieval \cite{DBLP:conf/nips/WangHWMWCXCZL0022}, \cite{DBLP:conf/nips/ZhangWCCZMHDMWP23}. Some studies employ the edit distance between natural language texts \cite{DBLP:conf/emnlp/HayatiOAYTN18} or abstract syntax trees (AST) of code snippets \cite{DBLP:conf/icse/ZhangW00020}, \cite{DBLP:conf/iclr/PoesiaP00SMG22} directly in place of computing representations. Relationships between entities in knowledge graphs act as a pre-built index for retrieval. K-hop neighbor searches can therefore be used for retrieval in RAG approaches that use knowledge graphs \cite{DBLP:conf/acl/YeYHZX22}, \cite{DBLP:journals/corr/abs-2210-12925}. Named Entity Recognition (NER) \cite{lin2020bridging} is an additional retrieval technique in which the entities serve as keys and the query as the input.
+
+\section{Methodologies}
+\subsection{RAG Foundations}
+\subsubsection{Query-based RAG}
+Originating from the concept of prompt augmentation, query-based RAG easily incorporates insights from retrieved data with the user's inquiry, delivering it straight into the generator's input stage. This approach is often used in RAG applications. After being retrieved, the content is combined with the user's initial query to generate a composite input, which the generator processes to produce a response. Query-based RAG is frequently used in many different modalities.
+
+REALM \cite{REALM} uses a dual-BERT framework for text generation, combining pre-trained models with knowledge extractors to expedite knowledge retrieval and integration. Lewis et al. \cite{lewis2020retrieval} used DPR for information retrieval and BART as the generator to efficiently improve generation. SELF-RAG \cite{Self-RAG} uses a critique module to assess whether retrieval is necessary. Besides being compatible with local generators, query-based RAG can also be used in scenarios that access LLMs through API calls. REPLUG \cite{REPLUG} follows this paradigm by treating the language model as a ``black box'' and successfully incorporating pertinent external documents into the query. In-Context RALM \cite{RALM} leverages BM25 for document retrieval and trains a predictive reranker to reorder and integrate the top-ranked documents.
+
+The query-based paradigm has been used in a number of publications \cite{DBLP:conf/iclr/Zhou0XJN23}, \cite{DBLP:conf/emnlp/ZanCLGWL22,DBLP:conf/icse/NashidSM23,DBLP:conf/sigsoft/JinSTSLSS23,DBLP:conf/acl/LuDHGHS22} in the field of code to improve the efficacy of downstream tasks by incorporating contextual information from text or code into the prompt.
+
+Recent studies on Knowledge Base Question Answering (KBQA) have also demonstrated the important benefits of integrating language and retrieval models. For example, by combining inquiries and obtained data into prompts, Uni-Parser \cite{DBLP:conf/acl/Liu22}, RNG-KBQA \cite{DBLP:conf/acl/YeYHZX22}, and ECBRF \cite{DBLP:conf/eacl/YangDCC23} successfully increase the accuracy and performance of QA systems.
+
+Chat-Orthopedist \cite{shi2023retrieval}, a tool in the AI-for-Science space, uses retrieved data in model prompts, facilitating shared decision-making for adolescents with idiopathic scoliosis and increasing the efficacy and accuracy of LLMs.
+
+RetrieveGAN \cite{tseng2020retrievegan} incorporates retrieved data, such as selected picture patches and their bounding boxes, into the generator's input stage to increase the relevance and accuracy of generated images in the image generating task. Noise vectors and instance characteristics are concatenated by IC-GAN \cite{casanova2021instance}, which adjusts the particular conditions and details of the generated images.
+
+For 3D generation, RetDream \cite{DBLP:journals/corr/abs-2402-02972} first uses CLIP \cite{radford2021learning} to retrieve pertinent 3D assets, and the retrieved contents are then combined with the user input during the input phase.
+
+Query-based RAG is frequently used in conjunction with LLM generators and provides modular flexibility, enabling the rapid integration of pre-trained components for fast deployment. Making good use of the retrieved data in this setting requires careful prompt design.
+
+\subsubsection{Latent Representation-based RAG}
+The recovered objects are used as latent representations in generative models in the latent representation-based RAG framework, thereby improving the quality of the generated information and strengthening the model's understanding capabilities.
+
+FiD \cite{FID} and RETRO \cite{RETRO} are two classic latent representation-based RAG structures in the text field, upon which many later works have built. FiD \cite{FID} processes each retrieved passage, together with its title and the query, through separate encoders, and then fuses the resulting latent representations in a single decoder to generate the final output. RETRO \cite{RETRO} retrieves pertinent data for every segmented sub-query and uses a module called Chunked Cross-Attention (CCA) to combine the retrieved data with the tokens of each sub-query. Other notable structures also fall under latent representation-based RAG: several studies \cite{MemTransformer2022}, \cite{Bertsch2023UnlimiformerLT} have integrated k-Nearest-Neighbor (kNN) search into transformer blocks, enabling input chunking and, in principle, mitigating the long-criticized context-length limits of Transformer models. Kuratov et al. \cite{RMT-R} combined the Transformer with an RNN, using the intermediate output of the model as the retrieval content.
+
+FiD has become widely used in the disciplines of science and code, with applications in a variety of code-related domains \cite{DBLP:conf/kbse/LiL000J21,DBLP:conf/icsm/YuYCLZ22,DBLP:conf/nips/HashimotoGOL18,DBLP:conf/kbse/WeiLLXJ20,DBLP:conf/emnlp/ShiW0DZHZ022} and AI-for-Science \cite{wang2022retrieval}.
+
+Several studies \cite{chen2022re,sheynin2022knn,blattmann2022retrieval,rombach2022text} in the visual domain use cross-attention to merge retrieval results into the latent representations. In contrast, Li et al. \cite{li2022memory} use an Affine Combination Module (ACM) that directly concatenates hidden features of text and images.
+
+Numerous studies \cite{DBLP:conf/naacl/OguzCKPOSGMY22,DBLP:conf/iclr/YuZNZL0HWWX23,DBLP:conf/cikm/DongLWZXX23,DBLP:journals/corr/abs-2308-13259,DBLP:conf/sigir/YuY23} have used FiD and its derivatives for downstream tasks inside the knowledge domain. While TOME \cite{TOME} shifts to a nuanced encoding of mentions, giving mention granularity precedence over entity representations alone, EaE \cite{EaE} improves the generator's comprehension by entity-specific parameterization.
+
+ReMoDiffuse \cite{DBLP:conf/iccv/ZhangGPCHLYL23} advances the field of 3D generation by introducing a semantics-modulated attention method that improves the precision of producing comparable 3D motions from textual descriptions. By combining the original diffusion process with the reference diffusion process, AMD \cite{jing2023amd} successfully converts text to 3D motion.
+
+Koizumi et al. \cite{DBLP:journals/corr/abs-2012-07331} used an LLM in the audio domain, directing the creation of audio captions by integrating encoded dense information in the attention module. Deep features are extracted from text and audio using different encoders by ReAudioLDM \cite{DBLP:journals/corr/abs-2309-08051}, and these characteristics are then included into the Latent Diffusion Model's (LDM) attention mechanism.
+
+R-ConvED \cite{DBLP:journals/tomccap/ChenPLYCM23} processes retrieved video-sentence pairs using an attention mechanism and a convolutional encoder-decoder network, creating hidden states to generate captions for videos. CARE \cite{DBLP:journals/tip/YangCZ23} integrates idea representations into a hybrid attention mechanism and presents a concept detector to generate concept probabilities. EgoInstructor \cite{DBLP:journals/corr/abs-2401-00789} enhances the coherence and relevance of captions for egocentric videos by combining text and visual elements via gated-cross attention. Latent representation-based RAG combines retriever and generator hidden states and is flexible across modalities and tasks, although it necessitates extra training to align latent spaces. It makes it possible to create complex algorithms that smoothly integrate the data that has been retrieved.
+
+\subsubsection{Logit-based RAG}
+In logit-based RAG, generative models incorporate retrieval information through the logits during the decoding phase. The logits are typically combined, by simple summation or by a model, to compute the probabilities for step-wise generation.
+
+In the text domain, kNN-LM \cite{KNN-LM} and its variants \cite{Efficient-KNNLM} combine, at each decoding step, the language model's probabilities with probabilities derived from the retrieval distances of similar prefixes. TRIME \cite{TRIME} and NPM \cite{NPM} extend conventional kNN-LM techniques by outputting highly aligned tokens from a local database, improving performance especially under long-tail distributions.
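+Concretely, kNN-LM computes the next-token distribution as the interpolation
+\begin{equation}
+  p(y_t \mid x) = \lambda\, p_{\mathrm{kNN}}(y_t \mid x) + (1 - \lambda)\, p_{\mathrm{LM}}(y_t \mid x),
+\end{equation}
+where $p_{\mathrm{kNN}}$ weights retrieved neighbors by their distance in representation space and $\lambda \in [0, 1]$ controls the mixture.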
+
+In addition to text, logit-based RAG is also used in other modalities like code and images.
+
+A number of studies \cite{DBLP:conf/icse/ZhangW00020}, \cite{DBLP:conf/emnlp/Zhang0YC23} have also applied the kNN idea in the code domain to improve control over the final output and attain better performance. Additionally, EDITSUM \cite{DBLP:conf/kbse/LiL000J21} incorporates prototype summaries at the logit level to enhance the quality of code summarization. MA \cite{fei2021memory} applies the kNN-LM framework to the image captioning problem with positive outcomes. Because it combines information at the logit level and uses historical data to infer the current state, logit-based RAG is well suited to sequence generation. It emphasizes generator training and leaves room for techniques that exploit the resulting probability distributions in downstream tasks.
+
+\subsubsection{Speculative RAG}
+Speculative RAG looks for ways to save resources and speed up response times by using retrieval instead of pure generation. REST \cite{REST} replaces the small draft models used in speculative decoding \cite{Speculative_Decoding} with retrieval, allowing drafts to be produced by lookup. GPTCache \cite{GPTCache} builds a semantic cache to store LLM replies, addressing the problem of high latency when using LLM APIs. COG \cite{COG} breaks text generation down into a sequence of copy-and-paste operations, retrieving words or phrases from documents rather than generating them. Cao et al. \cite{RetrievalisAccurateGeneration} proposed a novel paradigm that substitutes directly retrieved phrase-level content for generation, removing the final result's dependence on the quality of the first-stage retrieved content.
+
+Sequential data is now the main use of speculative RAG. Separating the generator and the retriever makes it possible to employ pre-trained models as components directly. We can investigate a greater variety of tactics to make efficient use of the recovered content within this framework.
+
+\subsection{RAG Enhancements}
+\subsubsection{Input Enhancement}
+The first input fed into the retriever has a significant impact on the outcome of the retrieval stage. This section presents query transformation and data augmentation as two input enhancement techniques.
+
+\textit{Query Transformation:} By altering the input query, query transformation can improve the retrieval outcome.
+
+Query2doc \cite{Query2doc} and HyDE \cite{HyDE} use the original query to create a pseudo-document, which is then used as the retrieval query. The pseudo-document contains richer, pertinent information, which helps retrieve more precise results.
+
+By using the obtained contents, TOC \cite{TOC} breaks down the confusing query into several distinct sub-queries, which are then sent to the generator and combined to yield the final output.
+
+RQ-RAG \cite{RQ-RAG} deconstructs complex or ambiguous enquiries into distinct subqueries for fine-grained retrieval and combines the answers to provide a coherent response to the initial inquiry. Tayal et al. \cite{tayal2024dynamic} improved the generator's understanding of user intent by refining the original query using context retrieval and dynamic few-shot samples.
+
+\textit{Data Augmentation:} Data augmentation improves the data before retrieval through methods such as removing ambiguity, updating outdated documents, synthesizing new data, and filtering out extraneous information.
+
+Make-An-Audio \cite{DBLP:conf/icml/HuangHY0LLYLYZ23} adds random concept audio to enrich the original audio and employs captioning and audio-text retrieval to create captions for language-free audio in order to reduce data sparsity. LESS \cite{LESS} analyzes gradient information to optimize dataset selection for downstream tasks, improving model performance in response to instruction prompts. ReACC \cite{DBLP:conf/acl/LuDHGHS22} pre-trains the code retrieval model with data augmentation techniques such as renaming and dead-code insertion. Telco-RAG \cite{Telco-RAG} improves retrieval accuracy by using a ``Vocabulary for 3GPP Specifications'' and matching its terms to user queries through a router module.
+
+\subsubsection{Retriever Enhancement}
+The information sent into the generators in RAG systems is determined by the quality of the content that is retrieved. The likelihood of model hallucinations or other deterioration rises with lower content quality. We present useful strategies to improve retrieval efficacy in this section.
+
+\textit{Recursive Retrieval:} This method involves conducting several searches to obtain more comprehensive and superior content.
+
+ReACT \cite{ReAct} decomposes questions using Chain-of-Thought (CoT) \cite{COT} for recursive retrieval, providing deeper information. RATP \cite{RATP} chooses the best retrieved material using Monte-Carlo Tree Search simulations; the content is then templated and sent to the generator for output.
+
+\textit{Chunk Optimization:} Chunk optimization refers to adjusting chunk size for improved retrieval results.
+
+One of the chunk optimization techniques used by LlamaIndex \cite{LlamaIndex} is based on the ``small to big'' principle: retrieve finer-grained content but return richer surrounding information. Sentence-window retrieval, for example, retrieves a short text passage and returns a window of pertinent sentences surrounding the retrieved segment. For auto-merging retrieval, documents are organized in a tree structure; by first retrieving a child node, the method obtains the parent node, which contains the content of all its children. To address the lack of contextual information, RAPTOR \cite{RAPTOR} recursively embeds, clusters, and summarizes text chunks until further clustering is impractical, creating a multi-level tree structure. Prompt-RAG \cite{Prompt-RAG} improves retrieval accuracy by creating a table of contents beforehand, allowing the model to choose pertinent chapters on its own based on the query. To increase recall and produce better results, Raina et al. \cite{raina2024question} divide text fragments into smaller, more atomic assertions.
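+
+As a minimal illustration of the sentence-window idea above (a hypothetical sketch, not the LlamaIndex implementation), the retrieved sentence is returned together with its neighbors:
+\begin{verbatim}
+def sentence_window(sentences, hit_index, window=2):
+    # expand a single retrieved sentence into its surrounding context
+    lo = max(0, hit_index - window)
+    hi = min(len(sentences), hit_index + window + 1)
+    return " ".join(sentences[lo:hi])
+\end{verbatim}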
+
+\textit{Retriever Finetuning:} The core component of the RAG system, the retriever, depends on an effective embedding model \cite{bge_embedding,bge_m3,cocktail,llm_embedder} to feed the generator with relevant content and represent it, improving system performance.
+
+Furthermore, embedding models with high expressive power can be refined with domain-specific or task-related data to improve performance in particular domains. REPLUG \cite{REPLUG} treats the LM as a black box and updates the retriever model based on the final results. APICoder \cite{DBLP:conf/emnlp/ZanCLGWL22} refines the retriever using Python files, API names, signatures, and descriptions.
+
+After retrieval, EDITSUM \cite{DBLP:conf/kbse/LiL000J21} optimizes the retriever to reduce the Jaccard distance between summaries. SYNCHROMESH \cite{DBLP:conf/iclr/PoesiaP00SMG22} adds the tree distance of ASTs to the loss and uses Target Similarity Tuning (TST) to fine-tune the retriever. R-ConvED \cite{DBLP:journals/tomccap/ChenPLYCM23} optimizes the retriever using the same data as the generator. Kulkarni et al. \cite{RL4RAG} used the InfoNCE loss to optimize the retriever.
+
+\textit{Hybrid Retrieval:} Hybrid retrieval refers to the combined use of multiple retrieval techniques or the extraction of data from several different sources.
+
+To increase retrieval quality, RAP-Gen \cite{DBLP:conf/sigsoft/Wang0JH23}, BlendedRAG \cite{Blended-RAG}, and ReACC \cite{DBLP:conf/acl/LuDHGHS22} employ both dense and sparse retrievers. Rencos \cite{DBLP:conf/icse/ZhangW00020} retrieves similar code snippets at the syntactic level with a sparse retriever and at the semantic level with a dense retriever. BASHEXPLAINER \cite{DBLP:conf/icsm/YuYCLZ22} first gathers semantic information with a dense retriever and then lexical information with a sparse retriever. RetDream \cite{DBLP:journals/corr/abs-2402-02972} retrieves first with text and then with image embeddings. A retrieval evaluator in CRAG \cite{CRAG} assesses the relevance of documents to queries and triggers one of three retrieval responses based on confidence: direct use of the results for knowledge refinement if they are accurate, web search if they are inaccurate, and a hybrid approach in ambiguous cases. Huang et al. \cite{RAGAE} enhanced question answering by adding DKS (Dense Knowledge Similarity) and RAC (Retriever as Answer Classifier) to the retrieval phase, assessing answer relevance and knowledge applicability. UniMS-RAG \cite{UniMS-RAG} introduces a new type of token, the ``acting token,'' which determines the source from which to retrieve information. By combining text and drawings for fine-grained retrieval, Koley et al. \cite{koley2024you} improve image retrieval and produce better results.
+
+\textit{Reranking:} Reranking rearranges the retrieved content to increase diversity and improve results. To reduce the information loss caused by compressing text into vectors, Re2G \cite{Re2G} applies a re-ranker \cite{ReRanker} model after the conventional retriever. AceCoder \cite{li2023acecoder} reranks retrieved programs with a selector to eliminate redundant programs and obtain a diverse set. XRICL \cite{DBLP:conf/emnlp/0010Z0L22} employs a distillation-based exemplar reranker after retrieval. Rangan et al. \cite{rangan2024fine} use the Quantized Influence Measure, which captures statistical biases between a query and a reference, to evaluate the similarity of data subsets and rerank the retrieval results. UDAPDR \cite{UDAPDR} uses LLMs together with multi-teacher knowledge distillation to cheaply produce synthetic queries that train domain-specific rerankers, which are then combined into a cohesive retriever. LLM-R \cite{LLM-R} iteratively improves its retriever by using a static LLM for document rating and reward-model training in addition to knowledge distillation; the retriever improves incrementally with each training cycle, enabling progressive optimization. Finardi et al. \cite{finardi2024chronicles} incorporated reciprocal rank into the retrieval process and used monoT5 as a reranker to maximize the quality of the results and improve text-chunk relevance. Li et al. \cite{li2024enhancing} improve the retrieval quality and factual accuracy of LLMs by incorporating a reranking module into their end-to-end RAG system.
+
+\textit{Retrieval Transformation:} Retrieval transformation involves rewording the retrieved content to better activate the generator's potential and produce better output.
+
+FILCO \cite{FILCO} effectively removes unnecessary content from the retrieved text, isolating only the relevant supporting material, which simplifies the generator's task and enables precise answer prediction. FiD-Light \cite{FiD-Light} first uses an encoder to transform the retrieved content into a vector and then compresses it, significantly reducing latency. RRR \cite{RRR} combines the current query with the top-k documents in each round via a template and then restructures the result using pre-trained LLMs (GPT-3.5-Turbo, etc.).
+
+\textit{Others:} There are more optimisation techniques for the retrieval process in addition to the ones mentioned above.
+
+For instance, metadata filtering \cite{Pinecone} helps process retrieved documents by filtering them with metadata (such as time or purpose) for better results. GENREAD \cite{GENREAD} and GRG \cite{GRG} take a different approach, replacing or augmenting the retrieval process by asking an LLM to produce documents in response to a given query. Multi-Head-RAG \cite{Multi-Head-RAG} improves retrieval accuracy by using a multi-head attention layer to capture distinct informational features and multiple embedding models to project the same text chunk into different vector spaces.
+
+\subsubsection{Generator Enhancement}
+The quality of the output results in RAG systems is frequently dictated by the quality of the generator. As a result, the maximum effectiveness of the entire RAG system is determined by the generator's capability.
+
+\textit{Prompt Engineering:} LLM generators in RAG systems can benefit from prompt-engineering techniques \cite{Prompt_Engineering_Guide} that focus on improving the output quality of LLMs, such as prompt compression, Stepback Prompt \cite{StepBack-Prompting}, Active Prompt \cite{active-prompt}, and Chain-of-Thought prompting \cite{COT}, among others.
+
+LLMLingua \cite{LLMLingua} uses a small model to compress the overall length of the query, speeding up model inference, reducing the detrimental effect of irrelevant information on the model, and alleviating the ``Lost in the Middle'' \cite{Lost_in_the_middle} problem. ReMoDiffuse \cite{DBLP:conf/iccv/ZhangGPCHLYL23} uses ChatGPT to break down intricate descriptions into anatomical text scripts. ASAP \cite{ahmed2024automatic} adds exemplar tuples, consisting of input code, function definitions, analysis results, and corresponding comments, to prompts to improve outcomes. CEDAR \cite{DBLP:conf/icse/NashidSM23} uses a pre-made prompt template to organize code demonstrations, the query, and natural language instructions into a prompt. XRICL \cite{DBLP:conf/emnlp/0010Z0L22} adds translation pairs, using CoT, as an intermediate step for cross-lingual semantic parsing and inference. ACTIVERAG \cite{ActiveRAG} uses the Cognition Nexus mechanism to calibrate the LLM's internal cognition and applies CoT prompting when generating answers. Make-An-Audio \cite{DBLP:conf/icml/HuangHY0LLYLYZ23} can use other modalities as input, which provides much richer information for the subsequent process.
+
+\textit{Generator Finetuning:} Decoding tuning, among other adjustments, improves control over the generator by tuning hyperparameters for greater diversity and by restricting the output vocabulary.
+
+InferFix \cite{DBLP:conf/sigsoft/JinSTSLSS23} adjusts the decoder's temperature to balance the diversity and quality of its outputs. SYNCHROMESH \cite{DBLP:conf/iclr/PoesiaP00SMG22} restricts the decoder's output vocabulary and uses a completion engine to eliminate implementation errors. Finetuning the generator, in turn, can improve the model's ability to fit the retriever more closely or equip it with more precise domain knowledge.
+
+RETRO fixes the retriever's parameters and uses the chunked cross-attention mechanism in the generator to combine the content of the query and the retrieved chunks. APICoder \cite{DBLP:conf/icse/NashidSM23} improves the generator CODEGEN-MONO 350M \cite{CODEGEN-MONO} using a shuffled new file together with code blocks and API metadata. CARE \cite{DBLP:journals/tip/YangCZ23} first trains encoders with image, audio, and video-text pairs, and then optimizes the decoder (generator) to jointly reduce caption and concept-detection loss while keeping the encoders and retriever fixed. Animate-A-Story \cite{DBLP:journals/corr/abs-2307-06940} optimizes the video generator with image data and then fine-tunes a LoRA \cite{LoRA} adapter to capture the specifics of a character's appearance. RetDream \cite{DBLP:journals/corr/abs-2402-02972} fine-tunes a LoRA adapter \cite{LoRA} using the produced images.
+
+\subsubsection{Result Enhancement}
+In many situations, RAG results may not achieve the desired effect; several result-enhancement techniques can help mitigate this issue.
+
+\textit{Output Rewrite:} Output rewrite refers to rewriting the content produced by the generator, in certain scenarios, to meet the needs of downstream tasks. SARGAM \cite{DBLP:journals/corr/abs-2306-06490} refines outputs in code-related tasks by employing a special Transformer along with Deletion, Placeholder, and Insertion classifiers to better match the real-world code context. Ring \cite{DBLP:conf/aaai/JoshiSG0VR23} obtains diverse results by reranking candidates according to the average of the per-token log probabilities produced by the generator. CBR-KBQA \cite{DBLP:conf/emnlp/DasZTGPLTPM21} revises the result by aligning the generated relations with those present in the local neighborhood of the query entity in the knowledge graph.
+
+\subsubsection{RAG Pipeline Enhancement}
+RAG pipeline enhancement refers to optimizing the overall RAG process to achieve better performance.
+
+\textit{Adaptive Retrieval:} Some RAG studies show that retrieval does not always improve the result. When the model's intrinsic parameterized knowledge is sufficient to answer the question, over-retrieval can waste resources and even cause confusion. This subsection therefore covers rule-based and model-based techniques for deciding whether retrieval is necessary.
+
+\textit{Rule Based:} FLARE \cite{FLARE} uses generation probabilities to actively decide whether and when to search during the generation process. Efficient-KNNLM \cite{Efficient-KNNLM} combines the generation probabilities of kNN-LM \cite{KNN-LM} and NPM \cite{NPM} with a hyperparameter $\lambda$ that controls the proportion of generation versus retrieval.
+
+Mallen et al. \cite{Adaptive-Retrieval-whennottrust} used statistical analysis to answer high-frequency questions directly and applied RAG to low-frequency questions. Jiang et al. \cite{lm-calibration} assessed model confidence using fit statistics, model uncertainty, and fit uncertainty to inform retrieval decisions. Kandpal et al. \cite{LLM_Struggle_to_Learn_Long-Tail_Knowledge} investigated the relationship between the amount of relevant training text and the model's grasp of the corresponding knowledge, to help judge whether relying on the model alone is appropriate.
+
+\textit{Model-based:} Self-RAG \cite{Self-RAG} uses a trained generator to decide, based on a retrieve token, whether to perform retrieval for a given user query. Ren et al. \cite{LLM-Knowledge-Boundary} employed ``Judgement Prompting'' to assess whether LLMs can answer pertinent questions and how accurate their answers are, which helps determine whether retrieval is required.
+
+SKR \cite{SKR} makes use of LLMs' inherent capacity to determine beforehand whether they are able to respond to the inquiry; if they do, no retrieval is necessary. In order to ascertain whether information retrieval is necessary, Rowen \cite{Rowen} translates a query into several languages and verifies that the responses are consistent across these languages. AdaptiveRAG \cite{AdaptiveRAG} uses a classifier, which is a smaller LM, to dynamically determine whether to retrieve based on the query difficulty.
+
+\textit{Iterative RAG:} Instead of performing a single round, iterative RAG cycles through the retrieval and generation phases repeatedly to progressively refine the result.
+
+To make effective use of information scattered across a repository, RepoCoder \cite{DBLP:conf/emnlp/ZhangCZKLZMLC23} refines queries with previously generated code through an iterative retrieval-generation approach to code completion. ITER-RETGEN \cite{ITER-RETGEN} iteratively improves content quality by using the generator's output to identify knowledge gaps, retrieve the relevant information, and inform the next generation round. SelfMemory \cite{SelfMemory} applies a retrieval-augmented generator iteratively to build a large memory pool, from which a memory selector picks an output that feeds the next generation round. RAT \cite{RAT} first generates content with an LLM under a zero-shot CoT prompt and then revises each thought step using information retrieved from an external knowledge store.
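+
+The overall pattern shared by these systems can be sketched as a simple loop; the function names below are illustrative and not tied to any single cited system.
+
+\begin{verbatim}
+# Minimal sketch of an iterative retrieval-generation loop.
+def iterative_rag(query, retrieve, generate, rounds=3, k=5):
+    draft = ""
+    for _ in range(rounds):
+        # Use the query plus the current draft as the retrieval key, so
+        # later rounds can fetch evidence for newly generated content.
+        docs = retrieve(query + "\n" + draft, k=k)
+        context = "\n\n".join(docs)
+        draft = generate(
+            "Context:\n" + context + "\n\nQuestion: " + query +
+            "\nPrevious draft:\n" + draft + "\n\nImproved answer:"
+        )
+    return draft
+\end{verbatim}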
+
+\section{Discussion}
+Despite the widespread adoption of RAG, it suffers from several limitations by design.
+
+\subsection{Noises in Retrieval Results}
+Information retrieval is inherently imperfect because of information loss in item representations and in ANN search. The resulting noise, which may surface as irrelevant content or misleading information, can introduce failure points into RAG systems \cite{DBLP:journals/corr/abs-2401-05856}. Although improving retrieval accuracy seems an obvious route to better RAG efficacy, recent work surprisingly finds that noisy retrieval results can sometimes improve generation quality \cite{DBLP:journals/corr/abs-2401-14887}. One possible explanation is that diverse retrieval results may aid prompt construction \cite{qiu2022evaluating}. As a result, how retrieval noise affects real applications remains unclear, which causes confusion over metric selection and retriever-generator interaction.
+
+\subsection{Extra Overhead}
+In most situations, retrieval incurs non-negligible overhead, even though it can occasionally lower generation costs \cite{Atlas,MemTransformer2022,REST}. In other words, the retrieval and interaction steps inevitably increase latency. This is exacerbated when RAG is combined with sophisticated enhancement techniques such as iterative RAG \cite{DBLP:conf/emnlp/ZhangCZKLZMLC23} and recursive retrieval \cite{Query_Expansion_by_Prompting_LLMs}. Moreover, as retrieval sources grow in size, so do storage and access complexity \cite{EA}. This overhead severely limits the usefulness of RAG in latency-sensitive real-time systems.
+
+\subsection{The Gap between Generators and Retrievers}
+The interplay between retrievers and generators requires careful design and optimisation because their latent spaces and objectives may not align. Current approaches either separate retrieval from generation or couple them at an intermediate stage. The former is more modular, whereas the latter may benefit from joint training at the cost of generality. Choosing a cost-effective interaction strategy that bridges this gap is difficult and requires careful consideration in real-world settings.
+
+\subsection{Increased System Complexity}
+Introducing retrieval inevitably increases system complexity and the number of hyper-parameters to tune. For example, a recent study of query-based RAG found that retrieving the top-$k$ results rather than a single result improves attribution but degrades fluency \cite{DBLP:journals/corr/abs-2302-05578}. Other factors, such as metric selection, are not yet fully investigated. Consequently, tuning a generation service that involves RAG demands greater expertise.
+
+\subsection{Lengthy Context}
+One of RAG's main drawbacks, especially for query-based RAG, is that it substantially lengthens the context, making it unworkable for generators with limited context length. In addition, the extended context generally slows down generation. Research progress on long-context support \cite{DBLP:journals/corr/abs-2308-16137} and prompt compression \cite{LLMLingua} has partially alleviated these issues, though usually at a small cost in efficiency or accuracy.
+
+\section{Conclusion}
+This case study presented an extensive and in-depth analysis of RAG in the framework of AIGC, paying special attention to the foundations, enhancements, and applications of retrieval augmentation. We first systematically classified and summarised the fundamental RAG paradigms, offering insights into how retrievers and generators interact. Next, we examined the enhancements that further improve RAG's efficacy, whether applied to individual components or to the pipeline as a whole. Finally, we surveyed practical RAG implementations across a variety of tasks and modalities to aid researchers from a wide range of fields.
+
+\bibliographystyle{IEEEtran}
+\bibliography{refs}
+
+
+\end{document}
diff --git a/refs.bib b/refs.bib
new file mode 100644
index 0000000..759567c
--- /dev/null
+++ b/refs.bib
@@ -0,0 +1,2016 @@
+
+% Journals
+
+% First the Full Name is given, then the abbreviation used in the AMS Math
+% Reviews, with an indication if it could not be found there.
+% Note the 2nd overwrites the 1st, so swap them if you want the full name.
+
+ %{AMS}
+ @String{AMSTrans = "American Mathematical Society Translations" }
+ @String{AMSTrans = "Amer. Math. Soc. Transl." }
+ @String{BullAMS = "Bulletin of the American Mathematical Society" }
+ @String{BullAMS = "Bull. Amer. Math. Soc." }
+ @String{ProcAMS = "Proceedings of the American Mathematical Society" }
+ @String{ProcAMS = "Proc. Amer. Math. Soc." }
+ @String{TransAMS = "Transactions of the American Mathematical Society" }
+ @String{TransAMS = "Trans. Amer. Math. Soc." }
+
+ %ACM
+ @String{CACM = "Communications of the {ACM}" }
+ @String{CACM = "Commun. {ACM}" }
+ @String{CompServ = "Comput. Surveys" }
+ @String{JACM = "J. ACM" }
+ @String{ACMMathSoft = "{ACM} Transactions on Mathematical Software" }
+ @String{ACMMathSoft = "{ACM} Trans. Math. Software" }
+ @String{SIGNUM = "{ACM} {SIGNUM} Newsletter" }
+ @String{SIGNUM = "{ACM} {SIGNUM} Newslett." }
+
+ @String{AmerSocio = "American Journal of Sociology" }
+ @String{AmerStatAssoc = "Journal of the American Statistical Association" }
+ @String{AmerStatAssoc = "J. Amer. Statist. Assoc." }
+ @String{ApplMathComp = "Applied Mathematics and Computation" }
+ @String{ApplMathComp = "Appl. Math. Comput." }
+ @String{AmerMathMonthly = "American Mathematical Monthly" }
+ @String{AmerMathMonthly = "Amer. Math. Monthly" }
+ @String{BIT = "{BIT}" }
+ @String{BritStatPsych = "British Journal of Mathematical and Statistical
+ Psychology" }
+ @String{BritStatPsych = "Brit. J. Math. Statist. Psych." }
+ @String{CanMathBull = "Canadian Mathematical Bulletin" }
+ @String{CanMathBull = "Canad. Math. Bull." }
+ @String{CompApplMath = "Journal of Computational and Applied Mathematics" }
+ @String{CompApplMath = "J. Comput. Appl. Math." }
+ @String{CompPhys = "Journal of Computational Physics" }
+ @String{CompPhys = "J. Comput. Phys." }
+ @String{CompStruct = "Computers and Structures" }
+ @String{CompStruct = "Comput. \& Structures" }
+ @String{CompJour = "The Computer Journal" }
+ @String{CompJour = "Comput. J." }
+ @String{CompSysSci = "Journal of Computer and System Sciences" }
+ @String{CompSysSci = "J. Comput. System Sci." }
+ @String{Computing = "Computing" }
+ @String{ContempMath = "Contemporary Mathematics" }
+ @String{ContempMath = "Contemp. Math." }
+ @String{Crelle = "Crelle's Journal" }
+ @String{GiornaleMath = "Giornale di Mathematiche" }
+ @String{GiornaleMath = "Giorn. Mat." } % didn't find in AMS MR., ibid.
+
+ %IEEE
+ @String{Computer = "{IEEE} Computer" }
+ @String{IEEETransComp = "{IEEE} Transactions on Computers" }
+ @String{IEEETransComp = "{IEEE} Trans. Comput." }
+ @String{IEEETransAC = "{IEEE} Transactions on Automatic Control" }
+ @String{IEEETransAC = "{IEEE} Trans. Automat. Control" }
+ @String{IEEESpec = "{IEEE} Spectrum" } % didn't find in AMS MR
+ @String{ProcIEEE = "Proceedings of the {IEEE}" }
+ @String{ProcIEEE = "Proc. {IEEE}" } % didn't find in AMS MR
+ @String{IEEETransAeroElec = "{IEEE} Transactions on Aerospace and Electronic
+ Systems" }
+ @String{IEEETransAeroElec = "{IEEE} Trans. Aerospace Electron. Systems" }
+
+ @String{IMANumerAna = "{IMA} Journal of Numerical Analysis" }
+ @String{IMANumerAna = "{IMA} J. Numer. Anal." }
+ @String{InfProcLet = "Information Processing Letters" }
+ @String{InfProcLet = "Inform. Process. Lett." }
+ @String{InstMathApp = "Journal of the Institute of Mathematics and
+ its Applications" }
+ @String{InstMathApp = "J. Inst. Math. Appl." }
+ @String{IntControl = "International Journal of Control" }
+ @String{IntControl = "Internat. J. Control" }
+ @String{IntNumerEng = "International Journal for Numerical Methods in
+ Engineering" }
+ @String{IntNumerEng = "Internat. J. Numer. Methods Engrg." }
+ @String{IntSuper = "International Journal of Supercomputing Applications" }
+ @String{IntSuper = "Internat. J. Supercomputing Applic." } % didn't find
+%% in AMS MR
+ @String{Kibernetika = "Kibernetika" }
+ @String{JResNatBurStand = "Journal of Research of the National Bureau
+ of Standards" }
+ @String{JResNatBurStand = "J. Res. Nat. Bur. Standards" }
+ @String{LinAlgApp = "Linear Algebra and its Applications" }
+ @String{LinAlgApp = "Linear Algebra Appl." }
+ @String{MathAnaAppl = "Journal of Mathematical Analysis and Applications" }
+ @String{MathAnaAppl = "J. Math. Anal. Appl." }
+ @String{MathAnnalen = "Mathematische Annalen" }
+ @String{MathAnnalen = "Math. Ann." }
+ @String{MathPhys = "Journal of Mathematical Physics" }
+ @String{MathPhys = "J. Math. Phys." }
+ @String{MathComp = "Mathematics of Computation" }
+ @String{MathComp = "Math. Comp." }
+ @String{MathScand = "Mathematica Scandinavica" }
+ @String{MathScand = "Math. Scand." }
+ @String{TablesAidsComp = "Mathematical Tables and Other Aids to Computation" }
+ @String{TablesAidsComp = "Math. Tables Aids Comput." }
+ @String{NumerMath = "Numerische Mathematik" }
+ @String{NumerMath = "Numer. Math." }
+ @String{PacificMath = "Pacific Journal of Mathematics" }
+ @String{PacificMath = "Pacific J. Math." }
+ @String{ParDistComp = "Journal of Parallel and Distributed Computing" }
+ @String{ParDistComp = "J. Parallel and Distrib. Comput." } % didn't find
+%% in AMS MR
+ @String{ParComputing = "Parallel Computing" }
+ @String{ParComputing = "Parallel Comput." }
+ @String{PhilMag = "Philosophical Magazine" }
+ @String{PhilMag = "Philos. Mag." }
+ @String{ProcNAS = "Proceedings of the National Academy of Sciences
+ of the USA" }
+ @String{ProcNAS = "Proc. Nat. Acad. Sci. U. S. A." }
+ @String{Psychometrika = "Psychometrika" }
+ @String{QuartMath = "Quarterly Journal of Mathematics, Oxford, Series (2)" }
+ @String{QuartMath = "Quart. J. Math. Oxford Ser. (2)" }
+ @String{QuartApplMath = "Quarterly of Applied Mathematics" }
+ @String{QuartApplMath = "Quart. Appl. Math." }
+ @String{RevueInstStat = "Review of the International Statistical Institute" }
+ @String{RevueInstStat = "Rev. Inst. Internat. Statist." }
+
+ %SIAM
+ @String{JSIAM = "Journal of the Society for Industrial and Applied
+ Mathematics" }
+ @String{JSIAM = "J. Soc. Indust. Appl. Math." }
+ @String{JSIAMB = "Journal of the Society for Industrial and Applied
+ Mathematics, Series B, Numerical Analysis" }
+ @String{JSIAMB = "J. Soc. Indust. Appl. Math. Ser. B Numer. Anal." }
+ @String{SIAMAlgMeth = "{SIAM} Journal on Algebraic and Discrete Methods" }
+ @String{SIAMAlgMeth = "{SIAM} J. Algebraic Discrete Methods" }
+ @String{SIAMAppMath = "{SIAM} Journal on Applied Mathematics" }
+ @String{SIAMAppMath = "{SIAM} J. Appl. Math." }
+ @String{SIAMComp = "{SIAM} Journal on Computing" }
+ @String{SIAMComp = "{SIAM} J. Comput." }
+ @String{SIAMMatrix = "{SIAM} Journal on Matrix Analysis and Applications" }
+ @String{SIAMMatrix = "{SIAM} J. Matrix Anal. Appl." }
+ @String{SIAMNumAnal = "{SIAM} Journal on Numerical Analysis" }
+ @String{SIAMNumAnal = "{SIAM} J. Numer. Anal." }
+ @String{SIAMReview = "{SIAM} Review" }
+ @String{SIAMReview = "{SIAM} Rev." }
+ @String{SIAMSciStat = "{SIAM} Journal on Scientific and Statistical
+ Computing" }
+ @String{SIAMSciStat = "{SIAM} J. Sci. Statist. Comput." }
+
+ @String{SoftPracExp = "Software Practice and Experience" }
+ @String{SoftPracExp = "Software Prac. Experience" } % didn't find in AMS MR
+ @String{StatScience = "Statistical Science" }
+ @String{StatScience = "Statist. Sci." }
+ @String{Techno = "Technometrics" }
+ @String{USSRCompMathPhys = "{USSR} Computational Mathematics and Mathematical
+ Physics" }
+ @String{USSRCompMathPhys = "{U. S. S. R.} Comput. Math. and Math. Phys." }
+ @String{VLSICompSys = "Journal of {VLSI} and Computer Systems" }
+ @String{VLSICompSys = "J. {VLSI} Comput. Syst." }
+ @String{ZAngewMathMech = "Zeitschrift fur Angewandte Mathematik und
+ Mechanik" }
+ @String{ZAngewMathMech = "Z. Angew. Math. Mech." }
+ @String{ZAngewMathPhys = "Zeitschrift fur Angewandte Mathematik und Physik" }
+ @String{ZAngewMathPhys = "Z. Angew. Math. Phys." }
+
+% Publishers % ================================================= |
+
+ @String{Academic = "Academic Press" }
+ @String{ACMPress = "{ACM} Press" }
+ @String{AdamHilger = "Adam Hilger" }
+ @String{AddisonWesley = "Addison-Wesley" }
+ @String{AllynBacon = "Allyn and Bacon" }
+ @String{AMS = "American Mathematical Society" }
+ @String{Birkhauser = "Birkha{\"u}ser" }
+ @String{CambridgePress = "Cambridge University Press" }
+ @String{Chelsea = "Chelsea" }
+ @String{ClaredonPress = "Claredon Press" }
+ @String{DoverPub = "Dover Publications" }
+ @String{Eyolles = "Eyolles" }
+ @String{HoltRinehartWinston = "Holt, Rinehart and Winston" }
+ @String{Interscience = "Interscience" }
+ @String{JohnsHopkinsPress = "The Johns Hopkins University Press" }
+ @String{JohnWileySons = "John Wiley and Sons" }
+ @String{Macmillan = "Macmillan" }
+ @String{MathWorks = "The Math Works Inc." }
+ @String{McGrawHill = "McGraw-Hill" }
+ @String{NatBurStd = "National Bureau of Standards" }
+ @String{NorthHolland = "North-Holland" }
+ @String{OxfordPress = "Oxford University Press" } %address Oxford or London?
+ @String{PergamonPress = "Pergamon Press" }
+ @String{PlenumPress = "Plenum Press" }
+ @String{PrenticeHall = "Prentice-Hall" }
+ @String{SIAMPub = "{SIAM} Publications" }
+ @String{Springer = "Springer-Verlag" }
+ @String{TexasPress = "University of Texas Press" }
+ @String{VanNostrand = "Van Nostrand" }
+ @String{WHFreeman = "W. H. Freeman and Co." }
+
+%Entries
+
+@inproceedings{DBLP:conf/nips/BrownMRSKDNSSAA20,
+ author = {Tom B. Brown and
+ Benjamin Mann and others},
+ title = {Language Models are Few-Shot Learners},
+ booktitle = {NeurIPS},
+ year = {2020}
+}
+
+@article{DBLP:journals/corr/abs-2107-03374,
+ author = {Mark Chen and
+ Jerry Tworek and others},
+ title = {Evaluating Large Language Models Trained on Code},
+ journal = {arXiv:2107.03374},
+ year = {2021}
+}
+
+@article{DBLP:journals/corr/abs-2303-08774,
+ author = {OpenAI},
+ title = {{GPT-4} Technical Report},
+ journal = {arXiv:2303.08774},
+ year = {2023}
+}
+
+@article{LLaMA,
+ author = {Hugo Touvron and
+ Thibaut Lavril and others},
+ title = {LLaMA: Open and Efficient Foundation Language Models},
+ journal = {arXiv:2302.13971},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2307-09288,
+ author = {Hugo Touvron and
+ Louis Martin and others},
+ title = {Llama 2: Open Foundation and Fine-Tuned Chat Models},
+ journal = {arXiv:2307.09288},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2308-12950,
+ author = {Baptiste Rozi{\`{e}}re and
+ Jonas Gehring and others},
+ title = {Code Llama: Open Foundation Models for Code},
+ journal = {arXiv:2308.12950},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/icml/RameshPGGVRCS21,
+ author = {Aditya Ramesh and
+ Mikhail Pavlov and
+ Gabriel Goh and others},
+ title = {Zero-Shot Text-to-Image Generation},
+ booktitle = {{ICML}},
+ year = {2021}
+}
+
+@article{DBLP:journals/corr/abs-2204-06125,
+ author = {Aditya Ramesh and
+ Prafulla Dhariwal and
+ Alex Nichol and others},
+ title = {Hierarchical Text-Conditional Image Generation with {CLIP} Latents},
+ journal = {arXiv:2204.06125},
+ year = {2022}
+}
+
+@article{betker2023improving,
+ title={Improving image generation with better captions},
+ author={Betker, James and Goh, Gabriel and Jing, Li and others},
+ journal={Computer Science},
+ volume={2},
+ number={3},
+ pages={8},
+ year={2023}
+}
+
+@inproceedings{DBLP:conf/cvpr/RombachBLEO22,
+ author = {Robin Rombach and
+ Andreas Blattmann and
+ Dominik Lorenz and others},
+ title = {High-Resolution Image Synthesis with Latent Diffusion Models},
+ booktitle = {{IEEE/CVF}},
+ year = {2022}
+}
+
+@misc{openai/sora,
+ author = {OpenAI},
+ year = {2024},
+ title = {Video generation models as world simulators},
+ howpublished = {\url{https://openai.com/research/video-generation-models-as-world-simulators}},
+}
+
+@article{GAN,
+ title={Generative adversarial networks},
+ author={Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and others},
+ journal={CACM},
+ volume={63},
+ number={11},
+ pages={139--144},
+ year={2020}
+}
+
+@article{DBLP:journals/neco/HochreiterS97,
+ author = {Sepp Hochreiter and
+ J{\"{u}}rgen Schmidhuber},
+ title = {Long Short-Term Memory},
+ journal = {Neural Comput.},
+ volume = {9},
+ number = {8},
+ pages = {1735--1780},
+ year = {1997}
+}
+
+@inproceedings{DBLP:conf/nips/VaswaniSPUJGKP17,
+ author = {Ashish Vaswani and
+ Noam Shazeer and
+ Niki Parmar and others},
+ title = {Attention is All you Need},
+ booktitle = {NeurIPS},
+ year = {2017}
+}
+
+@inproceedings{DBLP:conf/iclr/GuoRLFT0ZDSFTDC21,
+ author = {Daya Guo and
+ Shuo Ren and others},
+ title = {GraphCodeBERT: Pre-training Code Representations with Data Flow},
+ booktitle = {ICLR},
+ year = {2021}
+}
+
+@article{DBLP:journals/jmlr/RaffelSRLNMZLL20,
+ author = {Colin Raffel and
+ Noam Shazeer and
+ Adam Roberts and others},
+ title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text
+ Transformer},
+ journal = {JMLR},
+ volume = {21},
+ pages = {140:1--140:67},
+ year = {2020}
+}
+
+@article{Switch_transformers,
+ title={Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity},
+ author={Fedus, William and Zoph, Barret and Shazeer, Noam},
+ journal={JMLR},
+ volume={23},
+ number={120},
+ pages={1--39},
+ year={2022}
+}
+
+@article{scalingLaw,
+ author = {Jared Kaplan and
+ Sam McCandlish and
+ Tom Henighan and others},
+ title = {Scaling Laws for Neural Language Models},
+ year = {2020},
+ eprinttype = {arXiv}
+}
+
+@article{DBLP:journals/ftir/RobertsonZ09,
+ author = {Stephen E. Robertson and
+ Hugo Zaragoza},
+ title = {The Probabilistic Relevance Framework: {BM25} and Beyond},
+ journal = {FTIR},
+ volume = {3},
+ number = {4},
+ pages = {333--389},
+ year = {2009}
+}
+
+@inproceedings{DBLP:conf/emnlp/KarpukhinOMLWEC20,
+ author = {Vladimir Karpukhin and
+ Barlas Oguz and
+ Sewon Min and others},
+ title = {Dense Passage Retrieval for Open-Domain Question Answering},
+ booktitle = {{EMNLP}},
+ year = {2020}
+}
+
+@article{DBLP:journals/tbd/JohnsonDJ21,
+ author = {Jeff Johnson and
+ Matthijs Douze and
+ Herv{\'{e}} J{\'{e}}gou},
+ title = {Billion-Scale Similarity Search with GPUs},
+ journal = {{IEEE} Trans. Big Data},
+ volume = {7},
+ number = {3},
+ pages = {535--547},
+ year = {2021}
+}
+
+@inproceedings{DBLP:conf/nips/ChenZWLLLYW21,
+ author = {Qi Chen and
+ Bing Zhao and
+ Haidong Wang and others},
+ title = {{SPANN:} Highly-efficient Billion-scale Approximate Nearest Neighborhood
+ Search},
+ booktitle = {NeurIPS},
+ year = {2021}
+}
+
+@article{DBLP:journals/csur/DattaJLW08,
+ author = {Ritendra Datta and
+ Dhiraj Joshi and
+ Jia Li and others},
+ title = {Image retrieval: Ideas, influences, and trends of the new age},
+ journal = {CSUR},
+ volume = {40},
+ number = {2},
+ pages = {5:1--5:60},
+ year = {2008}
+}
+
+@inproceedings{radford2021learning,
+ title={Learning transferable visual models from natural language supervision},
+ author={Radford, Alec and Kim, Jong Wook and Hallacy, Chris and others},
+ booktitle={ICML},
+ year={2021}
+}
+
+@inproceedings{DBLP:conf/emnlp/FengGTDFGS0LJZ20,
+ author = {Zhangyin Feng and
+ Daya Guo and others},
+ title = {CodeBERT: {A} Pre-Trained Model for Programming and Natural Languages},
+ booktitle = {EMNLP Findings},
+ year = {2020}
+}
+
+@inproceedings{DBLP:conf/icassp/WuCZHBD23,
+ author = {Yusong Wu and
+ Ke Chen and
+ Tianyu Zhang and others},
+ title = {Large-Scale Contrastive Language-Audio Pretraining with Feature Fusion
+ and Keyword-to-Caption Augmentation},
+ booktitle = {ICASSP},
+ year = {2023}
+}
+
+@inproceedings{Adaptive-Retrieval-whennottrust,
+ author = {Alex Mallen and
+ Akari Asai and
+ Victor Zhong and others},
+ title = {When Not to Trust Language Models: Investigating Effectiveness of
+ Parametric and Non-Parametric Memories},
+ booktitle = {ACL},
+ year = {2023},
+
+}
+
+@inproceedings{DBLP:conf/uss/CarliniTWJHLRBS21,
+ author = {Nicholas Carlini and
+ Florian Tram{\`{e}}r and others},
+ title = {Extracting Training Data from Large Language Models},
+ booktitle = {{USENIX}},
+ year = {2021}
+}
+
+@article{C-RAG,
+ author = {Mintong Kang and
+ Nezihe Merve G{\"{u}}rel and others},
+ title = {{C-RAG:} Certified Generation Risks for Retrieval-Augmented Language
+ Models},
+ journal = {arXiv:2402.03181},
+ year = {2024}
+}
+
+@article{Atlas,
+ author={Izacard, Gautier and Lewis, Patrick and Lomeli, Maria and others},
+ title={Atlas: Few-shot learning with retrieval augmented language models},
+ journal={arXiv:2208.03299},
+ year={2022}
+}
+
+@inproceedings{MemTransformer2022,
+ author = {Yuhuai Wu and
+ Markus Norman Rabe and
+ DeLesley Hutchins and
+ Christian Szegedy},
+ title = {Memorizing Transformers},
+ booktitle = {ICLR},
+ year = {2022},
+
+}
+
+@article{REST,
+ author = {Zhenyu He and
+ Zexuan Zhong and
+ Tianle Cai and others},
+ title = {{REST:} Retrieval-Based Speculative Decoding},
+  journal = {arXiv:2311.08252},
+ year = {2023}
+}
+
+@inproceedings{REALM,
+ author = {Kelvin Guu and
+ Kenton Lee and
+ Zora Tung and others},
+ title = {{REALM:} Retrieval-Augmented Language Model Pre-Training},
+  booktitle = {ICML},
+ year = {2020}
+}
+
+@inproceedings{2020RAG,
+ author = {Patrick S. H. Lewis and
+ Ethan Perez and
+ Aleksandra Piktus and others},
+ title = {Retrieval-Augmented Generation for Knowledge-Intensive {NLP} Tasks},
+ booktitle = {NeurIPS},
+ year = {2020},
+}
+
+@inproceedings{FID,
+ author = {Gautier Izacard and
+ Edouard Grave},
+ title = {Leveraging Passage Retrieval with Generative Models for Open Domain Question Answering},
+ booktitle = {{EACL}},
+ year = {2021}
+}
+
+@inproceedings{RETRO,
+ author = {Sebastian Borgeaud and
+ Arthur Mensch and others},
+ title = {Improving Language Models by Retrieving from Trillions of Tokens},
+ booktitle = {{ICML}},
+ year = {2022}
+}
+
+@inproceedings{KNN-LM,
+ author = {Urvashi Khandelwal and
+ Omer Levy and
+ Dan Jurafsky and others},
+ title = {Generalization through Memorization: Nearest Neighbor Language Models},
+ booktitle = {ICLR},
+ year = {2020},
+}
+
+@inproceedings{Efficient-KNNLM,
+ author = {Junxian He and
+ Graham Neubig and
+ Taylor Berg{-}Kirkpatrick},
+ title = {Efficient Nearest Neighbor Language Models},
+ booktitle = {EMNLP},
+ year = {2021},
+
+}
+
+@online{GPTCache,
+ author = {zilliztech},
+ title = {GPTCache},
+ year = 2023,
+ url = {https://github.com/zilliztech/GPTCache},
+ }
+
+ @inproceedings{DBLP:conf/emnlp/ParvezACRC21,
+ author = {Md. Rizwan Parvez and
+ Wasi Uddin Ahmad and others},
+ title = {Retrieval Augmented Code Generation and Summarization},
+ booktitle = {EMNLP Findings},
+ year = {2021}
+}
+
+@inproceedings{DBLP:conf/naacl/AhmadCRC21,
+ author = {Wasi Uddin Ahmad and
+ Saikat Chakraborty and
+ Baishakhi Ray and others},
+ title = {Unified Pre-training for Program Understanding and Generation},
+ booktitle = {NAACL-HLT},
+ year = {2021}
+}
+
+@inproceedings{DBLP:conf/iclr/Zhou0XJN23,
+ author = {Shuyan Zhou and
+ Uri Alon and
+ Frank F. Xu and others},
+ title = {DocPrompting: Generating Code by Retrieving the Docs},
+ booktitle = {ICLR},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2012-07331,
+ title={Audio captioning using pre-trained large-scale language model guided by audio-based similar caption retrieval},
+ author={Koizumi, Yuma and Ohishi, Yasunori and others},
+ journal={arXiv:2012.07331},
+ year={2020}
+}
+
+@inproceedings{DBLP:conf/icml/HuangHY0LLYLYZ23,
+ author = {Rongjie Huang and
+ Jiawei Huang and
+ Dongchao Yang and others},
+ title = {Make-An-Audio: Text-To-Audio Generation with Prompt-Enhanced Diffusion
+ Models},
+ booktitle = {ICML},
+ year = {2023}
+}
+
+@inproceedings{tseng2020retrievegan,
+ title={Retrievegan: Image synthesis via differentiable patch retrieval},
+ author={Tseng, Hung-Yu and Lee, Hsin-Ying and others},
+ booktitle={ECCV},
+ year={2020}
+}
+
+@inproceedings{sarto2022retrieval,
+ title={Retrieval-augmented transformer for image captioning},
+ author={Sarto, Sara and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita},
+ booktitle={CBMI},
+ year={2022}
+}
+
+@inproceedings{ramos2023smallcap,
+ title={SmallCap: lightweight image captioning prompted with retrieval augmentation},
+ author={Ramos, Rita and Martins, Bruno and others},
+ booktitle={CVPR},
+ year={2023}
+}
+
+@article{DBLP:journals/tomccap/ChenPLYCM23,
+ author = {Jingwen Chen and
+ Yingwei Pan and
+ Yehao Li and others},
+ title = {Retrieval Augmented Convolutional Encoder-decoder Networks for Video
+ Captioning},
+ journal = {TOMCCAP},
+ volume = {19},
+ number = {1s},
+ pages = {48:1--48:24},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2401-00789,
+ author = {Jilan Xu and
+ Yifei Huang and
+ Junlin Hou and others},
+ title = {Retrieval-Augmented Egocentric Video Captioning},
+ journal = {arXiv:2401.00789},
+ year = {2024}
+}
+
+@article{DBLP:journals/corr/abs-2402-02972,
+ title={Retrieval-augmented score distillation for text-to-3d generation},
+ author={Seo, Junyoung and Hong, Susung and others},
+ journal={arXiv:2402.02972},
+ year={2024}
+}
+
+@inproceedings{DBLP:conf/iccv/ZhangGPCHLYL23,
+ author = {Mingyuan Zhang and
+ Xinying Guo and
+ Liang Pan and others},
+ title = {ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model},
+ booktitle = {ICCV},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/coling/HuWSQ22,
+ author = {Xixin Hu and
+ Xuan Wu and
+ Yiheng Shu and
+ Yuzhong Qu},
+ title = {Logical Form Generation via Multi-task Learning for Complex Question Answering over Knowledge Bases},
+ booktitle = {COLING},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/emnlp/HuangKZ21,
+ author = {Xin Huang and
+ Jung{-}Jae Kim and
+ Bowei Zou},
+ title = {Unseen Entity Handling in Complex Question Answering over Knowledge Base via Language Generation},
+ booktitle = {EMNLP Findings},
+ year = {2021}
+}
+
+@inproceedings{DBLP:conf/emnlp/DasZTGPLTPM21,
+ author = {Rajarshi Das and
+ Manzil Zaheer and
+ Dung Thai and others},
+ title = {Case-based Reasoning for Natural Language Queries over Knowledge Bases},
+ booktitle = {EMNLP},
+ year = {2021}
+}
+
+@inproceedings{wang2022retrieval,
+ title={Retrieval-based Controllable Molecule Generation},
+ author={Wang, Zichao and Nie, Weili and Qiao, Zhuoran and others},
+ booktitle={ICLR},
+ year={2022}
+}
+
+@article{jin2023genegpt,
+ title={Genegpt: Augmenting large language models with domain tools for improved access to biomedical information},
+ author={Jin, Qiao and Yang, Yifan and Chen, Qingyu and Lu, Zhiyong},
+ journal={Bioinformatics},
+ volume={40},
+ number={2},
+ pages={btae075},
+ year={2024}
+}
+
+@article{DBLP:journals/corr/abs-2202-01110,
+ author = {Huayang Li and
+ Yixuan Su and
+ Deng Cai and others},
+ title = {A Survey on Retrieval-Augmented Text Generation},
+  journal = {arXiv:2202.01110},
+ year = {2022}
+}
+
+@article{retrieval-lm-tutorial,
+ author = { Asai, Akari and Min, Sewon and Zhong, Zexuan and Chen, Danqi },
+ title = { ACL 2023 Tutorial: Retrieval-based Language Models and Applications },
+ journal = { ACL 2023 },
+ year = { 2023 },
+}
+
+@article{DBLP:journals/corr/abs-2312-10997,
+ author = {Yunfan Gao and
+ Yun Xiong and others},
+ title = {Retrieval-Augmented Generation for Large Language Models: {A} Survey},
+  journal = {arXiv:2312.10997},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/emnlp/ZhaoCWJLQDGLLJ23,
+ author = {Ruochen Zhao and
+ Hailin Chen and others},
+ title = {Retrieving Multimodal Information for Augmented Generation: {A} Survey},
+ booktitle = {{EMNLP}},
+ year = {2023}
+}
+
+@article{ding2024survey,
+ title={A Survey on RAG Meets LLMs: Towards Retrieval-Augmented Large Language Models},
+ author={Ding, Yujuan and Fan, Wenqi and others},
+ journal={arXiv:2405.06211},
+ year={2024}
+}
+
+@inproceedings{DBLP:conf/cvpr/ChenGY0E22,
+ author = {Jun Chen and
+ Han Guo and
+ Kai Yi and others},
+ title = {VisualGPT: Data-efficient Adaptation of Pretrained Language Models
+ for Image Captioning},
+ booktitle = {{CVPR}},
+ year = {2022}
+}
+
+@article{EfficientTransformers,
+ author = {Yi Tay and
+ Mostafa Dehghani and
+ Dara Bahri and
+ Donald Metzler},
+ title = {Efficient Transformers: {A} Survey},
+ journal = {CSUR},
+ volume = {55},
+ number = {6},
+ pages = {109:1--109:28},
+ year = {2023}
+}
+
+@article{lstm_survey,
+ author = {Greg Van Houdt and others},
+ title = {A review on the long short-term memory model},
+ journal = {Artif. Intell. Rev.},
+ volume = {53},
+ number = {8},
+ pages = {5929--5955},
+ year = {2020}
+}
+
+@article{yang2023diffsurvey,
+ title={Diffusion models: A comprehensive survey of methods and applications},
+ author={Yang, Ling and Zhang, Zhilong and others},
+ journal={CSUR},
+ volume={56},
+ number={4},
+ pages={1--39},
+ year={2023}
+}
+
+@article{GAN_Survey,
+ author = {Jie Gui and
+ Zhenan Sun and
+ Yonggang Wen and others},
+ title = {A Review on Generative Adversarial Networks: Algorithms, Theory, and
+ Applications},
+ journal = {TKDE},
+ volume = {35},
+ number = {4},
+ pages = {3313--3332},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/sigir/RobertsonW97,
+ author = {Stephen E. Robertson and
+ Steve Walker},
+ title = {On Relevance Weights with Little Relevance Information},
+ booktitle = {{SIGIR}},
+ year = {1997}
+}
+
+@inproceedings{DBLP:conf/sigir/LaffertyZ01,
+ author = {John D. Lafferty and
+ ChengXiang Zhai},
+ title = {Document Language Models, Query Models, and Risk Minimization for
+ Information Retrieval},
+ booktitle = {SIGIR},
+ year = {2001}
+}
+
+@inproceedings{DBLP:conf/icassp/HersheyCEGJMPPS17,
+ author = {Shawn Hershey and
+ Sourish Chaudhuri and others},
+ title = {{CNN} architectures for large-scale audio classification},
+ booktitle = {ICASSP},
+ year = {2017}
+}
+
+@inproceedings{DBLP:conf/cvpr/DongLXJH0W19,
+ author = {Jianfeng Dong and
+ Xirong Li and
+ Chaoxi Xu and others},
+ title = {Dual Encoding for Zero-Example Video Retrieval},
+ booktitle = {CVPR},
+ year = {2019},
+}
+
+@inproceedings{DBLP:conf/iclr/XiongXLTLBAO21,
+ author = {Lee Xiong and
+ Chenyan Xiong and
+ Ye Li and others},
+ title = {Approximate Nearest Neighbor Negative Contrastive Learning for Dense
+ Text Retrieval},
+ booktitle = {{ICLR}},
+ year = {2021}
+}
+
+@article{bentley1975multidimensional,
+ title={Multidimensional binary search trees used for associative searching},
+ author={Bentley, Jon Louis},
+ journal={CACM},
+ volume={18},
+ number={9},
+ pages={509--517},
+ year={1975}
+}
+
+@inproceedings{li2023learning,
+ title={Learning balanced tree indexes for large-scale vector retrieval},
+ author={Li, Wuchao and Feng, Chao and Lian, Defu and others},
+  booktitle={SIGKDD},
+ year={2023}
+}
+
+@inproceedings{datar2004locality,
+ title={Locality-sensitive hashing scheme based on p-stable distributions},
+ author={Datar, Mayur and Immorlica, Nicole and Indyk, Piotr and others},
+ booktitle={SCG},
+ year={2004}
+}
+
+@article{malkov2018efficient,
+ title={Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs},
+ author={Malkov, Yu A and Yashunin, Dmitry A},
+ journal={TPAMI},
+ volume={42},
+ number={4},
+ pages={824--836},
+ year={2018}
+}
+
+@inproceedings{jayaram2019diskann,
+ title={Diskann: Fast accurate billion-point nearest neighbor search on a single node},
+ author={Jayaram Subramanya, Suhas and Devvrit, Fnu and others},
+  booktitle={NeurIPS},
+ year={2019}
+}
+
+@inproceedings{DBLP:conf/nips/WangHWMWCXCZL0022,
+ author = {Yujing Wang and
+ Yingyan Hou and
+ Haonan Wang and others},
+ title = {A Neural Corpus Indexer for Document Retrieval},
+ booktitle = {NeurIPS},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/nips/ZhangWCCZMHDMWP23,
+ author = {Hailin Zhang and
+ Yujing Wang and
+ Qi Chen and others},
+ title = {Model-enhanced Vector Index},
+ booktitle = {NeurIPS},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/emnlp/HayatiOAYTN18,
+ author = {Shirley Anugrah Hayati and
+ Rapha{\"{e}}l Olivier and
+ Pravalika Avvaru and others},
+ title = {Retrieval-Based Neural Code Generation},
+ booktitle = {EMNLP},
+ year = {2018}
+}
+
+@inproceedings{DBLP:conf/icse/ZhangW00020,
+ author = {Jian Zhang and
+ Xu Wang and
+ Hongyu Zhang and others},
+ title = {Retrieval-based neural source code summarization},
+ booktitle = {ICSE},
+ year = {2020}
+}
+
+@inproceedings{DBLP:conf/iclr/PoesiaP00SMG22,
+ author = {Gabriel Poesia and
+ Alex Polozov and
+ Vu Le and others},
+ title = {Synchromesh: Reliable Code Generation from Pre-trained Language Models},
+ booktitle = {ICLR},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/acl/YeYHZX22,
+ author = {Xi Ye and
+ Semih Yavuz and others},
+ title = {{RNG-KBQA:} Generation Augmented Iterative Ranking for Knowledge Base Question Answering},
+ booktitle = {ACL},
+ year = {2022}
+}
+
+@article{DBLP:journals/corr/abs-2210-12925,
+  author = {Yiheng Shu and
+               Zhiwei Yu and others},
+ title = {{TIARA:} Multi-grained Retrieval for Robust Question Answering over Large Knowledge Bases},
+ journal = {arXiv:2210.12925},
+ year = {2022}
+}
+
+@article{lin2020bridging,
+ title={Bridging textual and tabular data for cross-domain text-to-sql semantic parsing},
+ author={Lin, Xi Victoria and Socher, Richard and others},
+ journal={arXiv:2012.12627},
+ year={2020}
+}
+
+@article{Self-RAG,
+ author = {Akari Asai and
+ Zeqiu Wu and
+ Yizhong Wang and others},
+ title = {Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection},
+  journal = {arXiv:2310.11511},
+ year = {2023}
+}
+
+@article{REPLUG,
+ title={Replug: Retrieval-augmented black-box language models},
+ author={Shi, Weijia and Min, Sewon and Yasunaga, Michihiro and others},
+ journal={arXiv:2301.12652},
+ year={2023}
+}
+
+@article{RALM,
+ title={In-context retrieval-augmented language models},
+ author={Ram, Ori and Levine, Yoav and Dalmedigos, Itay and others},
+ journal={arXiv:2302.00083},
+ year={2023}
+}
+
+@inproceedings{DBLP:conf/emnlp/ZanCLGWL22,
+ author = {Daoguang Zan and
+ Bei Chen and
+ Zeqi Lin and others},
+ title = {When Language Model Meets Private Library},
+ booktitle = {EMNLP Findings},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/icse/NashidSM23,
+ author = {Noor Nashid and
+ Mifta Sintaha and
+ Ali Mesbah},
+ title = {Retrieval-Based Prompt Selection for Code-Related Few-Shot Learning},
+ booktitle = {ICSE},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/sigsoft/JinSTSLSS23,
+ author = {Matthew Jin and
+ Syed Shahriar and
+ Michele Tufano and others},
+ title = {InferFix: End-to-End Program Repair with LLMs},
+ booktitle = {ESEC/FSE},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/acl/LuDHGHS22,
+ author = {Shuai Lu and
+ Nan Duan and
+ Hojae Han and others},
+ title = {ReACC: {A} Retrieval-Augmented Code Completion Framework},
+ booktitle = {ACL},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/acl/Liu22,
+ title = "Uni-Parser: Unified Semantic Parser for Question Answering on Knowledge Base and Database",
+ author = "Liu, Ye and
+ Yavuz, Semih and
+ Meng, Rui and
+ Radev, Dragomir and
+ Xiong, Caiming and
+ Zhou, Yingbo",
+ editor = "Goldberg, Yoav and
+ Kozareva, Zornitsa and
+ Zhang, Yue",
+ booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
+ month = dec,
+ year = "2022",
+ address = "Abu Dhabi, United Arab Emirates",
+ publisher = "Association for Computational Linguistics",
+ url = "https://aclanthology.org/2022.emnlp-main.605/",
+ doi = "10.18653/v1/2022.emnlp-main.605",
+ pages = "8858--8869",
+ abstract = "Parsing natural language questions into executable logical forms is a useful and interpretable way to perform question answering on structured data such as knowledge bases (KB) or databases (DB). However, existing approaches on semantic parsing cannot adapt to both modalities, as they suffer from the exponential growth of the logical form candidates and can hardly generalize to unseen data.In this work, we propose Uni-Parser, a unified semantic parser for question answering (QA) on both KB and DB. We define the primitive (relation and entity in KB, and table name, column name and cell value in DB) as the essential element in our framework. The number of primitives grows only at a linear rate to the number of retrieved relations in KB and DB, preventing us from exponential logic form candidates. We leverage the generator to predict final logical forms by altering and composing top-ranked primitives with different operations (e.g. select, where, count). With sufficiently pruned search space by a contrastive primitive ranker, the generator is empowered to capture the composition of primitives enhancing its generalization ability. We achieve competitive results on multiple KB and DB QA benchmarks with more efficiency, especially in the compositional and zero-shot settings."
+}
+
+@inproceedings{DBLP:conf/eacl/YangDCC23,
+ author = {Zonglin Yang and
+ Xinya Du and
+ Erik Cambria and others},
+ title = {End-to-end Case-Based Reasoning for Commonsense Knowledge Base Completion},
+ booktitle = {EACL},
+ year = {2023}
+}
+
+@inproceedings{shi2023retrieval,
+ author={Shi, Wenqi and Zhuang, Yuchen and Zhu, Yuanda and others},
+ title={Retrieval-augmented large language models for adolescent idiopathic scoliosis patients in shared decision-making},
+ booktitle={ACM-BCB},
+ year={2023}
+}
+
+@inproceedings{casanova2021instance,
+ title={Instance-conditioned gan},
+ author={Casanova, Arantxa and Careil, Marlene and Verbeek, Jakob and others},
+ booktitle={NeurIPS},
+ year={2021}
+}
+
+@inproceedings{Bertsch2023UnlimiformerLT,
+ author = {Amanda Bertsch and
+ Uri Alon and
+ Graham Neubig and
+ Matthew R. Gormley},
+ title = {Unlimiformer: Long-Range Transformers with Unlimited Length Input},
+ booktitle = {NeurIPS},
+ year = {2024},
+
+}
+
+@article{RMT-R,
+ title={In Search of Needles in a 10M Haystack: Recurrent Memory Finds What LLMs Miss},
+ author={Kuratov, Yuri and Bulatov, Aydar and others},
+ journal={arXiv:2402.10790},
+ year={2024}
+}
+
+@inproceedings{DBLP:conf/kbse/LiL000J21,
+ author = {Jia Li and
+ Yongmin Li and
+ Ge Li and others},
+ title = {EditSum: {A} Retrieve-and-Edit Framework for Source Code Summarization},
+ booktitle = {ASE},
+ year = {2021}
+}
+
+@inproceedings{DBLP:conf/icsm/YuYCLZ22,
+ author = {Chi Yu and
+ Guang Yang and
+ Xiang Chen and others},
+ title = {BashExplainer: Retrieval-Augmented Bash Code Comment Generation based on Fine-tuned CodeBERT},
+ booktitle = {ICSME},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/nips/HashimotoGOL18,
+ author = {Tatsunori B. Hashimoto and
+ Kelvin Guu and
+ Yonatan Oren and
+ Percy Liang},
+ title = {A Retrieve-and-Edit Framework for Predicting Structured Outputs},
+ booktitle = {NeurIPS},
+ year = {2018}
+}
+
+@inproceedings{DBLP:conf/kbse/WeiLLXJ20,
+ author = {Bolin Wei and
+ Yongmin Li and
+ Ge Li and others},
+ title = {Retrieve and Refine: Exemplar-based Neural Comment Generation},
+ booktitle = {ASE},
+ year = {2020}
+}
+
+@inproceedings{DBLP:conf/emnlp/ShiW0DZHZ022,
+ author = {Ensheng Shi and
+ Yanlin Wang and
+ Wei Tao and others},
+ title = {{RACE:} Retrieval-augmented Commit Message Generation},
+ booktitle = {EMNLP},
+ year = {2022}
+}
+
+@inproceedings{chen2022re,
+ title={Re-imagen: Retrieval-augmented text-to-image generator},
+ author={Chen, Wenhu and Hu, Hexiang and Saharia, Chitwan and Cohen, William W},
+ booktitle={ICLR},
+ year={2023}
+}
+
+@inproceedings{sheynin2022knn,
+ title={Knn-diffusion: Image generation via large-scale retrieval},
+ author={Sheynin, Shelly and Ashual, Oron and Polyak, Adam and others},
+ booktitle={ICLR},
+ year={2023}
+}
+
+@inproceedings{blattmann2022retrieval,
+ title={Retrieval-augmented diffusion models},
+ author={Blattmann, Andreas and Rombach, Robin and Oktay, Kaan and others},
+ booktitle={NeurIPS},
+ year={2022}
+}
+
+@article{rombach2022text,
+ title={Text-guided synthesis of artistic images with retrieval-augmented diffusion models},
+ author={Rombach, Robin and Blattmann, Andreas and Ommer, Bj{\"o}rn},
+ journal={arXiv:2207.13038},
+ year={2022}
+}
+
+@article{li2022memory,
+ title={Memory-driven text-to-image generation},
+ author={Li, Bowen and Torr, Philip HS and Lukasiewicz, Thomas},
+ journal={arXiv:2208.07022},
+ year={2022}
+}
+
+@inproceedings{DBLP:conf/naacl/OguzCKPOSGMY22,
+ author = {Barlas Oguz and
+ Xilun Chen and
+ Vladimir Karpukhin and others},
+ title = {UniK-QA: Unified Representations of Structured and Unstructured Knowledge for Open-Domain Question Answering},
+ booktitle = {NAACL Findings},
+ year = {2022}
+}
+
+@inproceedings{DBLP:conf/iclr/YuZNZL0HWWX23,
+ author = {Donghan Yu and
+ Sheng Zhang and others},
+ title = {DecAF: Joint Decoding of Answers and Logical Forms for Question Answering over Knowledge Bases},
+ booktitle = {ICLR},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/cikm/DongLWZXX23,
+ author = {Guanting Dong and
+ Rumei Li and
+ Sirui Wang and others},
+ title = {Bridging the KB-Text Gap: Leveraging Structured Knowledge-aware Pre-training for {KBQA}},
+ booktitle = {CIKM},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2308-13259,
+ author = {Keheng Wang and
+ Feiyu Duan and
+ Sirui Wang and others},
+ title = {Knowledge-Driven CoT: Exploring Faithful Reasoning in LLMs for Knowledge-intensive Question Answering},
+ journal = {arXiv:2308.13259},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/sigir/YuY23,
+ author = {Donghan Yu and
+ Yiming Yang},
+ title = {Retrieval-Enhanced Generative Model for Large-Scale Knowledge Graph Completion},
+ booktitle = {SIGIR},
+ year = {2023}
+}
+
+@inproceedings{TOME,
+ title={Mention Memory: incorporating textual knowledge into Transformers through entity mention attention},
+ author={de Jong, Michiel and Zemlyanskiy, Yury and FitzGerald, Nicholas and others},
+ booktitle={ICLR},
+ year={2021}
+}
+
+@inproceedings{EaE,
+ title={Entities as Experts: Sparse Memory Access with Entity Supervision},
+ author={F{\'e}vry, Thibault and Soares, Livio Baldini and others},
+ booktitle={EMNLP},
+ year={2020}
+}
+
+@inproceedings{jing2023amd,
+ title={Amd: Anatomical motion diffusion with interpretable motion decomposition and fusion},
+ author={Jing, Beibei and Zhang, Youjia and Song, Zikai and others},
+ booktitle={AAAI},
+ year={2024}
+}
+
+@inproceedings{DBLP:journals/corr/abs-2309-08051,
+ title={Retrieval-augmented text-to-audio generation},
+ author={Yuan, Yi and Liu, Haohe and Liu, Xubo and others},
+ booktitle={ICASSP},
+ year={2024}
+}
+
+@article{DBLP:journals/tip/YangCZ23,
+ author = {Bang Yang and
+ Meng Cao and
+ Yuexian Zou},
+ title = {Concept-Aware Video Captioning: Describing Videos With Effective Prior
+ Information},
+ journal = {TIP},
+ volume = {32},
+ pages = {5366--5378},
+ year = {2023},
+}
+
+@inproceedings{TRIME,
+ author = {Zexuan Zhong and
+ Tao Lei and
+ Danqi Chen},
+ title = {Training Language Models with Memory Augmentation},
+ booktitle = {EMNLP},
+ year = {2022}
+}
+
+@inproceedings{NPM,
+ author = {Sewon Min and
+ Weijia Shi and
+ Mike Lewis and others},
+ title = {Nonparametric Masked Language Modeling},
+ booktitle = {ACL Findings},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/emnlp/Zhang0YC23,
+ author = {Xiangyu Zhang and
+ Yu Zhou and
+ Guang Yang and
+ Taolue Chen},
+ title = {Syntax-Aware Retrieval Augmented Code Generation},
+ booktitle = {EMNLP Findings},
+ year = {2023}
+}
+
+@inproceedings{fei2021memory,
+ title={Memory-augmented image captioning},
+ author={Fei, Zhengcong},
+ booktitle={AAAI},
+ year={2021}
+}
+
+@inproceedings{Speculative_Decoding,
+ author = {Yaniv Leviathan and
+ Matan Kalman and
+ Yossi Matias},
+ title = {Fast Inference from Transformers via Speculative Decoding},
+ booktitle = {{ICML}},
+ year = {2023}
+}
+
+@inproceedings{COG,
+ author = {Tian Lan and
+ Deng Cai and
+ Yan Wang and others},
+ title = {Copy is All You Need},
+ booktitle = {ICLR},
+ year = {2023}
+}
+
+@article{RetrievalisAccurateGeneration,
+ title={Retrieval is Accurate Generation},
+ author={Cao, Bowen and Cai, Deng and Cui, Leyang and others},
+ journal={arXiv:2402.17532},
+ year={2024}
+}
+
+@inproceedings{Query2doc,
+ author = {Liang Wang and
+ Nan Yang and
+ Furu Wei},
+ title = {Query2doc: Query Expansion with Large Language Models},
+ booktitle = {EMNLP},
+ year = {2023}
+}
+
+@inproceedings{HyDE,
+ author = {Luyu Gao and
+ Xueguang Ma and
+ Jimmy Lin and
+ Jamie Callan},
+ title = {Precise Zero-Shot Dense Retrieval without Relevance Labels},
+ booktitle = {ACL},
+ year = {2023}
+}
+
+@inproceedings{TOC,
+ title={Tree of Clarifications: Answering Ambiguous Questions with Retrieval-Augmented Large Language Models},
+ author={Kim, Gangwoo and Kim, Sungdong and Jeon, Byeongguk and others},
+ booktitle={EMNLP},
+ year={2023}
+}
+
+@article{RQ-RAG,
+ title={RQ-RAG: Learning to Refine Queries for Retrieval Augmented Generation},
+ author={Chan, Chi-Min and Xu, Chunpu and others},
+ journal={arXiv:2404.00610},
+ year={2024}
+}
+
+@inproceedings{tayal2024dynamic,
+ title={Dynamic Contexts for Generating Suggestion Questions in RAG Based Conversational Systems},
+ author={Tayal, Anuja and Tyagi, Aman},
+ booktitle={WWW’24 Companion},
+ year={2024}
+}
+
+@article{LESS,
+ author = {Mengzhou Xia and
+ Sadhika Malladi and
+ Suchin Gururangan and others},
+ title = {{LESS:} Selecting Influential Data for Targeted Instruction Tuning},
+ journal = {arXiv:2402.04333},
+ year = {2024}
+}
+
+@article{Telco-RAG,
+ title={Telco-RAG: Navigating the challenges of retrieval-augmented language models for telecommunications},
+ author={Bornea, Andrei-Laurentiu and Ayed, Fadhel and others},
+ journal={arXiv:2404.15939},
+ year={2024}
+}
+
+@inproceedings{ReAct,
+ author = {Shunyu Yao and
+ Jeffrey Zhao and
+ Dian Yu and others},
+ title = {ReAct: Synergizing Reasoning and Acting in Language Models},
+ booktitle = {{ICLR}},
+ year = {2023}
+}
+
+@inproceedings{COT,
+ author = {Jason Wei and
+ Xuezhi Wang and
+ Dale Schuurmans and others},
+ title = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
+ booktitle = {NeurIPS},
+ year = {2022}
+}
+
+@article{RATP,
+ title={Retrieval-Augmented Thought Process as Sequential Decision Making},
+ author={Pouplin, Thomas and Sun, Hao and Holt, Samuel and Van der Schaar, Mihaela},
+ journal={arXiv:2402.07812},
+ year={2024}
+}
+
+@software{LlamaIndex,
+author = {Liu, Jerry},
+month = {11},
+title = {{LlamaIndex}},
+url = {https://github.com/jerryjliu/llama_index},
+year = {2022}
+}
+
+@inproceedings{RAPTOR,
+ title={RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval},
+ author={Sarthi, Parth and Abdullah, Salman and Tuli, Aditi and others},
+ booktitle={ICLR},
+ year={2023}
+}
+
+@article{Prompt-RAG,
+ title={Prompt-RAG: Pioneering Vector Embedding-Free Retrieval-Augmented Generation in Niche Domains, Exemplified by Korean Medicine},
+ author={Kang, Bongsu and Kim, Jundong and others},
+ journal={arXiv:2401.11246},
+ year={2024}
+}
+
+@article{raina2024question,
+ title={Question-Based Retrieval using Atomic Units for Enterprise RAG},
+ author={Raina, Vatsal and others},
+ journal={arXiv:2405.12363},
+ year={2024}
+}
+
+@article{bge_embedding,
+ author={Shitao Xiao and Zheng Liu and Peitian Zhang and others},
+ title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
+  journal={arXiv:2309.07597},
+ year={2023}
+}
+
+@article{bge_m3,
+ author={Chen, Jianlv and Xiao, Shitao and Zhang, Peitian and others},
+ title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+  journal = {arXiv:2309.07597},
+ year={2023}
+}
+
+@article{cocktail,
+ author={Shitao Xiao and Zheng Liu and Peitian Zhang and Xingrun Xing},
+ title={LM-Cocktail: Resilient Tuning of Language Models via Model Merging},
+  journal={arXiv:2311.13534},
+ year={2023}
+}
+
+@article{llm_embedder,
+ author={Peitian Zhang and Shitao Xiao and Zheng Liu and Zhicheng Dou and Jian-Yun Nie},
+ title={Retrieve Anything To Augment Large Language Models},
+  journal={arXiv:2310.07554},
+ year={2023}
+}
+
+@article{RL4RAG,
+ author = {Mandar Kulkarni and
+ Praveen Tangarajan and
+ Kyung Kim and others},
+ title = {Reinforcement Learning for Optimizing {RAG} for Domain Chatbots},
+ journal = {arXiv:2401.06800},
+ year = {2024}
+}
+
+@inproceedings{DBLP:conf/sigsoft/Wang0JH23,
+ author = {Weishi Wang and
+ Yue Wang and others},
+ title = {RAP-Gen: Retrieval-Augmented Patch Generation with CodeT5 for Automatic Program Repair},
+ booktitle = {ESEC/FSE},
+ year = {2023}
+}
+
+@article{Blended-RAG,
+ title={Blended RAG: Improving RAG (Retriever-Augmented Generation) Accuracy with Semantic Search and Hybrid Query-Based Retrievers},
+ author={Sawarkar, Kunal and Mangal, Abhilasha and others},
+ journal={arXiv:2404.07220},
+ year={2024}
+}
+
+@article{CRAG,
+ title={Corrective Retrieval Augmented Generation},
+ author={Yan, Shi-Qi and Gu, Jia-Chen and Zhu, Yun and Ling, Zhen-Hua},
+ journal={arXiv:2401.15884},
+ year={2024}
+}
+
+@inproceedings{RAGAE,
+ author={Huang, Wenyu and Lapata, Mirella and Vougiouklis, Pavlos and others},
+ title={Retrieval Augmented Generation with Rich Answer Encoding},
+ booktitle={IJCNLP-AACL},
+ year={2023}
+}
+
+@article{UniMS-RAG,
+ author = {Hongru Wang and
+ Wenyu Huang and
+ Yang Deng and others},
+ title = {UniMS-RAG: {A} Unified Multi-source Retrieval-Augmented Generation
+ for Personalized Dialogue Systems},
+ journal = {arXiv:2401.13256},
+ year = {2024}
+}
+
+@inproceedings{koley2024you,
+ title={You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval},
+ author={Koley, Subhadeep and Bhunia, Ayan Kumar and others},
+ booktitle={CVPR},
+ year={2024}
+}
+
+@inproceedings{Re2G,
+ author = {Michael R. Glass and
+ Gaetano Rossiello and
+ Md. Faisal Mahbub Chowdhury and others},
+ title = {Re2G: Retrieve, Rerank, Generate},
+ booktitle = {{NAACL}},
+ year = {2022}
+}
+
+@article{ReRanker,
+ author = {Rodrigo Frassetto Nogueira and
+ Kyunghyun Cho},
+ title = {Passage Re-ranking with {BERT}},
+  journal = {arXiv:1901.04085},
+ year = {2019}
+}
+
+@article{li2023acecoder,
+ title={AceCoder: Utilizing Existing Code to Enhance Code Generation},
+ author={Li, Jia and Zhao, Yunfei and Li, Yongmin and others},
+ journal={arXiv:2303.17780},
+ year={2023}
+}
+
+@inproceedings{DBLP:conf/emnlp/0010Z0L22,
+ author = {Peng Shi and
+ Rui Zhang and
+ He Bai and
+ Jimmy Lin},
+ title = {{XRICL:} Cross-lingual Retrieval-Augmented In-Context Learning for Cross-lingual Text-to-SQL Semantic Parsing},
+ booktitle = {EMNLP Findings},
+ year = {2022}
+}
+
+@article{rangan2024fine,
+ title={A Fine-tuning Enhanced RAG System with Quantized Influence Measure as AI Judge},
+ author={Rangan, Keshav and Yin, Yiqiao},
+  journal = {arXiv:2402.17081},
+ year={2024}
+}
+
+@inproceedings{UDAPDR,
+ title={UDAPDR: Unsupervised Domain Adaptation via LLM Prompting and Distillation of Rerankers},
+ author={Saad-Falcon, Jon and Khattab, Omar and Santhanam, Keshav and others},
+ booktitle={EMNLP},
+ year={2023}
+}
+
+@article{LLM-R,
+ title={Learning to retrieve in-context examples for large language models},
+ author={Wang, Liang and Yang, Nan and Wei, Furu},
+ journal={arXiv:2307.07164},
+ year={2023}
+}
+
+@article{finardi2024chronicles,
+ title={The Chronicles of RAG: The Retriever, the Chunk and the Generator},
+ author={Finardi, Paulo and Avila, Leonardo and others},
+ journal={arXiv:2401.07883},
+ year={2024}
+}
+
+@article{li2024enhancing,
+ title={Enhancing LLM Factual Accuracy with RAG to Counter Hallucinations: A Case Study on Domain-Specific Queries in Private Knowledge-Bases},
+ author={Li, Jiarui and Yuan, Ye and Zhang, Zehua},
+ journal={arXiv:2403.10446},
+ year={2024}
+}
+
+@article{FILCO,
+ author = {Zhiruo Wang and
+ Jun Araki and
+ Zhengbao Jiang and others},
+ title = {Learning to Filter Context for Retrieval-Augmented Generation},
+  journal = {arXiv:2311.08377},
+ year = {2023}
+}
+
+@inproceedings{FiD-Light,
+ author = {Sebastian Hofst{\"{a}}tter and
+ Jiecao Chen and
+ Karthik Raman and
+ Hamed Zamani},
+ title = {FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation},
+ booktitle = {{SIGIR}},
+ year = {2023}
+}
+
+@article{RRR,
+ author = {Daman Arora and
+ Anush Kini and
+ Sayak Ray Chowdhury and others},
+ title = {GAR-meets-RAG Paradigm for Zero-Shot Information Retrieval},
+ journal = {arXiv:2310.20158},
+ year = {2023}
+}
+
+@misc{Pinecone,
+ howpublished = {\url{https://www.pinecone.io}},
+}
+@misc{TurLens,
+ howpublished = {\url{https://github.com/truera/trulens}},
+}
+
+@article{GENREAD,
+ title={Generate rather than retrieve: Large language models are strong context generators},
+ author={Yu, Wenhao and Iter, Dan and others},
+ journal={arXiv:2209.10063},
+ year={2022}
+}
+
+@article{GRG,
+ title={Generator-retriever-generator: A novel approach to open-domain question answering},
+ author={Abdallah, Abdelrahman and Jatowt, Adam},
+ journal={arXiv:2307.11278},
+ year={2023}
+}
+
+@article{Multi-Head-RAG,
+ title={Multi-Head RAG: Solving Multi-Aspect Problems with LLMs},
+ author={Besta, Maciej and Kubicek, Ales and others},
+ journal={arXiv:2406.05085},
+ year={2024}
+}
+
+@article{Prompt_Engineering_Guide,
+author = {Saravia, Elvis},
+journal = {https://github.com/dair-ai/Prompt-Engineering-Guide},
+month = {12},
+title = {{Prompt Engineering Guide}},
+year = {2022}
+}
+
+@article{StepBack-Prompting,
+ author = {Huaixiu Steven Zheng and
+ Swaroop Mishra and others},
+ title = {Take a Step Back: Evoking Reasoning via Abstraction in Large Language
+ Models},
+  journal = {arXiv:2310.06117},
+ year = {2023}
+}
+
+@article{active-prompt,
+ author = {Shizhe Diao and
+ Pengcheng Wang and
+ Yong Lin and
+ Tong Zhang},
+ title = {Active Prompting with Chain-of-Thought for Large Language Models},
+  journal = {arXiv:2302.12246},
+ year = {2023}
+}
+
+@inproceedings{LLMLingua,
+ author = {Huiqiang Jiang and
+ Qianhui Wu and
+ Chin{-}Yew Lin and others},
+ title = {LLMLingua: Compressing Prompts for Accelerated Inference of Large
+ Language Models},
+ booktitle = {{EMNLP}},
+ year = {2023}
+}
+
+@article{Lost_in_the_middle,
+ author = {Nelson F. Liu and
+ Kevin Lin and
+ John Hewitt and others},
+ title = {Lost in the Middle: How Language Models Use Long Contexts},
+  journal = {arXiv:2307.03172},
+ year = {2023}
+}
+
+@article{ahmed2024automatic,
+ title={Automatic Semantic Augmentation of Language Model Prompts (for Code Summarization)},
+ author={Toufique Ahmed and Kunal Suresh Pai and Premkumar Devanbu and Earl T. Barr},
+ year={2024},
+ journal={arXiv:2304.06815}
+}
+
+@article{ActiveRAG,
+ title={ActiveRAG: Revealing the Treasures of Knowledge via Active Learning},
+ author={Xu, Zhipeng and Liu, Zhenghao and Liu, Yibin and others},
+ journal={arXiv:2402.13547},
+ year={2024}
+}
+
+@article{CODEGEN-MONO,
+ author = {Erik Nijkamp and
+ Bo Pang and
+ Hiroaki Hayashi and others},
+ title = {A Conversational Paradigm for Program Synthesis},
+  journal = {arXiv:2203.13474},
+ year = {2022}
+}
+
+@article{DBLP:journals/corr/abs-2307-06940,
+ title={Animate-a-story: Storytelling with retrieval-augmented video generation},
+ author={He, Yingqing and Xia, Menghan and Chen, Haoxin and others},
+ journal={arXiv:2307.06940},
+ year={2023}
+}
+
+@inproceedings{LoRA,
+ author = {Edward J. Hu and
+ Yelong Shen and
+ Phillip Wallis and others},
+ title = {LoRA: Low-Rank Adaptation of Large Language Models},
+ booktitle = {{ICLR}},
+ year = {2022}
+}
+
+@article{DBLP:journals/corr/abs-2306-06490,
+ author = {Changshu Liu and
+ Pelin {\c{C}}etin and
+ Yogesh Patodia and others},
+ title = {Automated Code Editing with Search-Generate-Modify},
+ journal = {arXiv:2306.06490},
+ year = {2023}
+}
+
+@inproceedings{DBLP:conf/aaai/JoshiSG0VR23,
+ author = {Harshit Joshi and
+ Jos{\'{e}} Pablo Cambronero S{\'{a}}nchez and
+ Sumit Gulwani and others},
+ title = {Repair Is Nearly Generation: Multilingual Program Repair with LLMs},
+ booktitle = {AAAI},
+ year = {2023}
+}
+
+@article{FLARE,
+ title={Active retrieval augmented generation},
+ author={Jiang, Zhengbao and Xu, Frank F and Gao, Luyu and others},
+ journal={arXiv:2305.06983},
+ year={2023}
+}
+
+@article{lm-calibration,
+ author = {Zhengbao Jiang and
+ Jun Araki and
+ Haibo Ding and
+ Graham Neubig},
+ title = {How Can We Know \emph{When} Language Models Know? On the Calibration
+ of Language Models for Question Answering},
+ journal = {TACL},
+ year = {2021}
+}
+
+@inproceedings{LLM_Struggle_to_Learn_Long-Tail_Knowledge,
+ author = {Nikhil Kandpal and
+ Haikang Deng and
+ Adam Roberts and others},
+ title = {Large Language Models Struggle to Learn Long-Tail Knowledge},
+ booktitle = {ICML},
+ year = {2023}
+}
+
+@article{LLM-Knowledge-Boundary,
+ author = {Ruiyang Ren and
+ Yuhao Wang and
+ Yingqi Qu and others},
+ title = {Investigating the Factual Knowledge Boundary of Large Language Models
+ with Retrieval Augmentation},
+  journal = {arXiv:2307.11019},
+ year = {2023}
+}
+
+@inproceedings{SKR,
+ author = {Yile Wang and
+ Peng Li and
+ Maosong Sun and
+ Yang Liu},
+ title = {Self-Knowledge Guided Retrieval Augmentation for Large Language Models},
+ booktitle = {{EMNLP Findings}},
+ year = {2023}
+}
+
+@article{Rowen,
+ author = {Hanxing Ding and
+ Liang Pang and
+ Zihao Wei and others},
+ title = {Retrieve Only When It Needs: Adaptive Retrieval Augmentation for Hallucination
+ Mitigation in Large Language Models},
+ journal = {arXiv:2402.10612},
+  year = {2024}
+}
+
+@article{AdaptiveRAG,
+ title={Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity},
+ author={Jeong, Soyeong and Baek, Jinheon and Cho, Sukmin and others},
+ journal={arXiv:2403.14403},
+ year={2024}
+}
+
+@inproceedings{DBLP:conf/emnlp/ZhangCZKLZMLC23,
+ author = {Fengji Zhang and
+ Bei Chen and others},
+ title = {RepoCoder: Repository-Level Code Completion Through Iterative Retrieval and Generation},
+ booktitle = {EMNLP},
+ year = {2023}
+}
+
+@inproceedings{ITER-RETGEN,
+ author = {Zhihong Shao and
+ Yeyun Gong and
+ Yelong Shen and others},
+ title = {Enhancing Retrieval-Augmented Large Language Models with Iterative
+ Retrieval-Generation Synergy},
+ booktitle = {EMNLP Findings},
+ year = {2023}
+}
+
+@inproceedings{SelfMemory,
+ author = {Xin Cheng and
+ Di Luo and
+ Xiuying Chen and others},
+ title = {Lift Yourself Up: Retrieval-augmented Text Generation with Self-Memory},
+ booktitle = {NeurIPS},
+ year = {2023}
+}
+
+@article{RAT,
+ title={RAT: Retrieval Augmented Thoughts Elicit Context-Aware Reasoning in Long-Horizon Generation},
+ author={Wang, Zihao and Liu, Anji and Lin, Haowei and others},
+ journal={arXiv:2403.05313},
+ year={2024}
+}
+
+@article{DBLP:journals/corr/abs-2401-05856,
+ author = {Scott Barnett and
+ Stefanus Kurniawan and
+ Srikanth Thudumu and others},
+ title = {Seven Failure Points When Engineering a Retrieval Augmented Generation
+ System},
+ journal = {arXiv:2401.05856},
+ year = {2024}
+}
+
+@article{DBLP:journals/corr/abs-2401-14887,
+ author = {Florin Cuconasu and
+ Giovanni Trappolini and
+ Federico Siciliano and others},
+ title = {The Power of Noise: Redefining Retrieval for {RAG} Systems},
+ journal = {arXiv:2401.14887},
+ year = {2024}
+}
+
+@article{qiu2022evaluating,
+ title={Evaluating the impact of model scale for compositional generalization in semantic parsing},
+ author={Qiu, Linlu and Shaw, Peter and Pasupat, Panupong and others},
+ journal={arXiv:2205.12253},
+ year={2022}
+}
+
+@article{Query_Expansion_by_Prompting_LLMs,
+ author = {Rolf Jagerman and
+ Honglei Zhuang and
+ Zhen Qin and others},
+ title = {Query Expansion by Prompting Large Language Models},
+  journal = {arXiv:2305.03653},
+ year = {2023}
+}
+
+@article{EA,
+ author = {Hailin Zhang and
+ Penghao Zhao and
+ Xupeng Miao and others},
+ title = {Experimental Analysis of Large-scale Learnable Vector Storage Compression},
+ journal = {{VLDB}},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2302-05578,
+ author = {Renat Aksitov and
+ Chung{-}Ching Chang and
+ David Reitter and others},
+ title = {Characterizing Attribution and Fluency Tradeoffs for Retrieval-Augmented
+ Large Language Models},
+ journal = {arXiv:2302.05578},
+ year = {2023}
+}
+
+@article{DBLP:journals/corr/abs-2308-16137,
+ author = {Chi Han and
+ Qifan Wang and
+ Wenhan Xiong and others},
+ title = {LM-Infinite: Simple On-the-Fly Length Generalization for Large Language
+ Models},
+ journal = {arXiv:2308.16137},
+ year = {2023}
+}
+
+@techreport{trajtenberg2018ai,
+ title={AI as the next GPT: a Political-Economy Perspective},
+ author={Trajtenberg, Manuel},
+ year={2018},
+ institution={National Bureau of Economic Research}
+}
+
+@article{liu2023deid,
+  title={{DeID-GPT}: Zero-shot medical text de-identification by {GPT-4}},
+ author={Liu, Zhengliang and Huang, Yue and Yu, Xiaowei and Zhang, Lu and Wu, Zihao and Cao, Chao and Dai, Haixing and Zhao, Lin and Li, Yiwei and Shu, Peng and others},
+  journal={arXiv:2303.11032},
+ year={2023}
+}
+
+@article{leippold2023thus,
+  title={Thus spoke {GPT-3}: Interviewing a large-language model on climate finance},
+ author={Leippold, Markus},
+ journal={Finance Research Letters},
+ volume={53},
+ pages={103617},
+ year={2023},
+ publisher={Elsevier}
+}
+
+@article{yenduri2024gpt,
+  title={{GPT} (Generative Pre-trained Transformer)--A comprehensive review on enabling technologies, potential applications, emerging challenges, and future directions},
+ author={Yenduri, Gokul and Ramalingam, M and Selvi, G Chemmalar and Supriya, Y and Srivastava, Gautam and Maddikunta, Praveen Kumar Reddy and Raj, G Deepti and Jhaveri, Rutvij H and Prabadevi, B and Wang, Weizheng and others},
+ journal={IEEE Access},
+ year={2024},
+ publisher={IEEE}
+}
+
+@article{lewis2020retrieval,
+  title={Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
+ author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
+ journal={Advances in Neural Information Processing Systems},
+ volume={33},
+ pages={9459--9474},
+ year={2020}
+}
+
+@article{lo2023clear,
+  title={The {CLEAR} path: A framework for enhancing information literacy through prompt engineering},
+ author={Lo, Leo S},
+ journal={The Journal of Academic Librarianship},
+ volume={49},
+ number={4},
+ pages={102720},
+ year={2023},
+ publisher={Elsevier}
+}
+
+@article{hu2021lora,
+  title={{LoRA}: Low-rank adaptation of large language models},
+ author={Hu, Edward J and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Shean and Wang, Lu and Chen, Weizhu},
+  journal={arXiv:2106.09685},
+ year={2021}
+}
+
+@inproceedings{wortsman2022robust,
+ title={Robust fine-tuning of zero-shot models},
+ author={Wortsman, Mitchell and Ilharco, Gabriel and Kim, Jong Wook and Li, Mike and Kornblith, Simon and Roelofs, Rebecca and Lopes, Raphael Gontijo and Hajishirzi, Hannaneh and Farhadi, Ali and Namkoong, Hongseok and others},
+ booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
+ pages={7959--7971},
+ year={2022}
+}
+
+@article{friederich2017fine,
+ title={Fine-tuning},
+ author={Friederich, Simon},
+ journal={The Stanford encyclopedia of philosophy},
+ year={2017}
+}
+
+@article{white2023prompt,
+  title={A prompt pattern catalog to enhance prompt engineering with {ChatGPT}},
+ author={White, Jules and Fu, Quchen and Hays, Sam and Sandborn, Michael and Olea, Carlos and Gilbert, Henry and Elnashar, Ashraf and Spencer-Smith, Jesse and Schmidt, Douglas C},
+  journal={arXiv:2302.11382},
+ year={2023}
+}
+
+@article{sahoo2024systematic,
+ title={A systematic survey of prompt engineering in large language models: Techniques and applications},
+ author={Sahoo, Pranab and Singh, Ayush Kumar and Saha, Sriparna and Jain, Vinija and Mondal, Samrat and Chadha, Aman},
+  journal={arXiv:2402.07927},
+ year={2024}
+}
+
+@article{tonmoy2024comprehensive,
+ title={A comprehensive survey of hallucination mitigation techniques in large language models},
+ author={Tonmoy, SM and Zaman, SM and Jain, Vinija and Rani, Anku and Rawte, Vipula and Chadha, Aman and Das, Amitava},
+  journal={arXiv:2401.01313},
+ year={2024}
+}
+
+@article{touvron2023llama,
+  title={{Llama 2}: Open foundation and fine-tuned chat models},
+ author={Touvron, Hugo and Martin, Louis and Stone, Kevin and Albert, Peter and Almahairi, Amjad and Babaei, Yasmine and Bashlykov, Nikolay and Batra, Soumya and Bhargava, Prajjwal and Bhosale, Shruti and others},
+  journal={arXiv:2307.09288},
+ year={2023}
+}
+
+@article{chung2024scaling,
+ title={Scaling instruction-finetuned language models},
+ author={Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Yunxuan and Wang, Xuezhi and Dehghani, Mostafa and Brahma, Siddhartha and others},
+ journal={Journal of Machine Learning Research},
+ volume={25},
+ number={70},
+ pages={1--53},
+ year={2024}
+}
+
+@article{dettmers2024qlora,
+  title={{QLoRA}: Efficient finetuning of quantized {LLMs}},
+ author={Dettmers, Tim and Pagnoni, Artidoro and Holtzman, Ari and Zettlemoyer, Luke},
+ journal={Advances in Neural Information Processing Systems},
+ volume={36},
+ year={2024}
+}
+
+@misc{brown2020languagemodelsfewshotlearners,
+ title={Language Models are Few-Shot Learners},
+ author={Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert-Voss and Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and Alec Radford and Ilya Sutskever and Dario Amodei},
+ year={2020},
+ eprint={2005.14165},
+ archivePrefix={arXiv},
+ primaryClass={cs.CL},
+ url={https://arxiv.org/abs/2005.14165},
+}
+
+@misc{raffel2023exploringlimitstransferlearning,
+ title={Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
+ author={Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
+ year={2023},
+ eprint={1910.10683},
+ archivePrefix={arXiv},
+ primaryClass={cs.LG},
+ url={https://arxiv.org/abs/1910.10683},
+}