{
  "ctfidf_model": {
    "bm25_weighting": false,
    "reduce_frequent_words": false
  },
  "vectorizer_model": {
    "params": {
      "analyzer": "word",
      "binary": false,
      "decode_error": "strict",
      "encoding": "utf-8",
      "input": "content",
      "lowercase": true,
      "max_df": 1.0,
      "max_features": null,
      "min_df": 2,
      "ngram_range": [
        1,
        5
      ],
      "stop_words": "english",
      "strip_accents": null,
      "token_pattern": "(?u)\\b\\w\\w+\\b",
      "vocabulary": null
    },
    "vocab": {
      "bert": 1152,
      "language": 5758,
      "understanding": 11761,
      "latest": 6156,
      "work": 12246,
      "representations": 9731,
      "carefully": 1398,
      "integrates": 5458,
      "contextualized": 2156,
      "features": 4014,
      "model": 7098,
      "training": 11539,
      "enables": 3310,
      "series": 10292,
      "success": 10909,
      "especially": 3471,
      "various": 12050,
      "machine": 6751,
      "reading": 9368,
      "comprehension": 1923,
      "natural": 7707,
      "inference": 5270,
      "tasks": 11157,
      "existing": 3677,
      "representation": 9725,
      "models": 7250,
      "including": 5175,
      "gpt": 4665,
      "exploit": 3829,
      "plain": 8511,
      "character": 1515,
      "word": 12237,
      "embeddings": 3233,
      "rarely": 9346,
      "consider": 2081,
      "incorporating": 5215,
      "structured": 10819,
      "semantic": 10226,
      "information": 5288,
      "provide": 9149,
      "rich": 10003,
      "semantics": 10252,
      "promote": 8979,
      "propose": 9054,
      "incorporate": 5210,
      "explicit": 3825,
      "contextual": 2155,
      "pretrained": 8742,
      "role": 10050,
      "labeling": 5733,
      "introduce": 5534,
      "improved": 5139,
      "capable": 1375,
      "explicitly": 3827,
      "backbone": 1002,
      "keeps": 5625,
      "convenient": 2195,
      "usability": 11880,
      "light": 6314,
      "finetuning": 4119,
      "way": 12174,
      "substantial": 10892,
      "taskspecific": 11307,
      "modifications": 7573,
      "compared": 1837,
      "simple": 10458,
      "concept": 1992,
      "powerful": 8652,
      "obtains": 7999,
      "new": 7805,
      "stateoftheart": 10701,
      "substantially": 10899,
      "improves": 5147,
      "results": 9877,
      "zeroshot": 12309,
      "paraphrase": 8300,
      "generation": 4511,
      "multilingual": 7616,
      "leveraging": 6293,
      "parallel": 8276,
      "texts": 11423,
      "automatically": 950,
      "generate": 4439,
      "paraphrases": 8301,
      "drawn": 3093,
      "attention": 888,
      "size": 10492,
      "highquality": 4908,
      "corpus": 2232,
      "limited": 6349,
      "translation": 11634,
      "known": 5721,
      "pivoting": 8508,
      "method": 6932,
      "typical": 11720,
      "approach": 728,
      "end": 3346,
      "notice": 7912,
      "process": 8881,
      "involves": 5576,
      "multiple": 7649,
      "likely": 6336,
      "incur": 5235,
      "drift": 3097,
      "twostep": 11714,
      "translations": 11646,
      "paper": 8207,
      "inspired": 5373,
      "transformerbased": 11616,
      "unified": 11800,
      "paraphrasing": 8302,
      "purely": 9216,
      "trained": 11533,
      "data": 2380,
      "conduct": 2019,
      "step": 10744,
      "generated": 4471,
      "semantically": 10247,
      "similar": 10453,
      "input": 5346,
      "sentence": 10261,
      "shares": 10334,
      "architecture": 789,
      "radford": 9303,
      "et": 3488,
      "al": 558,
      "2018": 37,
      "able": 173,
      "pretrain": 8741,
      "largescale": 6126,
      "fluency": 4170,
      "output": 8165,
      "sentences": 10266,
      "addition": 372,
      "mechanism": 6889,
      "denoising": 2710,
      "autoencoder": 930,
      "improve": 5118,
      "diversity": 3031,
      "robustness": 10045,
      "experimental": 3736,
      "surpasses": 11012,
      "terms": 11356,
      "relevance": 9625,
      "efficiency": 3184,
      "knowledgeenhanced": 5713,
      "pretraining": 8772,
      "commonsense": 1800,
      "story": 10766,
      "generating": 4495,
      "reasonable": 9397,
      "leading": 6170,
      "context": 2139,
      "important": 5099,
      "challenging": 1493,
      "task": 11111,
      "spite": 10658,
      "modeling": 7243,
      "local": 6687,
      "coherence": 1757,
      "neural": 7797,
      "gpt2": 4671,
      "suffer": 10935,
      "repetition": 9702,
      "logic": 6698,
      "conflicts": 2059,
      "lack": 5739,
      "stories": 10765,
      "conjecture": 2063,
      "difficulty": 2924,
      "relevant": 9627,
      "knowledge": 5649,
      "causal": 1423,
      "relationships": 9610,
      "planning": 8513,
      "entities": 3423,
      "events": 3593,
      "proper": 9048,
      "temporal": 11346,
      "order": 8107,
      "devise": 2847,
      "utilize": 11988,
      "external": 3914,
      "bases": 1079,
      "capture": 1390,
      "dependencies": 2715,
      "employ": 3283,
      "multitask": 7674,
      "learning": 6183,
      "combines": 1782,
      "discriminative": 2967,
      "objective": 7974,
      "distinguish": 3001,
      "true": 11676,
      "fake": 3986,
      "automatic": 935,
      "manual": 6817,
      "evaluation": 3538,
      "shows": 10390,
      "baselines": 1072,
      "particularly": 8312,
      "global": 4647,
      "effect": 3134,
      "paragraph": 8275,
      "sequence": 10279,
      "tokens": 11489,
      "text": 11381,
      "read": 9366,
      "article": 809,
      "study": 10849,
      "implicit": 5094,
      "affect": 487,
      "quality": 9236,
      "specifically": 10622,
      "better": 1172,
      "stage": 10674,
      "english": 3379,
      "lead": 6166,
      "higher": 4879,
      "bleu": 1224,
      "score": 10156,
      "lower": 6738,
      "perplexity": 8474,
      "experiments": 3761,
      "selfcollected": 10216,
      "chinese": 1620,
      "essay": 3475,
      "dataset": 2475,
      "level": 6269,
      "lm": 6683,
      "challenge": 1468,
      "closedbook": 1683,
      "science": 10146,
      "exam": 3610,
      "based": 1025,
      "question": 9264,
      "answering": 664,
      "prior": 8834,
      "standardized": 10685,
      "exams": 3622,
      "requires": 9765,
      "support": 10998,
      "large": 6000,
      "targeted": 11108,
      "retrieving": 9958,
      "timeconsuming": 11476,
      "questions": 9287,
      "embedded": 3230,
      "complex": 1895,
      "retrieval": 9944,
      "dual": 3103,
      "theory": 11449,
      "cognitive": 1753,
      "framework": 4235,
      "intuitive": 5555,
      "reasoning": 9401,
      "module": 7578,
      "efficiently": 3200,
      "solve": 10547,
      "problems": 8871,
      "related": 9598,
      "example": 3616,
      "relying": 9644,
      "evaluate": 3499,
      "arc": 787,
      "yields": 12304,
      "considerable": 2082,
      "classification": 1651,
      "performance": 8360,
      "emerging": 3261,
      "types": 11717,
      "provided": 9170,
      "significantly": 10422,
      "accuracy": 230,
      "competitive": 1875,
      "advantage": 472,
      "retrievalbased": 9952,
      "qa": 9226,
      "methods": 6974,
      "multihop": 7611,
      "long": 6703,
      "main": 6769,
      "problem": 8859,
      "lies": 6308,
      "sentencelevel": 10265,
      "traditional": 11515,
      "generative": 4591,
      "address": 385,
      "mrg": 7600,
      "incorporates": 5214,
      "graph": 4729,
      "learn": 6176,
      "consists": 2100,
      "realization": 9379,
      "responsible": 9859,
      "searching": 10183,
      "paths": 8324,
      "imitate": 5070,
      "imagination": 5068,
      "human": 4948,
      "writing": 12285,
      "transfer": 11594,
      "inferred": 5281,
      "generates": 4490,
      "complete": 1886,
      "unlike": 11831,
      "previous": 8805,
      "blackbox": 1217,
      "infers": 5283,
      "path": 8323,
      "provides": 9173,
      "explanatory": 3824,
      "views": 12132,
      "proposed": 9111,
      "works": 12271,
      "representative": 9736,
      "review": 9972,
      "product": 8921,
      "description": 2739,
      "informative": 5321,
      "strong": 10803,
      "design": 2744,
      "implementation": 5087,
      "chatbot": 1529,
      "using": 11935,
      "learningbased": 6249,
      "corresponding": 2250,
      "levels": 6271,
      "systematically": 11052,
      "speech": 10648,
      "recognition": 9510,
      "correction": 2239,
      "specific": 10606,
      "domain": 3051,
      "conversation": 2200,
      "simulation": 10476,
      "highest": 4886,
      "communication": 1807,
      "agent": 495,
      "academic": 191,
      "contribution": 2183,
      "implement": 5086,
      "explain": 3817,
      "following": 4185,
      "explainable": 3819,
      "artificial": 812,
      "intelligence": 5468,
      "connections": 2075,
      "network": 7784,
      "perspective": 8480,
      "integrated": 5457,
      "wechat": 12193,
      "finetuned": 4109,
      "backend": 1005,
      "interpret": 5521,
      "responses": 9849,
      "consistency": 2090,
      "coherency": 1759,
      "enhanced": 3398,
      "demands": 2640,
      "maintain": 6778,
      "characters": 1521,
      "shown": 10372,
      "achieved": 263,
      "good": 4658,
      "observe": 7987,
      "issues": 5593,
      "exist": 3674,
      "categorized": 1418,
      "folds": 4182,
      "hand": 4797,
      "guarantee": 4772,
      "usually": 11978,
      "contain": 2123,
      "errors": 3466,
      "does": 3045,
      "account": 222,
      "discourse": 2957,
      "relations": 9605,
      "directly": 2945,
      "enhance": 3385,
      "twostage": 11711,
      "organize": 8115,
      "outline": 8126,
      "depicts": 2720,
      "second": 10184,
      "expand": 3718,
      "controlled": 2191,
      "supervision": 10993,
      "signals": 10398,
      "incorporated": 5213,
      "reduce": 9542,
      "auxiliary": 969,
      "relation": 9603,
      "outperforms": 8145,
      "baseline": 1063,
      "approaches": 766,
      "metrics": 7023,
      "datatotext": 2555,
      "augmentation": 913,
      "application": 693,
      "domains": 3056,
      "obstacle": 7991,
      "numbers": 7958,
      "instances": 5387,
      "available": 972,
      "samples": 10089,
      "novel": 7914,
      "fewshot": 4028,
      "setting": 10315,
      "augments": 926,
      "replacing": 9709,
      "values": 12025,
      "alternative": 605,
      "ones": 8023,
      "category": 1420,
      "ii": 5053,
      "iii": 5054,
      "proposing": 9127,
      "noise": 7888,
      "use": 11884,
      "make": 6787,
      "sure": 11005,
      "given": 4630,
      "sample": 10086,
      "correctly": 2241,
      "reconstructed": 9527,
      "having": 4829,
      "formulated": 4209,
      "benchmarks": 1134,
      "weakly": 12182,
      "supervised": 10984,
      "paradigm": 8272,
      "outperform": 8132,
      "fully": 4301,
      "seq2seq": 10277,
      "10": 1,
      "annotations": 649,
      "utilizing": 11998,
      "annotated": 642,
      "boost": 1238,
      "standard": 10681,
      "points": 8556,
      "establishing": 3484,
      "datasets": 2512,
      "llm": 6401,
      "helps": 4855,
      "optimize": 8098,
      "crystal": 2333,
      "surface": 11006,
      "conventional": 2196,
      "optimization": 8092,
      "expert": 3814,
      "physics": 8492,
      "algorithms": 563,
      "trend": 11662,
      "automation": 957,
      "entire": 3422,
      "industry": 5263,
      "drawback": 3089,
      "relatively": 9613,
      "laborintensive": 5736,
      "suboptimal": 10885,
      "refinement": 9563,
      "technical": 11326,
      "dilemma": 2931,
      "remained": 9647,
      "emergence": 3245,
      "llms": 6440,
      "openais": 8038,
      "chatgpt": 1534,
      "googles": 4663,
      "bard": 1016,
      "explores": 3857,
      "possibility": 8603,
      "applying": 722,
      "gpt35": 4684,
      "gpt4": 4688,
      "simply": 10471,
      "conversations": 2206,
      "assisted": 861,
      "difference": 2871,
      "time": 11472,
      "code": 1699,
      "deep": 2596,
      "reinforcement": 9591,
      "acquire": 321,
      "optimized": 8099,
      "solution": 10541,
      "spanning": 10585,
      "proposition": 9128,
      "ideas": 5040,
      "perform": 8349,
      "detailed": 2793,
      "break": 1266,
      "converse": 2208,
      "posing": 8593,
      "openended": 8049,
      "heuristic": 4860,
      "definitive": 2625,
      "commands": 1788,
      "guide": 4780,
      "processes": 8897,
      "conceptual": 1995,
      "humanai": 4993,
      "strategies": 10775,
      "practical": 8662,
      "implications": 5093,
      "achieve": 237,
      "significant": 10400,
      "milestone": 7036,
      "automated": 933,
      "production": 8922,
      "pipeline": 8502,
      "rank": 9324,
      "math": 6857,
      "critical": 2309,
      "processing": 8898,
      "recent": 9448,
      "studies": 10836,
      "adopted": 435,
      "sequencetosequence": 10287,
      "transform": 11603,
      "descriptions": 2741,
      "mathematical": 6863,
      "expressions": 3873,
      "prone": 9045,
      "minor": 7056,
      "mistakes": 7066,
      "handle": 4800,
      "limitation": 6338,
      "ranking": 9328,
      "joint": 5608,
      "learns": 6251,
      "correct": 2233,
      "incorrect": 5221,
      "treebased": 11658,
      "specially": 10604,
      "designed": 2759,
      "online": 8025,
      "update": 11866,
      "demonstrate": 2645,
      "effectiveness": 3167,
      "benchmark": 1110,
      "consistently": 2094,
      "classical": 1649,
      "784": 99,
      "improving": 5157,
      "ability": 133,
      "focus": 4173,
      "structure": 10817,
      "general": 4400,
      "numerical": 7959,
      "properties": 9050,
      "robustly": 10044,
      "measurement": 6884,
      "estimation": 3487,
      "leverages": 6283,
      "embedding": 3231,
      "encode": 3320,
      "number": 7948,
      "individual": 5250,
      "loss": 6723,
      "function": 4309,
      "integrate": 5455,
      "extensive": 3883,
      "different": 2874,
      "experiment": 3733,
      "range": 9316,
      "comparison": 1862,
      "magnitude": 6768,
      "ablation": 167,
      "conducted": 2040,
      "impact": 5075,
      "topic": 11502,
      "transferable": 11599,
      "table": 11073,
      "weaklysupervised": 12184,
      "transformer": 11608,
      "jointly": 5610,
      "encoding": 3332,
      "produce": 8915,
      "query": 9261,
      "settings": 10316,
      "systems": 11055,
      "deployed": 2722,
      "corpora": 2230,
      "distributions": 3006,
      "quite": 9301,
      "distinct": 2998,
      "simulate": 10472,
      "shift": 10346,
      "scenario": 10120,
      "designing": 2769,
      "consisting": 2099,
      "splits": 10660,
      "groups": 4764,
      "popular": 8570,
      "empirically": 3282,
      "despite": 2777,
      "opendomain": 8043,
      "degrades": 2628,
      "evaluated": 3517,
      "unseen": 11847,
      "topics": 11504,
      "response": 9843,
      "pragmatic": 8677,
      "adaptation": 353,
      "comprising": 1958,
      "vocabulary": 12161,
      "injection": 5340,
      "texttotext": 11434,
      "generator": 4623,
      "t5": 11069,
      "focused": 4178,
      "logical": 6699,
      "form": 4194,
      "reasonably": 9399,
      "believe": 1107,
      "split": 10659,
      "robust": 10041,
      "solutions": 10545,
      "suited": 10954,
      "deployment": 2724,
      "synthetic": 11043,
      "books": 1237,
      "ways": 12176,
      "written": 12286,
      "aided": 528,
      "ai": 508,
      "technologies": 11337,
      "like": 6320,
      "gpt3": 4679,
      "eventually": 3594,
      "replace": 9705,
      "authored": 928,
      "publications": 9207,
      "kind": 5643,
      "tools": 11497,
      "purpose": 9217,
      "introduced": 5548,
      "stands": 10688,
      "created": 2299,
      "deploying": 2723,
      "technology": 11338,
      "precisely": 8681,
      "autoregressive": 963,
      "humanlike": 5005,
      "supported": 11001,
      "case": 1404,
      "value": 12022,
      "discussed": 2977,
      "emphasizes": 3270,
      "artistic": 819,
      "issue": 5586,
      "comes": 1787,
      "aigenerated": 532,
      "content": 2130,
      "introduces": 5549,
      "projects": 8958,
      "interactive": 5495,
      "andor": 638,
      "combined": 1781,
      "focuses": 4179,
      "aesthetics": 486,
      "art": 808,
      "search": 10174,
      "decoder": 2580,
      "transformers": 11625,
      "continued": 2160,
      "increasing": 5226,
      "scale": 10106,
      "reaching": 9364,
      "hundreds": 5025,
      "billions": 1202,
      "parameters": 8291,
      "sets": 10312,
      "prompting": 9006,
      "foundation": 4217,
      "remain": 9646,
      "fields": 4055,
      "prevents": 8804,
      "possibly": 8607,
      "organizations": 8114,
      "train": 11526,
      "separate": 10273,
      "58": 82,
      "billion": 1199,
      "previously": 8822,
      "best": 1163,
      "margin": 6829,
      "175": 28,
      "measured": 6883,
      "result": 9868,
      "files": 4060,
      "freely": 4284,
      "endtoend": 3361,
      "unpaired": 11841,
      "technique": 11331,
      "encoderdecoder": 3328,
      "acoustic": 320,
      "units": 11820,
      "pseudo": 9195,
      "codes": 1738,
      "derived": 2732,
      "offline": 8019,
      "predict": 8685,
      "masked": 6837,
      "encoder": 3322,
      "lets": 6268,
      "reconstruct": 9526,
      "autoregressively": 968,
      "instead": 5390,
      "textual": 11436,
      "scripts": 10165,
      "original": 8118,
      "comprehensive": 1924,
      "error": 3459,
      "rate": 9347,
      "20": 35,
      "subsets": 10891,
      "release": 9617,
      "sources": 10578,
      "enriching": 3413,
      "wikidata": 12229,
      "completion": 1893,
      "augmenting": 921,
      "additional": 378,
      "divides": 3036,
      "steps": 10752,
      "subject": 10879,
      "suggestion": 10946,
      "populating": 8580,
      "gap": 4376,
      "filling": 4061,
      "remaining": 9648,
      "present": 8713,
      "idea": 5038,
      "combining": 1783,
      "base": 1021,
      "interpretation": 5524,
      "free": 4280,
      "suggest": 10940,
      "metadata": 6931,
      "headers": 4833,
      "property": 9051,
      "linking": 6375,
      "candidate": 1333,
      "synthesize": 11039,
      "prompts": 9028,
      "finally": 4069,
      "verify": 12113,
      "synthesized": 11040,
      "linked": 6373,
      "web": 12188,
      "source": 10571,
      "wikipedia": 12230,
      "prototypical": 9139,
      "calibration": 1328,
      "incontext": 5203,
      "gptlike": 4706,
      "recognized": 9516,
      "handcrafted": 4798,
      "templates": 11345,
      "demonstration": 2706,
      "adaptively": 366,
      "decision": 2572,
      "boundary": 1255,
      "zero": 12305,
      "greedy": 4755,
      "decoding": 2583,
      "concretely": 2011,
      "adopts": 438,
      "mixture": 7082,
      "distribution": 3005,
      "estimate": 3485,
      "clusters": 1694,
      "categories": 1416,
      "assign": 850,
      "cluster": 1693,
      "label": 5728,
      "solving": 10558,
      "weighted": 12197,
      "matching": 6854,
      "prediction": 8689,
      "calibrated": 1326,
      "likelihood": 6335,
      "improvement": 5142,
      "diverse": 3011,
      "set": 10308,
      "analysis": 617,
      "scales": 10112,
      "indicates": 5244,
      "expected": 3725,
      "greatly": 4753,
      "class": 1646,
      "imbalance": 5069,
      "selfsupervised": 10223,
      "autoencoders": 931,
      "extensively": 3909,
      "explored": 3850,
      "years": 12292,
      "seen": 10197,
      "wide": 12205,
      "adoption": 437,
      "contrastive": 2174,
      "heavily": 4840,
      "relies": 9639,
      "structural": 10815,
      "complicated": 1911,
      "dominant": 3068,
      "progress": 8944,
      "graphs": 4737,
      "far": 3998,
      "reached": 9362,
      "potential": 8615,
      "identify": 5045,
      "examine": 3613,
      "negatively": 7777,
      "development": 2833,
      "reconstruction": 9529,
      "metric": 7022,
      "mitigates": 7074,
      "reconstructing": 9528,
      "structures": 10821,
      "feature": 4009,
      "masking": 6841,
      "strategy": 10783,
      "scaled": 10111,
      "benefit": 1148,
      "21": 42,
      "public": 9199,
      "careful": 1397,
      "outperformance": 8139,
      "demonstrates": 2693,
      "allinone": 592,
      "lowresource": 6745,
      "nlp": 7859,
      "leverage": 6272,
      "rules": 10072,
      "synonym": 11033,
      "replacement": 9707,
      "finetune": 4103,
      "generalpurpose": 4436,
      "consequently": 2080,
      "trivial": 11675,
      "yielding": 12303,
      "lowquality": 6740,
      "combat": 1777,
      "goal": 4652,
      "single": 10482,
      "quickly": 9300,
      "grasp": 4742,
      "inherent": 5327,
      "synthesis": 11038,
      "law": 6161,
      "target": 11104,
      "reformulates": 9572,
      "examples": 3618,
      "heterogeneous": 4859,
      "format": 4197,
      "employs": 3290,
      "objectives": 7979,
      "granularity": 4728,
      "partial": 8306,
      "attempt": 884,
      "apply": 721,
      "100": 4,
      "produced": 8918,
      "successfully": 10929,
      "deberta": 2563,
      "transfers": 11602,
      "confidence": 2050,
      "speaker": 10596,
      "key": 5627,
      "variability": 12028,
      "compact": 1813,
      "dependent": 2717,
      "hidden": 4863,
      "unit": 11819,
      "contributions": 2184,
      "used": 11897,
      "facilitate": 3952,
      "adaptive": 365,
      "sat": 10093,
      "test": 11366,
      "unsupervised": 11855,
      "sensitivity": 10259,
      "reduced": 9545,
      "selection": 10208,
      "trustworthy": 11680,
      "subset": 10890,
      "smooth": 10521,
      "probabilities": 8853,
      "serving": 10305,
      "scores": 10159,
      "increased": 5224,
      "sparsity": 10593,
      "addressed": 413,
      "outperformed": 8140,
      "independent": 5237,
      "adapted": 359,
      "12": 10,
      "absolute": 181,
      "90": 110,
      "79": 100,
      "89": 109,
      "relative": 9611,
      "respectively": 9836,
      "consistent": 2092,
      "improvements": 5144,
      "lstm": 6749,
      "rescoring": 9772,
      "impressive": 5107,
      "huge": 4946,
      "generally": 4434,
      "incurs": 5236,
      "high": 4866,
      "cost": 2257,
      "recently": 9490,
      "augment": 911,
      "smaller": 10512,
      "retriever": 9956,
      "demonstrated": 2670,
      "promising": 8964,
      "capabilities": 1337,
      "remains": 9649,
      "unclear": 11737,
      "competitively": 1880,
      "counterparts": 2277,
      "generalization": 4424,
      "downstream": 3074,
      "heldout": 4845,
      "prompted": 9005,
      "parametric": 8298,
      "taskagnostic": 11152,
      "unlabeled": 11825,
      "potentially": 8641,
      "noisy": 7889,
      "retrieved": 9954,
      "fusion": 4342,
      "perceiver": 8344,
      "gated": 4392,
      "crossattention": 2316,
      "notably": 7907,
      "16": 26,
      "seven": 10323,
      "scaling": 10114,
      "backtranslation": 1009,
      "aims": 542,
      "translate": 11632,
      "spoken": 10661,
      "scarcity": 10117,
      "labeled": 5729,
      "translating": 11633,
      "indomain": 5254,
      "applied": 716,
      "alleviate": 584,
      "limits": 6362,
      "overcome": 8177,
      "prompt": 8985,
      "randomly": 9314,
      "concatenates": 1989,
      "induce": 5256,
      "style": 10873,
      "varied": 12037,
      "languages": 5994,
      "increases": 5225,
      "demonstrating": 2702,
      "facilitating": 3959,
      "future": 4345,
      "research": 9773,
      "field": 4044,
      "variational": 12034,
      "tags": 11093,
      "node": 7885,
      "associated": 863,
      "ideal": 5039,
      "integrating": 5460,
      "networks": 7793,
      "gnns": 4651,
      "computational": 1969,
      "complexity": 1908,
      "brought": 1295,
      "efficient": 3190,
      "effective": 3136,
      "fusing": 4341,
      "em": 3227,
      "called": 1329,
      "simultaneously": 10481,
      "big": 1194,
      "proposes": 9124,
      "modules": 7581,
      "procedure": 8878,
      "allows": 596,
      "separately": 10275,
      "allowing": 594,
      "interact": 5484,
      "business": 1319,
      "helped": 4851,
      "advance": 440,
      "numerous": 7961,
      "opened": 8048,
      "door": 3072,
      "modalities": 7094,
      "images": 5065,
      "music": 7689,
      "argue": 800,
      "unique": 11815,
      "characteristics": 1516,
      "mining": 7055,
      "making": 6794,
      "tackle": 11080,
      "challenges": 1474,
      "include": 5172,
      "multimodal": 7622,
      "privacy": 8843,
      "concerns": 1999,
      "memorizing": 6910,
      "cross": 2315,
      "codebook": 1736,
      "findings": 4085,
      "memorization": 6908,
      "capability": 1367,
      "contributes": 2181,
      "lot": 6726,
      "inspires": 5383,
      "bring": 1281,
      "memory": 6911,
      "memorize": 6909,
      "uses": 11932,
      "place": 8509,
      "phases": 8486,
      "addressing": 417,
      "restoring": 9862,
      "named": 7692,
      "reaches": 9363,
      "superior": 10972,
      "means": 6881,
      "obtain": 7993,
      "gains": 4370,
      "importance": 5098,
      "feasibility": 4006,
      "sheds": 10344,
      "direction": 2937,
      "crosstask": 2322,
      "highly": 4899,
      "sensitive": 10258,
      "choice": 1636,
      "selecting": 10207,
      "highperforming": 4907,
      "labels": 5734,
      "zps": 12328,
      "selects": 10211,
      "gradient": 4713,
      "humanwritten": 5022,
      "ensemble": 3414,
      "extend": 3876,
      "advantages": 474,
      "tuning": 11688,
      "translator": 11647,
      "yes": 12298,
      "engine": 3373,
      "report": 9712,
      "preliminary": 8705,
      "adopt": 434,
      "trigger": 11668,
      "differences": 2872,
      "evaluating": 3521,
      "performs": 8466,
      "commercial": 1791,
      "products": 8925,
      "google": 4661,
      "highresource": 4914,
      "explore": 3838,
      "interesting": 5505,
      "asks": 824,
      "pivot": 8505,
      "biomedical": 1212,
      "abstracts": 186,
      "reddit": 9541,
      "comments": 1790,
      "exhibits": 3667,
      "launch": 6158,
      "boosted": 1241,
      "comparable": 1816,
      "words": 12244,
      "hallucination": 4789,
      "interactivity": 5500,
      "quantitatively": 9252,
      "publicly": 9208,
      "carry": 1403,
      "23": 46,
      "covering": 2286,
      "common": 1792,
      "aspects": 829,
      "newly": 7848,
      "nonlatin": 7895,
      "script": 10163,
      "intermediate": 5511,
      "accurate": 234,
      "average": 980,
      "unreliable": 11844,
      "reasoner": 9400,
      "deductive": 2595,
      "inductive": 5259,
      "suffers": 10938,
      "hallucinations": 4796,
      "access": 201,
      "collaboration": 1767,
      "underlying": 11748,
      "rouge1": 10059,
      "summarization": 10958,
      "chrf": 1642,
      "multiturn": 7682,
      "engineering": 3374,
      "fashion": 4000,
      "extraction": 3927,
      "construction": 2117,
      "financial": 4081,
      "effort": 3203,
      "built": 1315,
      "approximately": 781,
      "raw": 9359,
      "glue": 4648,
      "superglue": 10971,
      "driven": 3098,
      "advancements": 457,
      "enabling": 3314,
      "comparisons": 1865,
      "drawing": 3091,
      "inspiration": 5368,
      "includes": 5174,
      "aim": 535,
      "released": 9623,
      "project": 8954,
      "understand": 11753,
      "comparative": 1829,
      "attracted": 896,
      "great": 4743,
      "fluent": 4171,
      "attains": 883,
      "remarkable": 9665,
      "quantitative": 9248,
      "chatgpts": 1609,
      "little": 6382,
      "comparing": 1860,
      "bertstyle": 1161,
      "falls": 3991,
      "short": 10349,
      "handling": 4805,
      "similarity": 10457,
      "achieves": 278,
      "sentiment": 10268,
      "questionanswering": 9284,
      "additionally": 381,
      "advanced": 443,
      "systematic": 11047,
      "adversarial": 478,
      "normal": 7901,
      "pushed": 9222,
      "toxic": 11508,
      "risk": 10017,
      "undesired": 11791,
      "alter": 602,
      "demanding": 2639,
      "computation": 1963,
      "requirements": 9762,
      "rely": 9641,
      "rulebased": 10071,
      "promptbased": 9001,
      "token": 11485,
      "elimination": 3225,
      "overall": 8171,
      "meaning": 6878,
      "center": 1439,
      "probability": 8854,
      "ultimately": 11727,
      "considered": 2086,
      "point": 8552,
      "rl": 10022,
      "literature": 6380,
      "cover": 2283,
      "uncertain": 11734,
      "outcomes": 8124,
      "utilizes": 11992,
      "share": 10330,
      "importantly": 5103,
      "require": 9753,
      "internal": 5517,
      "crucial": 2325,
      "servers": 10300,
      "accessible": 205,
      "apis": 684,
      "techniques": 11333,
      "showcasing": 10365,
      "abilities": 121,
      "complexities": 1907,
      "open": 8031,
      "world": 12278,
      "assessing": 841,
      "stability": 10669,
      "aspect": 826,
      "exploring": 3858,
      "transformations": 11605,
      "nlu": 7879,
      "indicate": 5240,
      "encounters": 3341,
      "degradation": 2626,
      "faces": 3949,
      "instability": 5385,
      "insights": 5364,
      "valuable": 12018,
      "limitations": 6340,
      "guiding": 4786,
      "meets": 6906,
      "feedback": 4020,
      "oracle": 8105,
      "realworld": 9386,
      "applications": 702,
      "cases": 1407,
      "assessed": 838,
      "rlhf": 10027,
      "prominent": 8961,
      "guidance": 4776,
      "algorithm": 561,
      "theoretical": 11444,
      "random": 9311,
      "descent": 2733,
      "proven": 9144,
      "policy": 8558,
      "reward": 9994,
      "makes": 6791,
      "optimizes": 8101,
      "precollected": 8683,
      "furthermore": 4324,
      "diffusion": 2927,
      "rounds": 10064,
      "advances": 466,
      "optimizing": 8102,
      "functions": 4314,
      "offers": 8015,
      "aligning": 570,
      "intentions": 5482,
      "know": 5647,
      "intent": 5480,
      "conversational": 2202,
      "rewriting": 10000,
      "aggregating": 504,
      "represent": 9723,
      "users": 11921,
      "real": 9370,
      "59": 83,
      "wrt": 12290,
      "highlighting": 4895,
      "vast": 12103,
      "survey": 11024,
      "legal": 6256,
      "transformed": 11607,
      "computer": 1980,
      "vision": 12136,
      "increasingly": 5230,
      "utilized": 11991,
      "automate": 932,
      "document": 3041,
      "integration": 5464,
      "raised": 9307,
      "bias": 1187,
      "explainability": 3818,
      "discuss": 2969,
      "arise": 801,
      "resources": 9828,
      "directions": 2941,
      "conclude": 2006,
      "doing": 3050,
      "hope": 4932,
      "overview": 8195,
      "current": 2345,
      "state": 10695,
      "highlight": 4890,
      "benefits": 1150,
      "aigc": 530,
      "need": 7763,
      "goes": 4655,
      "headlines": 4834,
      "analyze": 630,
      "create": 2296,
      "media": 6893,
      "coverage": 2284,
      "impossible": 5104,
      "miss": 7063,
      "opportunity": 8084,
      "certain": 1442,
      "era": 3451,
      "pure": 9215,
      "creation": 2303,
      "worth": 12282,
      "noting": 7913,
      "just": 5619,
      "tool": 11491,
      "people": 8339,
      "variants": 12033,
      "help": 4846,
      "unify": 11812,
      "diversified": 3030,
      "needed": 7770,
      "offering": 8010,
      "look": 6719,
      "ranging": 9323,
      "modern": 7565,
      "gan": 4374,
      "introducing": 5551,
      "fundamental": 4316,
      "type": 11715,
      "videos": 12129,
      "3d": 62,
      "summarize": 10964,
      "mainstream": 6774,
      "industries": 5262,
      "education": 3132,
      "creativity": 2305,
      "currently": 2365,
      "faced": 3948,
      "outlook": 8129,
      "evolve": 3603,
      "near": 7756,
      "learners": 6181,
      "evidence": 3597,
      "narrative": 7699,
      "unknown": 11824,
      "really": 9383,
      "prominently": 8962,
      "basis": 1088,
      "updating": 11869,
      "reveal": 9965,
      "chatbots": 1531,
      "analyzed": 634,
      "components": 1913,
      "special": 10598,
      "instrument": 5449,
      "analyzing": 637,
      "revealed": 9968,
      "performed": 8462,
      "referential": 9556,
      "worse": 12281,
      "syntactic": 11035,
      "simplicity": 10469,
      "initial": 5333,
      "version": 12120,
      "updated": 11867,
      "resulting": 9874,
      "facilitated": 3957,
      "lagged": 5752,
      "correlation": 2247,
      "suggests": 10948,
      "correlated": 2246,
      "group": 4763,
      "surprising": 11020,
      "constructed": 2115,
      "inputoutput": 5357,
      "variations": 12036,
      "formats": 4199,
      "appropriate": 778,
      "essential": 3476,
      "revisit": 9977,
      "view": 12130,
      "fixed": 4161,
      "attributes": 905,
      "unsatisfactory": 11846,
      "observation": 7982,
      "interpretable": 5523,
      "manner": 6815,
      "grammatical": 4722,
      "cuttingedge": 2374,
      "developed": 2827,
      "openai": 8036,
      "surprisingly": 11021,
      "followup": 4191,
      "compare": 1834,
      "gec": 4397,
      "testing": 11377,
      "outputs": 8168,
      "change": 1510,
      "expression": 3872,
      "maintaining": 6779,
      "correctness": 2242,
      "confirms": 2057,
      "produces": 8919,
      "unleashing": 11830,
      "incorporation": 5220,
      "particular": 8310,
      "vital": 12157,
      "immersive": 5074,
      "experiences": 3732,
      "gaining": 4367,
      "dynamic": 3106,
      "personalized": 8478,
      "possible": 8605,
      "legitimate": 6262,
      "ethical": 3492,
      "readers": 9367,
      "influence": 5284,
      "effectively": 3149,
      "engaging": 3371,
      "virtual": 12135,
      "environment": 3435,
      "opportunities": 8080,
      "obstacles": 7992,
      "signal": 10396,
      "rethinking": 9940,
      "established": 3481,
      "age": 493,
      "cognition": 1752,
      "subjective": 10880,
      "intelligent": 5476,
      "needs": 7771,
      "chat": 1523,
      "initially": 5336,
      "realized": 9381,
      "massive": 6845,
      "researchers": 9809,
      "answer": 655,
      "mathematically": 6866,
      "accurately": 235,
      "described": 2734,
      "machines": 6766,
      "truly": 11677,
      "starts": 10694,
      "basic": 1086,
      "concepts": 1993,
      "presents": 8728,
      "investigate": 5560,
      "relationship": 9609,
      "transformation": 11604,
      "decomposition": 2589,
      "composition": 1919,
      "scheme": 10142,
      "conversion": 2210,
      "implementing": 5091,
      "knowledgebased": 5711,
      "instruction": 5398,
      "empirical": 3273,
      "efforts": 3204,
      "replicate": 9711,
      "instructiontuning": 5444,
      "factor": 3967,
      "achieving": 308,
      "enhances": 3402,
      "patterns": 8329,
      "amounts": 611,
      "major": 6783,
      "merely": 6926,
      "leads": 6172,
      "continuous": 2162,
      "flat": 4163,
      "causes": 1432,
      "phenomena": 8487,
      "specialized": 10600,
      "hard": 4807,
      "checkpoints": 1617,
      "informed": 5322,
      "clauses": 1667,
      "linguistics": 6372,
      "fail": 3978,
      "investigates": 5566,
      "linguistic": 6368,
      "difficulties": 2922,
      "modified": 7574,
      "widely": 12215,
      "scenarios": 10121,
      "35": 60,
      "knowledgeable": 5710,
      "solver": 10556,
      "investigation": 5568,
      "wellknown": 12202,
      "gpts": 4707,
      "aware": 999,
      "struggle": 10824,
      "required": 9760,
      "raise": 9306,
      "mechanisms": 6892,
      "disentangled": 2984,
      "symbolic": 11031,
      "discovered": 2960,
      "dnns": 3039,
      "sparse": 10591,
      "encodes": 3331,
      "disentangle": 2983,
      "dialogue": 2859,
      "small": 10505,
      "states": 10734,
      "transferability": 11598,
      "encoded": 3321,
      "exhibit": 3656,
      "exact": 3607,
      "reasons": 9441,
      "accountable": 224,
      "dearth": 2561,
      "area": 797,
      "showcase": 10359,
      "chainofthought": 1458,
      "cot": 2266,
      "official": 8017,
      "evaluations": 3583,
      "excellent": 3627,
      "detection": 2801,
      "corrected": 2238,
      "overcorrection": 8185,
      "tendencies": 11350,
      "adhering": 427,
      "principle": 8832,
      "minimal": 7049,
      "edits": 3130,
      "nonenglish": 7891,
      "highlights": 4897,
      "adapter": 360,
      "family": 3997,
      "parameterefficient": 8284,
      "led": 6254,
      "costeffective": 2262,
      "alternatives": 606,
      "alpaca": 600,
      "peft": 8336,
      "undoubtedly": 11792,
      "enable": 3307,
      "easytouse": 3115,
      "adapters": 361,
      "execute": 3647,
      "llama": 6387,
      "bloom": 1231,
      "opt": 8085,
      "gptj": 4705,
      "lora": 6722,
      "researchfriendly": 9817,
      "modular": 7576,
      "largerscale": 6125,
      "7b": 101,
      "extra": 3918,
      "trainable": 11531,
      "175b": 31,
      "arithmetic": 804,
      "emerged": 3239,
      "calculate": 1323,
      "revolutionizing": 9989,
      "cell": 1435,
      "power": 8644,
      "annotation": 646,
      "rna": 10029,
      "sequencing": 10289,
      "annotating": 645,
      "biology": 1211,
      "bing": 1208,
      "2023": 40,
      "revolutionized": 9982,
      "scientific": 10151,
      "providing": 9182,
      "breakthrough": 1268,
      "reviews": 9976,
      "uncover": 11742,
      "annotate": 640,
      "rare": 9345,
      "differentiation": 2919,
      "trajectories": 11591,
      "overlooked": 8190,
      "cancer": 1332,
      "discovery": 2962,
      "cells": 1438,
      "pathway": 8325,
      "life": 6309,
      "sciences": 10150,
      "history": 4925,
      "harnessing": 4824,
      "endeavor": 3356,
      "highlighted": 4894,
      "analyses": 615,
      "requiring": 9770,
      "construct": 2112,
      "outofdistribution": 8130,
      "roberta": 10032,
      "early": 3110,
      "api": 682,
      "drops": 3101,
      "suite": 10953,
      "rrhf": 10067,
      "align": 564,
      "facilitates": 3958,
      "alignment": 572,
      "preferences": 8701,
      "enhancing": 3404,
      "interactions": 5493,
      "humans": 5016,
      "instructgpt": 5396,
      "implements": 5092,
      "stages": 10678,
      "sft": 10327,
      "proximal": 9189,
      "ppo": 8661,
      "minimum": 7054,
      "contrast": 2167,
      "sampling": 10092,
      "policies": 8557,
      "extension": 3882,
      "simpler": 10468,
      "coding": 1751,
      "accomplished": 214,
      "session": 10307,
      "helpful": 4852,
      "stock": 10760,
      "movement": 7596,
      "variety": 12040,
      "predicting": 8688,
      "market": 6834,
      "tweets": 11708,
      "historical": 4924,
      "underperforms": 11749,
      "linear": 6364,
      "regression": 9584,
      "subpar": 10887,
      "suggesting": 10944,
      "serves": 10301,
      "aimed": 540,
      "social": 10524,
      "giant": 4628,
      "agi": 506,
      "plus": 8550,
      "november": 7941,
      "2022": 39,
      "unprecedented": 11842,
      "motivated": 7593,
      "according": 218,
      "500": 75,
      "articles": 810,
      "titles": 11484,
      "mentioning": 6924,
      "considering": 2087,
      "urgently": 11877,
      "realize": 9380,
      "ranked": 9327,
      "susceptible": 11030,
      "biases": 1191,
      "unfairness": 11795,
      "consequences": 2079,
      "ethics": 3497,
      "ensuring": 3419,
      "primarily": 8824,
      "employed": 3286,
      "guided": 4783,
      "inefficiencies": 5265,
      "frequently": 4287,
      "successful": 10928,
      "sufficient": 10939,
      "behavior": 1100,
      "subsequently": 10889,
      "filtered": 4063,
      "identifying": 5048,
      "detect": 2797,
      "eye": 3941,
      "growing": 4766,
      "lexical": 6303,
      "stylistic": 10876,
      "teaching": 11320,
      "balanced": 1014,
      "machinegenerated": 6764,
      "paired": 8202,
      "roughly": 10062,
      "equal": 3442,
      "matched": 6852,
      "hire": 4923,
      "exposed": 3867,
      "61": 87,
      "detecting": 2800,
      "67": 91,
      "round": 10063,
      "tend": 11348,
      "detectors": 2812,
      "build": 1301,
      "exponential": 3863,
      "growth": 4771,
      "electronic": 3209,
      "health": 4835,
      "records": 9532,
      "poses": 8588,
      "clinicians": 1676,
      "clinical": 1674,
      "management": 6807,
      "concise": 2002,
      "summaries": 10956,
      "distill": 2991,
      "documents": 3044,
      "rapid": 9332,
      "advancement": 452,
      "plms": 8543,
      "raising": 9310,
      "uptodate": 11874,
      "begin": 1092,
      "foundational": 4230,
      "followed": 4184,
      "indepth": 5238,
      "community": 1811,
      "line": 6363,
      "leaderboard": 6169,
      "useful": 11904,
      "resource": 9822,
      "track": 11511,
      "guidelines": 4784,
      "gptbased": 4704,
      "identification": 5042,
      "addresses": 415,
      "rapidly": 9343,
      "evolving": 3605,
      "database": 2470,
      "multistep": 7672,
      "included": 5173,
      "filtering": 4064,
      "keyword": 5639,
      "precision": 8682,
      "recall": 9442,
      "finding": 4084,
      "captured": 1392,
      "94": 114,
      "publication": 9206,
      "volume": 12162,
      "trends": 11663,
      "revealing": 9969,
      "degree": 2629,
      "countries": 2278,
      "institutions": 5393,
      "identified": 5043,
      "scholarly": 10144,
      "interdisciplinary": 5503,
      "nature": 7752,
      "players": 8532,
      "investigating": 5567,
      "reranking": 9771,
      "generalize": 4430,
      "ir": 5583,
      "properly": 9049,
      "instructed": 5395,
      "deliver": 2632,
      "27": 50,
      "delve": 2636,
      "distilling": 2996,
      "reproduce": 9745,
      "equipped": 3445,
      "emotional": 3265,
      "evaluates": 3520,
      "avenues": 978,
      "democratizing": 2644,
      "opensource": 8055,
      "excelling": 3630,
      "beneficial": 1147,
      "restrictions": 9866,
      "empowering": 3300,
      "follow": 4183,
      "instructions": 5430,
      "brings": 1284,
      "manually": 6824,
      "creating": 2302,
      "avenue": 977,
      "varying": 12099,
      "starting": 10692,
      "evolinstruct": 3600,
      "rewrite": 9999,
      "mix": 7078,
      "humancreated": 4999,
      "preferred": 8703,
      "capacity": 1380,
      "17": 27,
      "skills": 10498,
      "httpsgithubcomnlpxucanwizardlm": 4945,
      "brains": 1261,
      "customized": 2372,
      "prevalent": 8802,
      "room": 10055,
      "unstable": 11852,
      "inability": 5164,
      "think": 11450,
      "randomness": 9315,
      "thinking": 11451,
      "possess": 8599,
      "perspectives": 8483,
      "consolidating": 2103,
      "decisionmaking": 2575,
      "objectively": 7977,
      "comprehensively": 1950,
      "languagebased": 5991,
      "backpropagation": 1008,
      "devised": 2848,
      "problemsolving": 8876,
      "texttosql": 11433,
      "converts": 2213,
      "sql": 10665,
      "retrieve": 9953,
      "syntax": 11037,
      "llmbased": 6432,
      "retrieves": 9957,
      "schemes": 10143,
      "queries": 9260,
      "similarities": 10456,
      "demonstrations": 2708,
      "extracts": 3935,
      "schema": 10140,
      "items": 5601,
      "tables": 11076,
      "filter": 4062,
      "adapts": 368,
      "balance": 1013,
      "length": 6263,
      "fallback": 3990,
      "fails": 3980,
      "crossdomain": 2317,
      "constrained": 2108,
      "lengthy": 6265,
      "inputs": 5358,
      "unleash": 11827,
      "composed": 1917,
      "stream": 10790,
      "controller": 2192,
      "iteratively": 5605,
      "longterm": 6716,
      "shortterm": 10357,
      "precise": 8679,
      "coherent": 1760,
      "memories": 6907,
      "activated": 333,
      "modification": 7572,
      "involving": 5579,
      "supply": 10997,
      "covers": 2288,
      "longtext": 6718,
      "intellectual": 5467,
      "protection": 9134,
      "revolutionary": 9980,
      "expensive": 3727,
      "computing": 1986,
      "hardware": 4810,
      "architectures": 793,
      "costly": 2263,
      "assets": 849,
      "protect": 9132,
      "reproduction": 9750,
      "abuse": 188,
      "evolution": 3601,
      "watermarking": 12172,
      "taxonomy": 11313,
      "190": 34,
      "definition": 2624,
      "threats": 11468,
      "merits": 6929,
      "discussion": 2981,
      "13": 13,
      "reliable": 9632,
      "tailored": 11095,
      "template": 11344,
      "icl": 5034,
      "establish": 3480,
      "proficient": 8931,
      "recognize": 9515,
      "poorly": 8569,
      "parsing": 8304,
      "ml": 7085,
      "gained": 4360,
      "widespread": 12226,
      "demand": 2637,
      "adapting": 362,
      "nontrivial": 7899,
      "predominant": 8696,
      "consuming": 2121,
      "developers": 2828,
      "engineers": 3377,
      "incredible": 5233,
      "reason": 9396,
      "experience": 3730,
      "difficult": 2921,
      "bridge": 1270,
      "develop": 2822,
      "extending": 3879,
      "comprehend": 1921,
      "thorough": 11454,
      "dedicated": 2593,
      "competitiveness": 1881,
      "mt": 7602,
      "brainstorm": 1262,
      "stylized": 10877,
      "privacypreserving": 8848,
      "mitigate": 7068,
      "risks": 10020,
      "illustrate": 5055,
      "mentioned": 6923,
      "achievements": 277,
      "fullysupervised": 4308,
      "shortcomings": 10353,
      "low": 6728,
      "regarding": 9578,
      "entity": 3425,
      "inclination": 5171,
      "wrongly": 12289,
      "classify": 1659,
      "predefined": 8684,
      "aforementioned": 492,
      "gold": 4656,
      "widelyused": 12222,
      "sota": 10565,
      "performances": 8459,
      "derivativefree": 2729,
      "lacks": 5751,
      "versatility": 12119,
      "inappropriate": 5170,
      "assumption": 868,
      "nearly": 7757,
      "optimal": 8087,
      "confirm": 2055,
      "regardless": 9581,
      "refining": 9566,
      "answers": 677,
      "iterative": 5604,
      "iterations": 5603,
      "removing": 9694,
      "intervention": 5527,
      "par": 8271,
      "surpass": 11011,
      "superiority": 10982,
      "stepbystep": 10749,
      "decompose": 2584,
      "procedures": 8879,
      "completing": 1891,
      "obtained": 7997,
      "tune": 11684,
      "sizes": 10494,
      "everyday": 3596,
      "plan": 8512,
      "actions": 331,
      "goaloriented": 4653,
      "exploited": 3831,
      "lms": 6684,
      "abstract": 182,
      "goals": 4654,
      "activities": 339,
      "leaves": 6252,
      "constraints": 2111,
      "understudied": 11790,
      "define": 2622,
      "constraint": 2110,
      "faithfulness": 3985,
      "endowing": 3359,
      "chatgptlike": 1608,
      "plays": 8535,
      "industrial": 5260,
      "maintenance": 6782,
      "failures": 3981,
      "necessary": 7759,
      "measures": 6885,
      "taken": 11098,
      "service": 10303,
      "reliability": 9630,
      "reducing": 9548,
      "costs": 2265,
      "energy": 3367,
      "condition": 2014,
      "monitoring": 7586,
      "fault": 4005,
      "marks": 6835,
      "entry": 3434,
      "evolved": 3604,
      "represents": 9743,
      "landmark": 5754,
      "achievement": 276,
      "consensus": 2078,
      "respond": 9842,
      "roadmap": 10031,
      "developments": 2844,
      "answered": 663,
      "applicable": 691,
      "interpreter": 5525,
      "noncausal": 7890,
      "upgrading": 11870,
      "express": 3870,
      "excels": 3631,
      "capturing": 1394,
      "causality": 1428,
      "event": 3591,
      "density": 2713,
      "distance": 2990,
      "aiassisted": 526,
      "forms": 4204,
      "tagging": 11092,
      "direct": 2934,
      "mapping": 6827,
      "errorprone": 3465,
      "limiting": 6361,
      "scalability": 10102,
      "automating": 956,
      "coder": 1737,
      "grammar": 4721,
      "approaching": 777,
      "aid": 527,
      "scalable": 10103,
      "keywords": 5640,
      "uncovering": 11744,
      "shallow": 10328,
      "highlevel": 4888,
      "segmentation": 10201,
      "adapt": 347,
      "paradigms": 8274,
      "chain": 1448,
      "thought": 11462,
      "reveals": 9970,
      "annotator": 651,
      "refine": 9561,
      "typically": 11722,
      "static": 10736,
      "closed": 1680,
      "fall": 3988,
      "emerges": 3260,
      "necessitates": 7761,
      "extract": 3920,
      "dynamically": 3107,
      "changing": 1513,
      "retraining": 9941,
      "convert": 2211,
      "principles": 8833,
      "expansion": 3722,
      "vertical": 12123,
      "hybrid": 5027,
      "uie": 11726,
      "dubbed": 3105,
      "contains": 2128,
      "prefix": 8704,
      "instructor": 5448,
      "vanilla": 12026,
      "knowledgeintensive": 5715,
      "attempted": 885,
      "outcome": 8123,
      "latent": 6153,
      "adding": 371,
      "try": 11683,
      "inject": 5338,
      "consolidation": 2104,
      "proves": 9148,
      "stored": 10763,
      "misuse": 7067,
      "passive": 8317,
      "specificity": 10643,
      "whitebox": 12204,
      "embed": 3229,
      "watermarks": 12173,
      "dividing": 3037,
      "list": 6377,
      "adjusting": 430,
      "watermarked": 12171,
      "instance": 5386,
      "providers": 9172,
      "interests": 5507,
      "allow": 593,
      "autonomously": 962,
      "usage": 11882,
      "binary": 1206,
      "compute": 1978,
      "computed": 1979,
      "conform": 2060,
      "representing": 9742,
      "selectively": 10209,
      "contextbased": 2151,
      "statistical": 10737,
      "retranslation": 9942,
      "polishing": 8565,
      "substitution": 10903,
      "attacks": 881,
      "arduous": 796,
      "remove": 9693,
      "compromising": 1962,
      "maybe": 6876,
      "exploration": 3834,
      "unlock": 11836,
      "tens": 11352,
      "millions": 7039,
      "unaffordable": 11730,
      "decrease": 2591,
      "conducts": 2049,
      "identifies": 5044,
      "observations": 7985,
      "specialization": 10599,
      "taskrelated": 11155,
      "protecting": 9133,
      "copyright": 2226,
      "backdoor": 1004,
      "companies": 1814,
      "begun": 1096,
      "offer": 8006,
      "vulnerable": 12169,
      "cause": 1429,
      "losses": 6725,
      "extremely": 3939,
      "containing": 2127,
      "weight": 12196,
      "insertion": 5360,
      "transferred": 11600,
      "verification": 12110,
      "minimizing": 7053,
      "utility": 11982,
      "propagation": 9046,
      "core": 2227,
      "insight": 5361,
      "nodes": 7887,
      "edges": 3122,
      "building": 1308,
      "blocks": 1230,
      "passes": 8316,
      "graphstructured": 4740,
      "empower": 3292,
      "domainspecific": 3064,
      "popularity": 8579,
      "microsoft": 7033,
      "encountered": 3340,
      "interaction": 5487,
      "commonly": 1798,
      "perceived": 8343,
      "perceive": 8342,
      "gender": 4399,
      "preregistered": 8711,
      "identity": 5049,
      "summarizing": 10968,
      "absence": 179,
      "eliciting": 3218,
      "asking": 823,
      "default": 2617,
      "perception": 8346,
      "reverse": 9971,
      "user": 11908,
      "economic": 3117,
      "rationality": 9358,
      "assess": 833,
      "examines": 3614,
      "instructing": 5397,
      "budgetary": 1299,
      "decisions": 2577,
      "food": 4192,
      "measure": 6882,
      "maximization": 6873,
      "classic": 1648,
      "preference": 8700,
      "largely": 6120,
      "rational": 9353,
      "subjects": 10883,
      "slightly": 10499,
      "heterogeneity": 4858,
      "contexts": 2152,
      "frames": 4234,
      "situations": 10491,
      "forward": 4212,
      "boosts": 1243,
      "exhibited": 3661,
      "emergent": 3255,
      "ordinary": 8112,
      "hardly": 4809,
      "extended": 3878,
      "singleturn": 10488,
      "analogy": 614,
      "exploiting": 3832,
      "divide": 3033,
      "times": 11482,
      "accumulated": 226,
      "manipulating": 6811,
      "keyvalue": 5638,
      "matrices": 6868,
      "selfattention": 10214,
      "takes": 11099,
      "concatenating": 1990,
      "applies": 720,
      "learned": 6179,
      "multiplechoice": 7664,
      "assistance": 855,
      "owing": 8197,
      "broad": 1287,
      "choose": 1638,
      "testbed": 11374,
      "collect": 1770,
      "reframe": 9575,
      "instructionfollowing": 5426,
      "llmempowered": 6437,
      "patient": 8326,
      "mental": 6919,
      "receiving": 9447,
      "developing": 2829,
      "collaborate": 1766,
      "closely": 1686,
      "recruit": 9536,
      "patients": 8327,
      "engage": 3368,
      "diagnostic": 2855,
      "collecting": 1773,
      "ratings": 9351,
      "assessment": 844,
      "designs": 2770,
      "treat": 11652,
      "black": 1215,
      "boxes": 1259,
      "accessing": 208,
      "gradients": 4719,
      "extractor": 3933,
      "classifier": 1657,
      "augmented": 920,
      "parameter": 8279,
      "ease": 3111,
      "powered": 8651,
      "enhancement": 3399,
      "connected": 2067,
      "availability": 970,
      "collected": 1772,
      "customize": 2371,
      "active": 335,
      "interfaces": 5509,
      "options": 8104,
      "flexibility": 4165,
      "meet": 6904,
      "accelerate": 195,
      "contemplation": 2129,
      "reliance": 9636,
      "grounded": 4758,
      "roles": 10054,
      "student": 10831,
      "teacher": 11317,
      "accordingly": 220,
      "maximize": 6874,
      "increase": 5223,
      "rise": 10015,
      "bertscore": 1160,
      "applicability": 690,
      "experts": 3816,
      "aligned": 569,
      "treated": 11653,
      "crafting": 2295,
      "elicit": 3215,
      "ask": 820,
      "conditioned": 2016,
      "background": 1006,
      "assistant": 856,
      "96": 117,
      "counterfactual": 2276,
      "strengths": 10795,
      "weaknesses": 12186,
      "areas": 799,
      "underexplored": 11746,
      "factors": 3968,
      "slms": 10503,
      "enhancements": 3401,
      "pivotal": 8506,
      "interestingly": 5506,
      "illustrates": 5056,
      "regulate": 9588,
      "plenty": 8540,
      "storytelling": 10769,
      "jobs": 5607,
      "replaced": 9706,
      "divergent": 3009,
      "opinions": 8079,
      "conclusion": 2008,
      "conducting": 2048,
      "regard": 9576,
      "databases": 2471,
      "professional": 8926,
      "discussions": 2982,
      "shed": 10337,
      "reach": 9361,
      "medicine": 6903,
      "deficiency": 2621,
      "inadequate": 5169,
      "resolve": 9820,
      "continual": 2157,
      "add": 369,
      "extracted": 3924,
      "nlg": 7856,
      "references": 9555,
      "poor": 8567,
      "actually": 345,
      "expressed": 3871,
      "reflect": 9567,
      "hypotheses": 5031,
      "reference": 9553,
      "image": 5058,
      "caption": 1387,
      "782": 98,
      "ratio": 9352,
      "reformulation": 9574,
      "turn": 11705,
      "determine": 2817,
      "avoid": 987,
      "mimicking": 7044,
      "rewritten": 10001,
      "limit": 6337,
      "reformulate": 9571,
      "infusion": 5325,
      "intuition": 5554,
      "adequately": 422,
      "prevailing": 8801,
      "component": 1912,
      "unexplored": 11794,
      "welldesigned": 12200,
      "utterance": 12005,
      "channels": 1514,
      "recursively": 9540,
      "mild": 7035,
      "assumptions": 869,
      "rendering": 9696,
      "compatible": 1867,
      "probabilistic": 8852,
      "utterances": 12006,
      "bounds": 1257,
      "simulated": 10473,
      "alleviating": 591,
      "situation": 10490,
      "confounders": 2061,
      "unresolved": 11845,
      "fair": 3982,
      "evaluators": 3590,
      "adopting": 436,
      "modelsllms": 7563,
      "easily": 3113,
      "altering": 604,
      "appearance": 689,
      "manipulation": 6812,
      "appear": 688,
      "considerably": 2084,
      "80": 102,
      "tested": 11375,
      "evaluator": 3589,
      "assigning": 851,
      "position": 8594,
      "orders": 8111,
      "final": 4065,
      "humanintheloop": 5001,
      "entropy": 3433,
      "seeks": 10196,
      "vicuna": 12127,
      "closer": 1687,
      "judgments": 5618,
      "dont": 3071,
      "excel": 3626,
      "accommodate": 209,
      "referred": 9557,
      "paramount": 8299,
      "methodology": 6973,
      "uncertainty": 11735,
      "answerable": 662,
      "discovering": 2961,
      "intrinsic": 5533,
      "proficiency": 8930,
      "recognizing": 9517,
      "humancomputer": 4997,
      "psychological": 9197,
      "behavioral": 1101,
      "displayed": 2987,
      "employing": 3287,
      "personality": 8476,
      "crosslingual": 2319,
      "effects": 3180,
      "changes": 1512,
      "cues": 2335,
      "maintains": 6781,
      "shedding": 10342,
      "anticipate": 680,
      "serve": 10295,
      "catalyst": 1412,
      "explanation": 3821,
      "discrepancy": 2964,
      "unstructured": 11853,
      "finetunes": 4118,
      "textbased": 11419,
      "prove": 9141,
      "insufficient": 5452,
      "bridging": 1277,
      "synthesizing": 11042,
      "margins": 6831,
      "actual": 344,
      "commonsenseqa": 1805,
      "openbookqa": 8041,
      "functioning": 4313,
      "behave": 1097,
      "responsibly": 9860,
      "helping": 4854,
      "personal": 8475,
      "depth": 2727,
      "completeness": 1890,
      "security": 10191,
      "executors": 3652,
      "empowered": 3295,
      "verifying": 12116,
      "generic": 4625,
      "learnable": 6178,
      "ui": 11725,
      "homepage": 4931,
      "international": 5519,
      "april": 784,
      "china": 1619,
      "diagnosis": 2853,
      "investigations": 5569,
      "humanlevel": 5002,
      "validation": 12016,
      "interpretability": 5522,
      "doctors": 3040,
      "preserving": 8739,
      "integrity": 5466,
      "remote": 9692,
      "mitigating": 7076,
      "concern": 1996,
      "straightforward": 10770,
      "practice": 8674,
      "invoking": 5572,
      "primary": 8829,
      "directed": 2936,
      "thoughts": 11465,
      "concluding": 2007,
      "statement": 10699,
      "activity": 340,
      "batch": 1089,
      "students": 10834,
      "divided": 3035,
      "rendered": 9695,
      "run": 10073,
      "anomaly": 653,
      "logs": 6702,
      "play": 8525,
      "software": 10535,
      "fruitful": 4294,
      "notable": 7905,
      "cloud": 1689,
      "face": 3945,
      "consumption": 2122,
      "adaptability": 350,
      "lightweight": 6319,
      "tda": 11314,
      "realtime": 9385,
      "log": 6696,
      "runs": 10075,
      "2x": 51,
      "faster": 4003,
      "pandalm": 8206,
      "hyperparameter": 5030,
      "involved": 5574,
      "tuned": 11685,
      "judge": 5615,
      "extends": 3880,
      "adherence": 426,
      "ensure": 3416,
      "humanannotated": 4994,
      "gpt4s": 4701,
      "f1score": 3944,
      "evidenced": 3598,
      "alpacas": 601,
      "depend": 2714,
      "avoiding": 994,
      "leakage": 6174,
      "generalized": 4431,
      "3000": 54,
      "embodied": 3234,
      "relied": 9638,
      "attain": 882,
      "creates": 2300,
      "multiagent": 7604,
      "400": 65,
      "50": 74,
      "feasible": 4008,
      "draw": 3087,
      "conclusions": 2009,
      "unfortunately": 11797,
      "comprises": 1956,
      "tabular": 11078,
      "papers": 8270,
      "verifier": 12112,
      "locate": 6693,
      "shared": 10331,
      "flexible": 4166,
      "measuring": 6887,
      "psychology": 9198,
      "f1": 3942,
      "reported": 9719,
      "inefficient": 5266,
      "inaccurate": 5167,
      "adjusts": 433,
      "fewer": 4025,
      "reports": 9720,
      "behaves": 1098,
      "guessing": 4775,
      "finegrained": 4100,
      "instructiontuned": 5442,
      "programming": 8938,
      "tests": 11380,
      "norm": 7900,
      "subtask": 10904,
      "stems": 10743,
      "yielded": 12300,
      "centered": 1440,
      "pattern": 8328,
      "thoroughly": 11458,
      "firmly": 4153,
      "distillation": 2993,
      "necessitate": 7760,
      "intricate": 5530,
      "commence": 1789,
      "elicitation": 3217,
      "multimodality": 7647,
      "dimensions": 2933,
      "conditional": 2015,
      "textonly": 11421,
      "concerning": 1998,
      "unveiling": 11864,
      "return": 9960,
      "trading": 11514,
      "revolves": 9993,
      "investment": 5570,
      "portfolio": 8583,
      "adjustments": 432,
      "implementations": 5089,
      "subsequent": 10888,
      "rigorous": 10012,
      "encompassing": 3336,
      "aiming": 541,
      "efficacy": 3181,
      "news": 7852,
      "distinctive": 3000,
      "languagespecific": 5999,
      "volumes": 12163,
      "summary": 10969,
      "proceed": 8880,
      "running": 10074,
      "realistic": 9374,
      "constructing": 2116,
      "element": 3212,
      "extracting": 3925,
      "expertise": 3815,
      "beings": 1105,
      "squad": 10666,
      "strengthens": 10794,
      "generalizing": 4433,
      "race": 9302,
      "keeping": 5624,
      "beginning": 1094,
      "emotion": 3263,
      "multimodalities": 7646,
      "visual": 12148,
      "clues": 1692,
      "assume": 865,
      "emotions": 3267,
      "explanations": 3822,
      "predictions": 8694,
      "predicted": 8687,
      "plausible": 8524,
      "necessity": 7762,
      "multifaceted": 7608,
      "deal": 2559,
      "longstanding": 6713,
      "ambiguity": 609,
      "chart": 1522,
      "revolutionize": 9981,
      "proprietary": 9129,
      "leveraged": 6282,
      "strides": 10800,
      "finance": 4080,
      "digital": 2930,
      "vector": 12105,
      "solely": 10539,
      "opensourced": 8065,
      "curated": 2343,
      "holistically": 4930,
      "societal": 10528,
      "100k": 7,
      "stereotypes": 10754,
      "14": 21,
      "culture": 2338,
      "curation": 2344,
      "ambiguous": 610,
      "stringent": 10801,
      "control": 2185,
      "exhibiting": 3666,
      "extent": 3912,
      "harmful": 4814,
      "moral": 7588,
      "exceptional": 3632,
      "threestage": 11470,
      "llmdriven": 6436,
      "datadriven": 2473,
      "laws": 6162,
      "universal": 11821,
      "variables": 12030,
      "recovering": 9534,
      "pioneering": 8500,
      "emphasize": 3269,
      "frontier": 4290,
      "opening": 8051,
      "encourage": 3342,
      "misleading": 7062,
      "emphasizing": 3271,
      "pro": 8850,
      "pairwise": 8205,
      "contrasting": 2173,
      "prioritize": 8841,
      "progressively": 8950,
      "transforms": 11628,
      "longer": 6709,
      "sequences": 10286,
      "regarded": 9577,
      "recommendation": 9518,
      "individuals": 5253,
      "specified": 10644,
      "narrowed": 7702,
      "discover": 2959,
      "existence": 3676,
      "thanks": 11441,
      "stable": 10670,
      "decades": 2567,
      "sam": 10085,
      "generalizable": 4423,
      "blank": 1223,
      "scope": 10155,
      "curate": 2342,
      "clear": 1670,
      "clean": 1668,
      "meta": 6930,
      "instantiation": 5389,
      "communities": 1810,
      "15": 23,
      "setups": 10322,
      "safety": 10080,
      "fostering": 4216,
      "uniquely": 11818,
      "separates": 10276,
      "pairs": 8203,
      "total": 11506,
      "questionanswer": 9281,
      "gathered": 4393,
      "contributing": 2182,
      "safe": 10079,
      "page": 8199,
      "url": 11879,
      "blueprint": 1234,
      "assumes": 866,
      "significance": 10399,
      "pursuit": 9220,
      "routes": 10065,
      "coupled": 2280,
      "trial": 11664,
      "barrier": 1017,
      "motivate": 7592,
      "landing": 5753,
      "puzzle": 9223,
      "dissect": 2988,
      "inner": 5342,
      "workings": 12270,
      "parts": 8313,
      "posed": 8587,
      "modest": 7571,
      "retrievalaugmented": 9949,
      "strict": 10798,
      "operators": 8077,
      "sharing": 10336,
      "intents": 5483,
      "assisting": 862,
      "firstly": 4155,
      "unifying": 11813,
      "clarifying": 1645,
      "executable": 3646,
      "encompass": 3333,
      "architectural": 788,
      "innovations": 5343,
      "benchmarking": 1133,
      "regular": 9587,
      "breakthroughs": 1269,
      "bigger": 1195,
      "picture": 8493,
      "imperative": 5084,
      "treatment": 11655,
      "details": 2796,
      "pay": 8333,
      "accounting": 225,
      "broader": 1292,
      "discusses": 2979,
      "intended": 5478,
      "quick": 9299,
      "practitioners": 8676,
      "differentiable": 2917,
      "action": 327,
      "innovative": 5344,
      "verb": 12108,
      "truth": 11681,
      "assignment": 852,
      "matrix": 6869,
      "moderate": 7564,
      "submission": 10884,
      "top1": 11501,
      "discourseaware": 2958,
      "overlook": 8189,
      "totally": 11507,
      "literary": 6379,
      "judgment": 5617,
      "professionals": 8929,
      "teach": 11315,
      "teaches": 11319,
      "fact": 3964,
      "concentrate": 1991,
      "acts": 343,
      "justification": 5620,
      "pose": 8584,
      "negative": 7772,
      "impacts": 5083,
      "mainly": 6772,
      "neglecting": 7779,
      "criteria": 2307,
      "induced": 5257,
      "github": 4629,
      "strategic": 10773,
      "expanding": 3720,
      "exciting": 3644,
      "synergistic": 11032,
      "seeking": 10195,
      "contribute": 2179,
      "central": 1441,
      "services": 10304,
      "credibility": 2306,
      "considerations": 2085,
      "transformative": 11606,
      "mutual": 7691,
      "proposal": 9053,
      "factual": 3972,
      "prowess": 9188,
      "boundaries": 1254,
      "affects": 489,
      "examining": 3615,
      "awareness": 1000,
      "formulating": 4210,
      "games": 4373,
      "presented": 8726,
      "caused": 1431,
      "overcoming": 8182,
      "treats": 11656,
      "game": 4371,
      "voting": 12164,
      "cooperative": 2223,
      "accompanied": 211,
      "actorcritic": 342,
      "editing": 3126,
      "showcased": 10360,
      "discrepancies": 2963,
      "refines": 9565,
      "taking": 11101,
      "performing": 8465,
      "criticizing": 2314,
      "24": 47,
      "induction": 5258,
      "98": 119,
      "ood": 8030,
      "catalyzed": 1413,
      "smallscale": 10519,
      "stark": 10689,
      "embarks": 3228,
      "focusing": 4181,
      "65b": 90,
      "indistribution": 5248,
      "unveil": 11863,
      "detector": 2811,
      "outperforming": 8142,
      "intriguing": 5531,
      "phenomenon": 8488,
      "spaces": 10582,
      "anisotropic": 639,
      "observed": 7989,
      "environments": 3438,
      "restricted": 9864,
      "bilingual": 1196,
      "atomic": 874,
      "restrict": 9863,
      "decent": 2568,
      "advent": 477,
      "past": 8318,
      "couple": 2279,
      "gradually": 4720,
      "statistically": 10740,
      "option": 8103,
      "remarkably": 9688,
      "zerofewshot": 12307,
      "fewshort": 4027,
      "hoping": 4935,
      "kg": 5642,
      "recommending": 9525,
      "friendly": 4289,
      "behaviors": 1104,
      "tail": 11094,
      "mines": 7047,
      "recommendations": 9522,
      "prefer": 8699,
      "tackling": 11089,
      "unable": 11728,
      "weak": 12179,
      "boosting": 1242,
      "builds": 1314,
      "reflecting": 9568,
      "backward": 1010,
      "majority": 6785,
      "calculation": 1324,
      "matters": 6871,
      "sequentially": 10291,
      "concatenated": 1988,
      "locality": 6688,
      "modeled": 7242,
      "forgetting": 4193,
      "shifting": 10347,
      "concurrently": 2012,
      "13b": 19,
      "97": 118,
      "played": 8530,
      "occur": 8004,
      "softmax": 10533,
      "establishes": 3482,
      "avoids": 998,
      "autonomous": 958,
      "utilities": 11981,
      "pass": 8314,
      "reduces": 9546,
      "calls": 1331,
      "mind": 7046,
      "vs": 12165,
      "inconsistency": 5201,
      "researches": 9816,
      "constructs": 2118,
      "twolevel": 11710,
      "conscious": 2077,
      "statements": 10700,
      "biased": 1190,
      "contradicts": 2166,
      "corroborate": 2254,
      "emerge": 3238,
      "strengthen": 10793,
      "theories": 11448,
      "closedsource": 1685,
      "instrumental": 5450,
      "depends": 2719,
      "deeply": 2615,
      "utilization": 11983,
      "arising": 803,
      "strictly": 10799,
      "culminating": 2336,
      "variant": 12032,
      "reliant": 9637,
      "reality": 9378,
      "determined": 2818,
      "player": 8531,
      "steer": 10742,
      "texttoimage": 11427,
      "craft": 2292,
      "narratives": 7700,
      "shape": 10329,
      "elements": 3213,
      "gameplay": 4372,
      "possibilities": 8602,
      "fresh": 4288,
      "labourintensive": 5738,
      "acceleration": 198,
      "chemistry": 1618,
      "material": 6855,
      "eliminates": 3220,
      "injects": 5341,
      "showcases": 10362,
      "overarching": 8176,
      "prosperity": 9131,
      "mllm": 7087,
      "mllms": 7088,
      "closedloop": 1684,
      "bridges": 1276,
      "loop": 6721,
      "weakness": 12185,
      "incremental": 5234,
      "collection": 1774,
      "targeting": 11109,
      "multiround": 7668,
      "participation": 8309,
      "implied": 5097,
      "freeform": 4283,
      "valid": 12010,
      "infusing": 5324,
      "convergence": 2199,
      "instructional": 5424,
      "reflects": 9570,
      "curriculum": 2366,
      "selfinstruction": 10221,
      "ignores": 5051,
      "multistage": 7671,
      "selfinstruct": 10220,
      "introspective": 5553,
      "tuningfree": 11704,
      "claude": 1662,
      "gpt4tools": 4702,
      "facial": 3951,
      "encapsulate": 3319,
      "conveying": 2217,
      "arbitrary": 786,
      "styles": 10875,
      "eliminating": 3223,
      "termed": 11355,
      "yield": 12299,
      "expressive": 3874,
      "controllable": 2189,
      "epa": 3441,
      "accomplishes": 215,
      "desired": 2772,
      "thousand": 11466,
      "windows": 12232,
      "sophisticated": 10563,
      "lacking": 5750,
      "struggles": 10830,
      "compression": 1954,
      "counseling": 2273,
      "decisionsupport": 2579,
      "landscape": 5756,
      "underscores": 11752,
      "profound": 8934,
      "counselors": 2274,
      "interventions": 5528,
      "pressing": 8740,
      "assist": 853,
      "harness": 4817,
      "meaningful": 6879,
      "affirm": 490,
      "compelling": 1868,
      "lays": 6165,
      "organization": 8113,
      "elaborate": 3207,
      "spider": 10656,
      "execution": 3650,
      "bar": 1015,
      "explorations": 3836,
      "disadvantages": 2954,
      "deeper": 2611,
      "refers": 9560,
      "expanded": 3719,
      "possesses": 8601,
      "infer": 5269,
      "subtasks": 10905,
      "dealing": 2560,
      "teams": 11325,
      "decides": 2571,
      "considers": 2088,
      "invokes": 5571,
      "chosen": 1641,
      "bootstrapping": 1247,
      "sparked": 10588,
      "modality": 7095,
      "quantities": 9255,
      "bootstraps": 1248,
      "frozen": 4291,
      "segment": 10198,
      "transcript": 11592,
      "transcripts": 11593,
      "obtaining": 7998,
      "accomplish": 212,
      "equips": 3447,
      "connect": 2066,
      "controllers": 2193,
      "userfriendly": 11920,
      "library": 6306,
      "seamless": 10169,
      "equip": 3444,
      "1000": 6,
      "localized": 6690,
      "sketch": 10495,
      "cut": 2373,
      "shortage": 10352,
      "trees": 11659,
      "transforming": 11627,
      "hierarchy": 4865,
      "divideandconquer": 3034,
      "chose": 1640,
      "depths": 2728,
      "degrees": 2630,
      "secondly": 10187,
      "impractical": 5105,
      "highdimensional": 4878,
      "spirit": 10657,
      "selfdriven": 10218,
      "grounding": 4759,
      "agents": 497,
      "skill": 10496,
      "hinders": 4919,
      "generality": 4421,
      "ground": 4757,
      "hypothesis": 5032,
      "subgoals": 10878,
      "interacting": 5486,
      "verified": 12111,
      "phase": 8485,
      "imitation": 5071,
      "proving": 9186,
      "showing": 10368,
      "chatgptbased": 1606,
      "aipowered": 553,
      "indicated": 5243,
      "proved": 9142,
      "supplementary": 10995,
      "complementing": 1885,
      "operations": 8076,
      "multiplication": 7666,
      "billionparameter": 1201,
      "surpassing": 11016,
      "43": 68,
      "budget": 1298,
      "successes": 10927,
      "consolidate": 2102,
      "knowledgeoriented": 5720,
      "rule": 10070,
      "minimize": 7051,
      "glm130b": 4646,
      "checkpoint": 1616,
      "verbalizer": 12109,
      "space": 10580,
      "cloze": 1691,
      "mask": 6836,
      "labor": 5735,
      "nonlinear": 7898,
      "locally": 6692,
      "neighborhood": 7780,
      "preserves": 8738,
      "32": 58,
      "stimulates": 10757,
      "exists": 3717,
      "existed": 3675,
      "facing": 3963,
      "implemented": 5090,
      "encounter": 3337,
      "managing": 6809,
      "request": 9751,
      "targets": 11110,
      "edge": 3121,
      "decomposed": 2586,
      "manage": 6805,
      "configuration": 2053,
      "processed": 8896,
      "decomposing": 2588,
      "artificially": 818,
      "molecule": 7584,
      "cornerstone": 2229,
      "materials": 6856,
      "drug": 3102,
      "crossmodal": 2320,
      "molecular": 7583,
      "descriptive": 2743,
      "advancing": 471,
      "inconsistencies": 5200,
      "socalled": 10523,
      "appealing": 687,
      "selfevaluation": 10219,
      "operates": 8073,
      "updates": 11868,
      "modify": 7575,
      "attack": 875,
      "defense": 2618,
      "19": 33,
      "postprocessing": 8611,
      "practically": 8673,
      "shortens": 10355,
      "postprocessed": 8610,
      "post": 8608,
      "versatile": 12118,
      "seamlessly": 10171,
      "preceding": 8678,
      "219": 43,
      "68": 93,
      "book": 1236,
      "inhouse": 5332,
      "wordlevel": 12243,
      "biasing": 1192,
      "private": 8849,
      "tutoring": 11707,
      "chaining": 1452,
      "course": 2282,
      "cater": 1421,
      "interconnected": 5502,
      "reflection": 9569,
      "reaction": 9365,
      "storage": 10761,
      "gets": 4627,
      "testify": 11376,
      "connecting": 2068,
      "optimizers": 8100,
      "crafted": 2293,
      "discrete": 2965,
      "fast": 4001,
      "humanreadable": 5014,
      "population": 8581,
      "25": 48,
      "inspire": 5369,
      "combination": 1778,
      "spotting": 10664,
      "names": 7698,
      "texttospeech": 11432,
      "convolutional": 2218,
      "cnn": 1695,
      "match": 6850,
      "englishonly": 3384,
      "codeswitching": 1749,
      "llmgenerated": 6438,
      "satisfactory": 10094,
      "lean": 6175,
      "daily": 2376,
      "paid": 8200,
      "classifying": 1660,
      "multilevel": 7613,
      "correspondingly": 2253,
      "depending": 2718,
      "surrounding": 11023,
      "purposes": 9219,
      "characterized": 1520,
      "catastrophic": 1414,
      "rewards": 9998,
      "prevent": 8803,
      "strategically": 10774,
      "rates": 9348,
      "investigated": 5564,
      "studied": 10835,
      "mixing": 7080,
      "tendency": 11351,
      "distractors": 3003,
      "defined": 2623,
      "threshold": 11471,
      "compose": 1916,
      "educational": 3133,
      "adeptly": 420,
      "navigate": 7754,
      "alongside": 599,
      "15b": 24,
      "functionality": 4312,
      "amazing": 607,
      "intertask": 5526,
      "openchat": 8042,
      "nowadays": 7943,
      "mixed": 7079,
      "equally": 3443,
      "rankingbased": 9331,
      "proportion": 9052,
      "regards": 9582,
      "coarsegrained": 1696,
      "complementary": 1884,
      "solved": 10555,
      "validate": 12011,
      "qualitative": 9232,
      "contract": 2164,
      "saving": 10099,
      "represented": 9741,
      "constrain": 2107,
      "nested": 7782,
      "captures": 1393,
      "llmassisted": 6431,
      "contracts": 2165,
      "promise": 8963,
      "emulate": 3305,
      "solid": 10540,
      "acting": 326,
      "extraordinary": 3936,
      "plans": 8518,
      "flaws": 4164,
      "hindered": 4917,
      "irrelevant": 5584,
      "inaccuracies": 5166,
      "barriers": 1018,
      "encouraging": 3344,
      "scratch": 10161,
      "realm": 9384,
      "discern": 2955,
      "recognise": 9509,
      "dialogues": 2869,
      "taskoriented": 11154,
      "spectrum": 10646,
      "capacities": 1379,
      "zhou": 12327,
      "fund": 4315,
      "generalizability": 4422,
      "superficial": 10970,
      "develops": 2845,
      "advice": 483,
      "healthcare": 4839,
      "sourced": 10577,
      "internet": 5520,
      "cleansing": 1669,
      "origins": 8121,
      "supports": 11004,
      "aids": 529,
      "propelling": 9047,
      "communications": 1809,
      "fms": 4172,
      "physical": 8490,
      "profoundly": 8935,
      "avoidance": 993,
      "sorting": 10564,
      "qualitatively": 9235,
      "preprocessing": 8710,
      "missing": 7065,
      "incomplete": 5198,
      "hinder": 4916,
      "usefulness": 11906,
      "aroused": 806,
      "attempting": 886,
      "layers": 6164,
      "multihead": 7610,
      "qformer": 9230,
      "encoders": 3330,
      "segments": 10204,
      "exceeding": 3624,
      "humangenerated": 5000,
      "selected": 10206,
      "humancrafted": 4998,
      "greater": 4752,
      "rationale": 9354,
      "competence": 1870,
      "exclusively": 3645,
      "injecting": 5339,
      "calibrating": 1327,
      "gptstyle": 4708,
      "ubiquitous": 11724,
      "devices": 2846,
      "societies": 10529,
      "contextaware": 2150,
      "enabled": 3309,
      "autoagents": 929,
      "requests": 9752,
      "sensor": 10260,
      "later": 6155,
      "follows": 4190,
      "foster": 4214,
      "operating": 8074,
      "mobile": 7091,
      "scheduling": 10139,
      "naturally": 7751,
      "wonder": 12236,
      "fairly": 3983,
      "endows": 3360,
      "deepen": 2609,
      "satisfy": 10096,
      "peoples": 8341,
      "lives": 6386,
      "nonetheless": 7893,
      "empowers": 3304,
      "suit": 10949,
      "criterion": 2308,
      "constitute": 2106,
      "team": 11324,
      "vulnerability": 12167,
      "formulation": 4211,
      "quantifiable": 9245,
      "guarantees": 4774,
      "theoretically": 11446,
      "oversight": 8194,
      "misalignment": 7060,
      "adaptable": 352,
      "specify": 10645,
      "versus": 12122,
      "indicating": 5245,
      "believed": 1109,
      "hold": 4926,
      "pilot": 8497,
      "necessarily": 7758,
      "merge": 6927,
      "mimic": 7042,
      "calibrate": 1325,
      "aligns": 583,
      "merges": 6928,
      "markedly": 6833,
      "88": 108,
      "rectifies": 9537,
      "elevating": 3214,
      "standalone": 10680,
      "intensive": 5479,
      "consist": 2089,
      "language understanding": 5984,
      "latest work": 6157,
      "language model": 5779,
      "model training": 7231,
      "training enables": 11551,
      "various machine": 12076,
      "machine reading": 6757,
      "reading comprehension": 9369,
      "natural language": 7708,
      "language inference": 5768,
      "tasks existing": 11201,
      "existing language": 3691,
      "models including": 7358,
      "gpt bert": 4666,
      "word embeddings": 12238,
      "semantic information": 10235,
      "rich semantics": 10008,
      "semantics language": 10254,
      "propose incorporate": 9072,
      "incorporate explicit": 5211,
      "representation model": 9730,
      "compared bert": 1841,
      "obtains new": 8000,
      "new stateoftheart": 7838,
      "substantially improves": 10900,
      "tasks zeroshot": 11305,
      "multilingual language": 7617,
      "language models": 5809,
      "automatically generate": 953,
      "involves multiple": 5578,
      "machine translation": 6758,
      "translation models": 11637,
      "paper inspired": 8233,
      "transformerbased language": 11617,
      "models propose": 7499,
      "propose simple": 9098,
      "parallel data": 8277,
      "data conduct": 2397,
      "semantically similar": 10250,
      "radford et": 9304,
      "et al": 3489,
      "al 2018": 559,
      "model largescale": 7174,
      "addition introduce": 374,
      "robustness model": 10048,
      "model experimental": 7142,
      "experimental results": 3739,
      "results model": 9915,
      "model surpasses": 7224,
      "pretraining model": 8790,
      "story generation": 10767,
      "generation generating": 4533,
      "important challenging": 5100,
      "challenging task": 1505,
      "neural language": 7799,
      "language generation": 5764,
      "generation models": 4553,
      "models gpt2": 7346,
      "coherence generated": 1758,
      "generated stories": 4488,
      "commonsense knowledge": 1801,
      "knowledge understanding": 5708,
      "causal relationships": 1427,
      "paper devise": 8219,
      "generation propose": 4567,
      "knowledge external": 5670,
      "external knowledge": 3915,
      "knowledge bases": 5654,
      "generate reasonable": 4465,
      "multitask learning": 7678,
      "learning combines": 6200,
      "automatic manual": 941,
      "evaluation shows": 3579,
      "model generate": 7153,
      "stateoftheart baselines": 10705,
      "sequence tokens": 10284,
      "text generation": 11395,
      "generation specifically": 4579,
      "pretrained language": 8746,
      "model gpt2": 7157,
      "learning generate": 6211,
      "finetuning stage": 4145,
      "lead higher": 6167,
      "bleu score": 1225,
      "conduct experiments": 2028,
      "pretraining experimental": 8778,
      "question answering": 9267,
      "prior work": 8840,
      "large text": 6119,
      "text corpus": 11387,
      "retrieving knowledge": 9959,
      "knowledge large": 5684,
      "large corpus": 6005,
      "semantic representation": 10239,
      "specifically method": 10636,
      "method based": 6939,
      "method large": 6956,
      "large language": 6008,
      "evaluate method": 3510,
      "question types": 9278,
      "information provided": 5311,
      "significantly improves": 10435,
      "improves accuracy": 5148,
      "multihop reasoning": 7612,
      "long text": 6707,
      "generation long": 4549,
      "generation important": 4536,
      "generative models": 4605,
      "models suffer": 7539,
      "address problem": 409,
      "problem propose": 8868,
      "reasoning generation": 9421,
      "generation mrg": 4554,
      "approach incorporates": 746,
      "knowledge graph": 5673,
      "process human": 8885,
      "human writing": 4992,
      "unlike previous": 11833,
      "proposed model": 9122,
      "experiments representative": 3797,
      "representative tasks": 9740,
      "tasks including": 11221,
      "description generation": 2740,
      "generation automatic": 4516,
      "proposed method": 9119,
      "method generate": 6952,
      "strong baselines": 10807,
      "models design": 7300,
      "language learning": 5778,
      "transfer learning": 11596,
      "learning paper": 6231,
      "finetuning dataset": 4123,
      "learning including": 6218,
      "speech recognition": 10650,
      "semantic level": 10237,
      "specific domain": 10609,
      "level language": 6270,
      "artificial intelligence": 816,
      "neural network": 7801,
      "learning agent": 6186,
      "gpt2 model": 4675,
      "task demands": 11124,
      "previous works": 8821,
      "works shown": 12277,
      "largescale language": 6134,
      "model achieved": 7102,
      "good performance": 4659,
      "generation observe": 4557,
      "usually contain": 11979,
      "propose twostage": 9107,
      "generation framework": 4532,
      "second stage": 10185,
      "supervision signals": 10994,
      "errors improve": 3468,
      "dataset model": 2502,
      "model outperforms": 7190,
      "outperforms baseline": 8146,
      "baseline approaches": 1064,
      "terms automatic": 11358,
      "automatic metrics": 943,
      "metrics human": 7026,
      "human evaluation": 4962,
      "new application": 7806,
      "training neural": 11573,
      "training data": 11545,
      "propose novel": 9084,
      "data available": 2390,
      "based gpt2": 1039,
      "data samples": 2455,
      "order make": 8110,
      "weakly supervised": 12183,
      "training paradigm": 11576,
      "able outperform": 177,
      "fully supervised": 4307,
      "annotated data": 643,
      "data model": 2434,
      "model boost": 7117,
      "boost performance": 1239,
      "performance standard": 8431,
      "seq2seq model": 10278,
      "model bleu": 7116,
      "design optimization": 2753,
      "emergence large": 3251,
      "models llms": 7388,
      "openais chatgpt": 8039,
      "googles bard": 4664,
      "paper explores": 8229,
      "applying llms": 727,
      "specifically utilize": 10641,
      "gpt35 gpt4": 4685,
      "deep reinforcement": 2606,
      "reinforcement learning": 9592,
      "learning code": 6199,
      "perform better": 8350,
      "converse gpt": 2209,
      "paper shows": 8267,
      "llms chatgpt": 6474,
      "technical level": 11327,
      "propose new": 9081,
      "practical implications": 8667,
      "achieve significant": 260,
      "math word": 6860,
      "word problems": 12242,
      "word problem": 12241,
      "critical task": 2313,
      "task natural": 11134,
      "language processing": 5960,
      "recent studies": 9476,
      "generation task": 4581,
      "problem descriptions": 8861,
      "address limitation": 402,
      "framework based": 4238,
      "based generative": 1034,
      "generative pretrained": 4606,
      "training generation": 11557,
      "ranking model": 9330,
      "model learns": 7176,
      "correct incorrect": 2237,
      "specially designed": 10605,
      "demonstrate effectiveness": 2651,
      "effectiveness proposed": 3174,
      "method benchmark": 6940,
      "benchmark results": 1129,
      "results method": 9911,
      "method consistently": 6944,
      "consistently outperforms": 2097,
      "outperforms baselines": 8148,
      "pretrained models": 8763,
      "models gpt": 7345,
      "modeling language": 7246,
      "language structure": 5976,
      "texts models": 11426,
      "models consider": 7283,
      "numerical reasoning": 7960,
      "reasoning tasks": 9439,
      "paper propose": 8251,
      "pretrained model": 8762,
      "model explicitly": 7145,
      "specifically leverages": 10635,
      "loss function": 6724,
      "pretraining objective": 8791,
      "conduct extensive": 2029,
      "extensive experiments": 3893,
      "experiments different": 3778,
      "different datasets": 2879,
      "datasets evaluate": 2528,
      "experiment results": 3734,
      "baseline models": 1069,
      "ablation studies": 169,
      "studies conducted": 10837,
      "conducted evaluate": 2041,
      "evaluate impact": 3508,
      "table question": 11074,
      "models achieved": 7254,
      "performance using": 8442,
      "using pretrained": 11963,
      "pretraining corpus": 8773,
      "pretraining large": 8787,
      "opendomain text": 8047,
      "performance models": 8411,
      "response propose": 9846,
      "t5 gpt2": 11071,
      "gpt2 based": 4673,
      "based natural": 1049,
      "question generation": 9276,
      "logical form": 6700,
      "better suited": 1182,
      "practical deployment": 8665,
      "ai technology": 524,
      "autoregressive language": 964,
      "able generate": 175,
      "generate humanlike": 4450,
      "humanlike text": 5010,
      "case studies": 1405,
      "aigenerated content": 533,
      "models end": 7314,
      "end paper": 3350,
      "paper focuses": 8231,
      "sentence embeddings": 10262,
      "increasing scale": 5229,
      "hundreds billions": 5026,
      "billions parameters": 1204,
      "sets stateoftheart": 10314,
      "stateoftheart results": 10726,
      "results various": 9934,
      "various language": 12072,
      "language tasks": 5978,
      "finetuning large": 4128,
      "large foundation": 6006,
      "foundation models": 4223,
      "related fields": 9600,
      "separate models": 10274,
      "end propose": 3352,
      "billion parameters": 1200,
      "best sentence": 1170,
      "175 billion": 29,
      "code models": 1722,
      "pretraining transformer": 8799,
      "transformer decoder": 11610,
      "data paper": 2441,
      "automatic speech": 948,
      "learning framework": 6210,
      "pretraining tasks": 8798,
      "masked language": 6838,
      "language modeling": 5807,
      "instead generating": 5391,
      "generating textual": 4510,
      "speech information": 10649,
      "generate correct": 4442,
      "comprehensive experiments": 1940,
      "word error": 12239,
      "error rate": 3464,
      "release code": 9618,
      "code model": 1720,
      "completion task": 1894,
      "standard benchmark": 10682,
      "knowledge base": 5652,
      "free text": 4281,
      "better prompts": 1181,
      "prompts text": 9041,
      "linked knowledge": 6374,
      "fewshot learning": 4033,
      "models incontext": 7360,
      "incontext learning": 5204,
      "work propose": 12260,
      "zero fewshot": 12306,
      "fewshot classification": 4030,
      "greedy decoding": 4756,
      "method adopts": 6937,
      "diverse set": 3027,
      "tasks extensive": 11209,
      "extensive analysis": 3885,
      "different scales": 2904,
      "selfsupervised learning": 10224,
      "extensively explored": 3911,
      "explored recent": 3853,
      "recent years": 9485,
      "success natural": 10920,
      "wide adoption": 12206,
      "bert gpt": 1153,
      "heavily relies": 4842,
      "data augmentation": 2386,
      "dominant approach": 3069,
      "paper identify": 8232,
      "impact development": 5078,
      "generative selfsupervised": 4621,
      "masking strategy": 6842,
      "public datasets": 9202,
      "datasets different": 2526,
      "learning tasks": 6246,
      "tasks results": 11276,
      "study provides": 10865,
      "demonstrates potential": 2696,
      "model data": 7129,
      "nlp tasks": 7872,
      "tasks training": 11294,
      "training set": 11581,
      "limited existing": 6352,
      "existing solutions": 3708,
      "heuristic rules": 4861,
      "synonym replacement": 11034,
      "gpt2 using": 4677,
      "limited training": 6359,
      "produce new": 8917,
      "synthetic data": 11044,
      "knowledge limited": 5692,
      "issue propose": 5592,
      "propose knowledge": 9076,
      "model pretrained": 7201,
      "novel framework": 7921,
      "framework knowledge": 4263,
      "knowledge single": 5701,
      "target task": 11107,
      "tasks unified": 11295,
      "unified texttotext": 11805,
      "texttotext format": 11435,
      "training objectives": 11574,
      "objectives different": 7980,
      "best knowledge": 1164,
      "multitask training": 7680,
      "data produced": 2444,
      "improves performance": 5150,
      "performance strong": 8434,
      "models bert": 7266,
      "large margin": 6106,
      "nlp benchmark": 7861,
      "parameters experiments": 8294,
      "performance improvements": 8399,
      "models used": 7555,
      "models multiple": 7469,
      "multiple tasks": 7663,
      "tasks large": 11235,
      "achieved impressive": 266,
      "impressive zeroshot": 5117,
      "zeroshot ability": 12310,
      "model size": 7218,
      "high cost": 4870,
      "smaller language": 10513,
      "model external": 7149,
      "demonstrated promising": 2684,
      "modeling capabilities": 7244,
      "capabilities remains": 1357,
      "remains unclear": 9663,
      "models perform": 7487,
      "perform competitively": 8351,
      "generalization downstream": 4428,
      "downstream tasks": 3078,
      "tasks work": 11301,
      "work introduce": 12254,
      "model best": 7112,
      "model demonstrate": 7131,
      "strong zeroshot": 10812,
      "zeroshot performance": 12319,
      "performance wide": 8455,
      "wide range": 12209,
      "unseen tasks": 11849,
      "shows significant": 10393,
      "significant improvement": 10412,
      "fusion module": 4344,
      "notably proposed": 7909,
      "evaluation tasks": 3580,
      "spoken language": 10662,
      "texts challenging": 11424,
      "alleviate data": 585,
      "data scarcity": 2456,
      "scarcity problem": 10118,
      "lack largescale": 5745,
      "largescale highquality": 6132,
      "language text": 5981,
      "text data": 11388,
      "overcome limitation": 8178,
      "limitation propose": 6339,
      "largescale indomain": 6133,
      "data specifically": 2462,
      "texts generated": 11425,
      "significantly outperforms": 10444,
      "outperforms compared": 8150,
      "achieve improvements": 253,
      "demonstrating effectiveness": 2703,
      "effectiveness approach": 3168,
      "approach release": 756,
      "code data": 1705,
      "data facilitating": 2410,
      "facilitating future": 3960,
      "future research": 4350,
      "research field": 9791,
      "inference paper": 5273,
      "solution problem": 10543,
      "integrating text": 5463,
      "graph structure": 4735,
      "information large": 5302,
      "graph neural": 4732,
      "neural networks": 7802,
      "networks gnns": 7795,
      "high computational": 4868,
      "computational complexity": 1970,
      "training large": 11561,
      "propose efficient": 9064,
      "efficient effective": 3194,
      "learning large": 6221,
      "framework called": 4242,
      "allows training": 597,
      "training modules": 11571,
      "experiments multiple": 3789,
      "multiple data": 7652,
      "efficiency effectiveness": 3185,
      "proposed approach": 9112,
      "stateoftheart performance": 10721,
      "performance numerous": 8419,
      "numerous natural": 7962,
      "data modalities": 2433,
      "unique characteristics": 11817,
      "tasks like": 11242,
      "decision making": 2573,
      "unique challenges": 11816,
      "challenges applying": 1475,
      "privacy concerns": 8844,
      "processing nlp": 8905,
      "nlp demonstrate": 7864,
      "success large": 10914,
      "models llm": 7387,
      "model learn": 7175,
      "representations paper": 9734,
      "model named": 7186,
      "extensive experimental": 3890,
      "results public": 9927,
      "superior performance": 10975,
      "performance stateoftheart": 8432,
      "stateoftheart approaches": 10703,
      "scaling law": 10115,
      "performance gains": 8388,
      "demonstrates importance": 2695,
      "sheds light": 10345,
      "promising research": 8973,
      "research direction": 9784,
      "language prompts": 5972,
      "crosstask generalization": 2323,
      "models limited": 7385,
      "limited labeled": 6354,
      "highly sensitive": 4906,
      "challenging given": 1497,
      "address issue": 394,
      "labeled data": 5730,
      "gradient update": 4716,
      "unlabeled data": 11826,
      "fewshot setting": 4039,
      "prompt tuning": 9000,
      "model tuning": 7232,
      "chatgpt good": 1568,
      "report provides": 9718,
      "preliminary evaluation": 8706,
      "evaluation chatgpt": 3546,
      "including translation": 5197,
      "multilingual translation": 7621,
      "translation ability": 11635,
      "candidate prompts": 1335,
      "minor performance": 7057,
      "performance differences": 8378,
      "evaluating number": 3531,
      "number benchmark": 7949,
      "benchmark test": 1131,
      "test sets": 11372,
      "chatgpt performs": 1587,
      "performs competitively": 8469,
      "google translate": 4662,
      "target language": 11105,
      "improves translation": 5153,
      "translation performance": 11639,
      "performance significantly": 8428,
      "chatgpt does": 1552,
      "good results": 4660,
      "performance chatgpt": 8369,
      "chatgpt significantly": 1598,
      "multilingual multimodal": 7619,
      "paper proposes": 8259,
      "evaluating interactive": 3525,
      "interactive llms": 5497,
      "chatgpt using": 1601,
      "using publicly": 11965,
      "publicly available": 9210,
      "covering different": 2287,
      "tasks evaluate": 11196,
      "aspects chatgpt": 830,
      "based data": 1028,
      "newly designed": 7849,
      "multimodal dataset": 7627,
      "dataset chatgpt": 2482,
      "chatgpt outperforms": 1585,
      "llms zeroshot": 6682,
      "zeroshot learning": 12316,
      "finetuned models": 4116,
      "models tasks": 7542,
      "nonlatin script": 7896,
      "script languages": 10164,
      "generate multimodal": 4456,
      "multimodal content": 7625,
      "textual prompts": 11439,
      "intermediate code": 5512,
      "code generation": 1714,
      "10 different": 2,
      "different reasoning": 2903,
      "logical reasoning": 6701,
      "commonsense reasoning": 1803,
      "access external": 202,
      "llm improve": 6415,
      "improve performance": 5132,
      "prompt engineering": 8990,
      "evaluation set": 3578,
      "new chinese": 7813,
      "pretraining language": 8786,
      "model based": 7111,
      "based t5": 1059,
      "t5 model": 11072,
      "different sources": 2907,
      "general domain": 4403,
      "comprehensive benchmarks": 1929,
      "benchmarks like": 1140,
      "glue superglue": 4650,
      "significant advancements": 10401,
      "model pretraining": 7205,
      "drawing inspiration": 3092,
      "understanding generation": 11772,
      "evaluation benchmark": 3543,
      "datasets covering": 2522,
      "generation tasks": 4582,
      "aim facilitate": 539,
      "facilitate research": 3956,
      "research development": 9783,
      "benchmark released": 1128,
      "largescale pretrained": 6140,
      "chatgpt understand": 1600,
      "comparative study": 1832,
      "study chatgpt": 10850,
      "chatgpt finetuned": 1562,
      "finetuned bert": 4110,
      "recently chatgpt": 9491,
      "chatgpt attracted": 1538,
      "great attention": 4745,
      "generate fluent": 4447,
      "highquality responses": 4912,
      "responses human": 9855,
      "prior studies": 8837,
      "studies shown": 10845,
      "shown chatgpt": 10373,
      "generation ability": 4513,
      "compared existing": 1845,
      "existing models": 3703,
      "quantitative analysis": 9249,
      "understanding ability": 11762,
      "little attention": 6383,
      "ability chatgpt": 134,
      "chatgpt evaluating": 1555,
      "glue benchmark": 4649,
      "bertstyle models": 1162,
      "models chatgpt": 7276,
      "falls short": 3992,
      "short handling": 10350,
      "tasks chatgpt": 11171,
      "outperforms bert": 8149,
      "bert models": 1156,
      "models inference": 7364,
      "chatgpt achieves": 1536,
      "achieves comparable": 283,
      "comparable performance": 1823,
      "performance compared": 8374,
      "sentiment analysis": 10269,
      "questionanswering tasks": 9286,
      "tasks additionally": 11163,
      "combining advanced": 1784,
      "prompting strategies": 9024,
      "existing large": 3692,
      "llms generating": 6549,
      "training llm": 11565,
      "computation requirements": 1967,
      "methods rely": 7007,
      "learning rl": 6240,
      "approach called": 732,
      "significantly smaller": 10450,
      "method does": 6946,
      "does require": 3049,
      "internal representations": 5518,
      "llm token": 6427,
      "probability distribution": 8855,
      "applied various": 719,
      "various llms": 12074,
      "llms including": 6562,
      "approach significantly": 762,
      "compared base": 1838,
      "comprehensive study": 1947,
      "study language": 10858,
      "understanding tasks": 11785,
      "models demonstrated": 7297,
      "demonstrated impressive": 2679,
      "impressive performance": 5111,
      "performance various": 8446,
      "various natural": 12079,
      "showcasing strong": 10366,
      "understanding reasoning": 11781,
      "reasoning capabilities": 9409,
      "handle various": 4803,
      "explored especially": 3851,
      "comprehensive experimental": 1937,
      "test samples": 11370,
      "understanding nlu": 11778,
      "nlu tasks": 7882,
      "findings indicate": 4090,
      "outperforms existing": 8152,
      "average performance": 984,
      "inference sentiment": 5274,
      "analysis tasks": 628,
      "challenges including": 1483,
      "addressing challenges": 418,
      "overall performance": 8173,
      "performance generalization": 8393,
      "generalization abilities": 4425,
      "human feedback": 4969,
      "paper focus": 8230,
      "realworld applications": 9387,
      "applications particularly": 711,
      "function assessed": 4310,
      "assessed human": 839,
      "learning human": 6214,
      "feedback rlhf": 4023,
      "recent works": 9484,
      "improve quality": 5135,
      "llms human": 6559,
      "guidance propose": 4778,
      "policy search": 8562,
      "search problem": 10179,
      "problem reinforcement": 8869,
      "promising alternative": 8965,
      "furthermore demonstrate": 4328,
      "improving quality": 5163,
      "images generated": 5066,
      "generative model": 4603,
      "ranking feedback": 9329,
      "feedback experiments": 4021,
      "significantly enhance": 10429,
      "generated images": 4478,
      "overall work": 8175,
      "effective approach": 3137,
      "human machine": 4980,
      "code released": 1731,
      "paper present": 8244,
      "leverages large": 6284,
      "prompting methods": 9019,
      "methods generate": 6990,
      "generate multiple": 4458,
      "datasets including": 2534,
      "approach achieves": 729,
      "achieves significant": 298,
      "significant improvements": 10413,
      "existing baselines": 3679,
      "significantly outperform": 10442,
      "outperform stateoftheart": 8137,
      "potential large": 8628,
      "models conversational": 7289,
      "including natural": 5189,
      "processing computer": 8900,
      "computer vision": 1981,
      "learning models": 6230,
      "models significant": 7524,
      "significant impact": 10411,
      "impact field": 5080,
      "integration llms": 5465,
      "problems including": 8873,
      "llms field": 6537,
      "various applications": 12051,
      "applications llms": 707,
      "challenges arise": 1476,
      "data resources": 2454,
      "finally discuss": 4073,
      "promising directions": 8968,
      "current state": 2361,
      "highlight potential": 4893,
      "potential benefits": 8617,
      "generative ai": 4593,
      "chatgpt gpt4": 1569,
      "text images": 11400,
      "worth noting": 12283,
      "model gpt4": 7158,
      "help chatgpt": 4847,
      "content creation": 2133,
      "answering question": 673,
      "comprehensive review": 1946,
      "review existing": 9974,
      "techniques applications": 11334,
      "model architecture": 7108,
      "pretraining generative": 8784,
      "generative modeling": 4604,
      "methods like": 6998,
      "diffusion models": 2928,
      "models introducing": 7367,
      "tasks based": 11168,
      "including text": 5195,
      "3d content": 63,
      "discuss challenges": 2970,
      "chatgpt deep": 1547,
      "generate texts": 4470,
      "given topics": 4644,
      "chatgpt chinese": 1542,
      "results revealed": 9930,
      "performed better": 8463,
      "chatgpt human": 1573,
      "fewshot prompting": 4037,
      "prompting large": 9012,
      "models large": 7372,
      "ability perform": 160,
      "models directly": 7305,
      "prior research": 8836,
      "appropriate prompt": 779,
      "improving performance": 5162,
      "specifically introduce": 10633,
      "introduce metric": 5543,
      "lead unsatisfactory": 6168,
      "quality based": 9237,
      "based observation": 1052,
      "observation propose": 7983,
      "strategy based": 10784,
      "mainstream models": 6777,
      "models gpt3": 7347,
      "various downstream": 12061,
      "results indicate": 9909,
      "enhance models": 3391,
      "learning performance": 6233,
      "evaluating chatgpt": 3523,
      "grammatical error": 4724,
      "error correction": 3461,
      "chatgpt cuttingedge": 1546,
      "lot attention": 6727,
      "strong ability": 10805,
      "report aim": 9713,
      "evaluate chatgpt": 3504,
      "stateoftheart models": 10719,
      "benchmark dataset": 1114,
      "baselines terms": 1078,
      "automatic evaluation": 936,
      "evaluation metrics": 3568,
      "grammatical correctness": 4723,
      "evaluation quantitatively": 3575,
      "results demonstrate": 9891,
      "demonstrate chatgpt": 2648,
      "intelligence ai": 5470,
      "chatgpt large": 1576,
      "model trained": 7230,
      "support dynamic": 11000,
      "ethical issues": 3494,
      "effectively create": 3151,
      "technology based": 11339,
      "information content": 5290,
      "chat generative": 1524,
      "pretrained transformer": 8766,
      "massive data": 6846,
      "years researchers": 12296,
      "basic concepts": 1087,
      "information knowledge": 5301,
      "semantic communication": 10229,
      "furthermore propose": 4336,
      "verify proposed": 12115,
      "instruction data": 5399,
      "models empirical": 7308,
      "empirical study": 3281,
      "realworld use": 9395,
      "success chatgpt": 10910,
      "chatgpt recently": 1592,
      "achieving remarkable": 316,
      "remarkable results": 9685,
      "significantly enhances": 10430,
      "models performance": 7489,
      "generated results": 4486,
      "consistent human": 2093,
      "current research": 2359,
      "impact different": 5079,
      "model performance": 7197,
      "performance especially": 8384,
      "paper explore": 8226,
      "explore performance": 3842,
      "performance large": 8403,
      "models based": 7265,
      "based instruction": 1040,
      "instruction tuning": 5414,
      "tuning different": 11691,
      "data evaluation": 2407,
      "evaluation dataset": 3550,
      "dataset consisting": 2488,
      "base model": 1023,
      "model results": 7209,
      "tasks openended": 11250,
      "openended generation": 8050,
      "data size": 2460,
      "potential future": 8623,
      "research directions": 9787,
      "base models": 1024,
      "models training": 7547,
      "training methods": 11568,
      "tasks release": 11270,
      "evaluation datasets": 3551,
      "model checkpoints": 7123,
      "case study": 1406,
      "tools fail": 11499,
      "paper investigates": 8239,
      "aims enhance": 544,
      "novel twostep": 7940,
      "prompt strategy": 8997,
      "chatgpt currently": 1545,
      "widely used": 12220,
      "zeroshot scenarios": 12325,
      "scenarios demonstrated": 10125,
      "improve average": 5120,
      "problem large": 8862,
      "significant progress": 10417,
      "llms remains": 6631,
      "commonsense questions": 1802,
      "effectively leverage": 3158,
      "answering questions": 674,
      "conduct series": 2035,
      "series experiments": 10293,
      "experiments evaluate": 3780,
      "evaluate chatgpts": 3505,
      "results gpts": 9906,
      "tasks struggle": 11282,
      "knowledge using": 5709,
      "answer question": 659,
      "knowledge llms": 5693,
      "llms instruction": 6568,
      "instruction following": 5404,
      "concepts paper": 1994,
      "llms set": 6645,
      "usually encode": 11980,
      "llm parameters": 6421,
      "open problem": 8033,
      "problem paper": 8866,
      "dialogue tasks": 2866,
      "small number": 10511,
      "exhibit high": 3657,
      "errors chatgpt": 3467,
      "chatgpt highly": 1572,
      "comprehensive evaluation": 1933,
      "shown remarkable": 10386,
      "remarkable potential": 9682,
      "potential various": 8639,
      "exploring potential": 3861,
      "correction gec": 2240,
      "zeroshot chainofthought": 12312,
      "chainofthought cot": 1459,
      "using incontext": 11948,
      "evaluation involves": 3559,
      "chatgpts performance": 1612,
      "official test": 8018,
      "different languages": 2886,
      "results human": 9908,
      "human evaluations": 4966,
      "evaluations demonstrate": 3585,
      "chatgpt excellent": 1557,
      "correct errors": 2236,
      "performance nonenglish": 8418,
      "lowresource settings": 6748,
      "highlights potential": 4898,
      "gec tasks": 4398,
      "tasks analysis": 11164,
      "various types": 12098,
      "chatgpt effectively": 1553,
      "parameterefficient finetuning": 8285,
      "models success": 7535,
      "llms like": 6580,
      "like gpt3": 6330,
      "gpt3 chatgpt": 4680,
      "taskspecific data": 11308,
      "various finetuning": 12067,
      "finetuning methods": 4134,
      "finetuning peft": 4139,
      "requires finetuning": 9766,
      "llms achieving": 6449,
      "achieving comparable": 312,
      "comparable better": 1817,
      "better performance": 1180,
      "peft methods": 8337,
      "llms paper": 6604,
      "paper presents": 8248,
      "framework integrates": 4262,
      "integrates various": 5459,
      "llms different": 6505,
      "different tasks": 2909,
      "framework includes": 4261,
      "llms llama": 6589,
      "framework designed": 4245,
      "evaluation new": 3570,
      "furthermore evaluate": 4330,
      "evaluate effectiveness": 3506,
      "math reasoning": 6858,
      "reasoning datasets": 9419,
      "datasets results": 2547,
      "trainable parameters": 11532,
      "powerful llms": 8660,
      "provide promising": 9164,
      "framework finetuning": 4252,
      "llms downstream": 6509,
      "practical applications": 8664,
      "systems large": 11060,
      "models emerged": 7307,
      "step step": 10748,
      "solving math": 10559,
      "problems requires": 8875,
      "focus evaluating": 4175,
      "ability large": 149,
      "models work": 7560,
      "including gpt4": 5180,
      "gpt4 chatgpt": 4693,
      "llama various": 6393,
      "provide detailed": 9153,
      "detailed analysis": 2794,
      "power large": 8645,
      "cell type": 1436,
      "type annotation": 11716,
      "rna sequencing": 10030,
      "task requires": 11144,
      "chatgpt new": 1583,
      "researchers conduct": 9811,
      "efficiently accurately": 3201,
      "new insights": 7822,
      "using chatgpt": 11939,
      "potentially lead": 8642,
      "reasoning ability": 9403,
      "ability comprehensive": 136,
      "transformer gpt4": 11612,
      "advanced reasoning": 451,
      "tasks report": 11272,
      "popular benchmarks": 8571,
      "comparison chatgpt": 1863,
      "results chatgpt": 9884,
      "significantly better": 10424,
      "reasoning benchmarks": 9408,
      "results gpt4": 9904,
      "higher performance": 4883,
      "datasets benchmarks": 2518,
      "performance drops": 8383,
      "newly released": 7851,
      "reasoning remains": 9436,
      "remains challenging": 9651,
      "benchmark suite": 1130,
      "align language": 567,
      "models human": 7352,
      "human preferences": 4984,
      "significantly enhancing": 10431,
      "interactions humans": 5494,
      "humans models": 5020,
      "supervised finetuning": 10986,
      "finetuning sft": 4143,
      "reward model": 9997,
      "proximal policy": 9190,
      "policy optimization": 8560,
      "optimization ppo": 8095,
      "novel learning": 7924,
      "learning paradigm": 6232,
      "responses generated": 9853,
      "generated different": 4477,
      "align human": 565,
      "model output": 7191,
      "robust finetuning": 10043,
      "alignment process": 579,
      "performance comparable": 8372,
      "recently large": 9499,
      "like chatgpt": 6322,
      "chatgpt demonstrated": 1548,
      "demonstrated remarkable": 2685,
      "remarkable performance": 9676,
      "performance variety": 8445,
      "variety natural": 12043,
      "processing tasks": 8912,
      "tasks effectiveness": 11193,
      "domain specifically": 3055,
      "remains explored": 9655,
      "explored paper": 3852,
      "paper conduct": 8214,
      "capabilities multimodal": 1349,
      "indicate chatgpt": 5241,
      "stateoftheart methods": 10717,
      "traditional methods": 11520,
      "despite potential": 2784,
      "potential chainofthought": 8618,
      "chainofthought prompting": 1463,
      "need specialized": 7769,
      "training finetuning": 11555,
      "provides insights": 9176,
      "foundation future": 4218,
      "future work": 4358,
      "social media": 10526,
      "recently released": 9505,
      "artificial general": 813,
      "general intelligence": 4406,
      "intelligence agi": 5469,
      "november 2022": 7942,
      "chatgpt quickly": 1591,
      "various aspects": 12053,
      "500 articles": 76,
      "urgently needed": 11878,
      "applications challenges": 703,
      "challenges present": 1488,
      "foundation model": 4222,
      "model alignment": 7106,
      "essential step": 3478,
      "models finetuned": 7332,
      "rl algorithms": 10023,
      "end introduce": 3349,
      "introduce new": 5545,
      "new framework": 7820,
      "effectively utilizing": 3166,
      "approach selects": 757,
      "model finetuning": 7151,
      "effectively improve": 3155,
      "improve model": 5130,
      "performance reward": 8427,
      "metrics large": 7029,
      "models diffusion": 7303,
      "gpt models": 4669,
      "ai generated": 516,
      "generated content": 4475,
      "content aigc": 2131,
      "presents considerable": 8731,
      "detect text": 2798,
      "text generated": 11394,
      "generated large": 4479,
      "growing need": 4769,
      "address challenges": 389,
      "machinegenerated texts": 6765,
      "linguistic analyses": 6369,
      "sentences complex": 10267,
      "syntactic structures": 11036,
      "results suggest": 9931,
      "finetuned training": 4117,
      "comprehensive analysis": 1926,
      "generative large": 4597,
      "models publicly": 7504,
      "text summarization": 11416,
      "pretrained large": 8754,
      "models exponential": 7326,
      "exponential growth": 3864,
      "electronic health": 3210,
      "health records": 4837,
      "poses significant": 8591,
      "significant challenge": 10405,
      "clinical information": 1675,
      "tackle challenge": 11081,
      "support clinical": 10999,
      "information retrieval": 5313,
      "aims generating": 548,
      "generating concise": 4497,
      "concise summaries": 2005,
      "key information": 5633,
      "rapid advancement": 9333,
      "nlp techniques": 7878,
      "models plms": 7491,
      "methods datasets": 6981,
      "datasets evaluation": 2529,
      "need comprehensive": 7765,
      "present systematic": 8725,
      "systematic review": 11049,
      "recent advancements": 9453,
      "llms help": 6557,
      "challenges future": 1481,
      "future directions": 4348,
      "available datasets": 973,
      "discuss existing": 2971,
      "existing challenges": 3681,
      "promising future": 8969,
      "era llms": 3456,
      "research community": 9780,
      "study presents": 10861,
      "presents comprehensive": 8730,
      "rapidly evolving": 9344,
      "field artificial": 4046,
      "processing capabilities": 8899,
      "pretrained transformers": 8769,
      "transformers gpt": 11626,
      "effectiveness method": 3173,
      "research area": 9776,
      "llms demonstrated": 6495,
      "remarkable ability": 9667,
      "tasks paper": 11252,
      "generative llms": 4602,
      "retrieval ir": 9946,
      "experiments reveal": 3799,
      "superior results": 10980,
      "supervised methods": 10990,
      "lowresource languages": 6746,
      "capabilities chatgpt": 1338,
      "model small": 7220,
      "chatgpt generated": 1567,
      "generated data": 4476,
      "code reproduce": 1732,
      "reproduce results": 9746,
      "results available": 9880,
      "report presents": 9717,
      "dialogue understanding": 2868,
      "supervised models": 10991,
      "promising results": 8975,
      "results generating": 9903,
      "responses furthermore": 9852,
      "potential avenues": 8616,
      "avenues future": 979,
      "languages paper": 5998,
      "chatgpt language": 1575,
      "achieving competitive": 313,
      "competitive performance": 1877,
      "english chinese": 3380,
      "limited resources": 6357,
      "believe work": 1108,
      "people use": 8340,
      "use chatgpt": 11885,
      "data code": 2392,
      "models available": 7263,
      "empowering large": 3301,
      "complex instructions": 1897,
      "data brings": 2391,
      "struggle produce": 10828,
      "large amounts": 6003,
      "varying levels": 12102,
      "using llm": 11958,
      "starting initial": 10693,
      "set instructions": 10309,
      "instructions use": 5441,
      "data finetune": 2411,
      "finetune llama": 4104,
      "resulting model": 9875,
      "evaluation results": 3576,
      "gpt4 automatic": 4690,
      "findings suggest": 4098,
      "llms code": 6478,
      "data public": 2446,
      "public httpsgithubcomnlpxucanwizardlm": 9203,
      "impressive ability": 5108,
      "interact users": 5485,
      "challenging tasks": 1507,
      "models like": 7380,
      "room improvement": 10056,
      "responses questions": 9858,
      "based chatgpt": 1026,
      "objectively comprehensively": 7978,
      "feedback mechanism": 4022,
      "datasets demonstrate": 2523,
      "task converts": 11120,
      "converts natural": 2214,
      "llms work": 6679,
      "work natural": 12257,
      "tasks specifically": 11278,
      "propose llmbased": 9077,
      "llmbased framework": 6435,
      "demonstration examples": 2707,
      "prompt llms": 8996,
      "questions different": 9291,
      "valuable information": 12019,
      "outperforms stateoftheart": 8160,
      "demonstrates strong": 2697,
      "strong generalization": 10809,
      "generalization ability": 4426,
      "capacity largescale": 1383,
      "agent memory": 496,
      "longterm memory": 6717,
      "generate precise": 4462,
      "memory activated": 6912,
      "model input": 7165,
      "finetuning experimental": 4125,
      "enables llms": 3311,
      "multiturn dialogue": 7686,
      "comparable chatgpt": 1819,
      "scenarios involving": 10129,
      "test set": 11371,
      "abilities llms": 127,
      "survey deep": 11025,
      "deep neural": 2603,
      "networks dnns": 7794,
      "various fields": 12066,
      "high performance": 4874,
      "highquality data": 4909,
      "data expensive": 2409,
      "methods proposed": 7005,
      "rapid evolution": 9341,
      "paper provide": 8261,
      "provide comprehensive": 9152,
      "comprehensive survey": 1948,
      "research chatgpt": 9778,
      "relations paper": 9608,
      "paper aims": 8209,
      "quantitatively evaluate": 9253,
      "evaluate performance": 3511,
      "promising performance": 8970,
      "various tasks": 12094,
      "tasks conduct": 11180,
      "extensive evaluations": 3889,
      "13 datasets": 15,
      "downstream applications": 3075,
      "prompt templates": 8999,
      "zeroshot prompt": 12320,
      "prompt template": 8998,
      "learning icl": 6217,
      "classification tasks": 1655,
      "time chatgpt": 11473,
      "chatgpt exhibits": 1559,
      "exhibits strong": 3672,
      "strong performance": 10811,
      "reasoning causal": 9416,
      "performs poorly": 8472,
      "parsing task": 8305,
      "models solving": 7528,
      "machine learning": 6753,
      "learning ml": 6228,
      "significant demand": 10409,
      "predominant approaches": 8697,
      "understand human": 11757,
      "human developers": 4960,
      "ability understand": 166,
      "paper aim": 8208,
      "aim bridge": 536,
      "bridge gap": 1271,
      "machine intelligence": 6752,
      "leverages stateoftheart": 6291,
      "stateoftheart llms": 10715,
      "llms develop": 6503,
      "novel tasks": 7934,
      "capability llms": 1372,
      "perform thorough": 8359,
      "results new": 9918,
      "new tasks": 7845,
      "achieve high": 249,
      "translation using": 11645,
      "using large": 11951,
      "translation mt": 11638,
      "using deep": 11941,
      "deep learning": 2597,
      "llms gpt3": 6551,
      "chatgpt brings": 1540,
      "new challenges": 7812,
      "challenges opportunities": 1485,
      "using llms": 11959,
      "new evaluation": 7819,
      "mitigate risks": 7072,
      "new directions": 7817,
      "opportunities challenges": 8081,
      "relation extraction": 9604,
      "shortcomings llms": 10354,
      "gap llms": 4379,
      "widelyused datasets": 12223,
      "achieves sota": 299,
      "competitive performances": 1878,
      "blackbox prompt": 1221,
      "derivativefree optimization": 2730,
      "network large": 7788,
      "tasks llms": 11243,
      "llms believe": 6467,
      "tasks target": 11288,
      "shares similarities": 10335,
      "task experiments": 11127,
      "achieves competitive": 286,
      "responses llms": 9857,
      "simple efficient": 10462,
      "efficient approach": 3192,
      "approach based": 731,
      "based prompt": 1057,
      "models introduce": 7366,
      "output quality": 8167,
      "need manual": 7768,
      "manual intervention": 6821,
      "refinement framework": 9564,
      "demonstrate superiority": 2668,
      "superiority proposed": 10983,
      "instructions instruction": 5436,
      "able improve": 176,
      "models challenging": 7275,
      "tasks following": 11214,
      "following instructions": 4186,
      "instructions general": 5433,
      "general lack": 4411,
      "intermediate steps": 5516,
      "steps address": 10753,
      "decompose tasks": 2585,
      "tasks provide": 11262,
      "different model": 2892,
      "model sizes": 7219,
      "analysis indicates": 623,
      "stepbystep instruction": 10750,
      "facilitate future": 3954,
      "research release": 9807,
      "human quality": 4986,
      "quality evaluation": 9240,
      "language planning": 5959,
      "previous work": 8820,
      "models lms": 7462,
      "paper define": 8218,
      "time propose": 11475,
      "approach improve": 745,
      "llms task": 6667,
      "task use": 11148,
      "planning dataset": 8516,
      "empirical results": 3278,
      "demonstrate method": 2659,
      "method significantly": 6965,
      "ability llms": 154,
      "llms especially": 6519,
      "critical role": 2312,
      "remarkable achievements": 9668,
      "data widely": 2469,
      "various industries": 12070,
      "new era": 7818,
      "deep models": 2602,
      "models rapidly": 7506,
      "research paradigm": 9803,
      "represents landmark": 9744,
      "general artificial": 4401,
      "future development": 4347,
      "gap paper": 4381,
      "paper systematically": 8269,
      "key components": 5629,
      "causal reasoning": 1426,
      "ability crucial": 137,
      "nlp applications": 7860,
      "despite impressive": 2783,
      "various nlp": 12085,
      "unclear chatgpt": 11738,
      "reasoning paper": 9431,
      "conduct comprehensive": 2021,
      "experiments chatgpt": 3766,
      "cot techniques": 2271,
      "performs better": 8467,
      "high accuracy": 4867,
      "manual annotation": 6818,
      "timeconsuming errorprone": 11478,
      "study explores": 10853,
      "compare chatgpt": 1835,
      "successfully deployed": 10932,
      "making process": 6803,
      "approaches large": 771,
      "chatbot chatgpt": 1530,
      "potential chatgpt": 8620,
      "summarization performance": 10960,
      "higher level": 4881,
      "study investigates": 10857,
      "varying difficulty": 12100,
      "difficulty levels": 2926,
      "tasks propose": 11260,
      "discriminative generative": 2968,
      "chain thought": 1449,
      "thought cot": 11463,
      "cot approach": 2267,
      "chatgpt achieve": 1535,
      "comparable stateoftheart": 1826,
      "methods reveals": 7010,
      "complex tasks": 1906,
      "understanding complex": 11768,
      "complex structures": 1905,
      "indepth analysis": 5239,
      "difficulties understanding": 2923,
      "findings provide": 4095,
      "graph construction": 4730,
      "information extraction": 5294,
      "closed set": 1682,
      "fall short": 3989,
      "domains new": 3061,
      "automatically extract": 952,
      "new task": 7844,
      "existing datasets": 3682,
      "datasets based": 2517,
      "simple effective": 10459,
      "hope proposed": 4933,
      "code datasets": 1712,
      "datasets available": 2516,
      "models previous": 7496,
      "previous studies": 8818,
      "studies revealed": 10844,
      "lack capacity": 5740,
      "capacity handle": 1382,
      "works attempted": 12272,
      "knowledge plms": 5696,
      "despite promising": 2785,
      "rich knowledge": 10005,
      "knowledge pretrained": 5697,
      "knowledgeintensive tasks": 5717,
      "new paradigm": 7829,
      "prompt like": 8994,
      "model knowledge": 7169,
      "including roberta": 5192,
      "tasks glue": 11217,
      "benchmarks demonstrate": 1136,
      "knowledge stored": 5705,
      "performance code": 8371,
      "code available": 1700,
      "blackbox language": 1218,
      "llms exhibit": 6524,
      "generated text": 4489,
      "detection methods": 2806,
      "adversarial robustness": 480,
      "method proposed": 6962,
      "generation method": 4550,
      "realworld scenarios": 9392,
      "probability distributions": 8856,
      "scenarios specifically": 10133,
      "used identify": 11901,
      "experiments demonstrate": 3771,
      "chinese english": 1624,
      "english datasets": 3382,
      "datasets furthermore": 2533,
      "retranslation polishing": 9943,
      "low training": 6736,
      "data instruction": 2424,
      "tuning large": 11693,
      "llms gained": 6540,
      "gained attention": 4361,
      "unlock potential": 11837,
      "potential llms": 8632,
      "offers advantages": 8016,
      "adaptation large": 354,
      "tasks finetuning": 11212,
      "finetuning approach": 4120,
      "training models": 11570,
      "millions billions": 7040,
      "parameters large": 8295,
      "amounts data": 612,
      "computational costs": 1972,
      "data used": 2467,
      "training costs": 11544,
      "improve data": 5122,
      "data efficiency": 2405,
      "paper conducts": 8217,
      "llm training": 6428,
      "regarding task": 9580,
      "performance specific": 8429,
      "specific task": 10620,
      "instruction types": 5422,
      "tuning data": 11689,
      "taskspecific models": 11311,
      "models results": 7517,
      "models trained": 7545,
      "trained using": 11538,
      "taskrelated data": 11156,
      "powerful capabilities": 8653,
      "capabilities text": 1359,
      "text understanding": 11417,
      "based llms": 1048,
      "cause significant": 1430,
      "llms training": 6671,
      "method called": 6942,
      "effectively transferred": 3164,
      "experiments various": 3810,
      "various datasets": 12057,
      "datasets method": 2537,
      "method effectively": 6948,
      "representation learning": 9728,
      "presents novel": 8733,
      "novel transformer": 7938,
      "transformer architecture": 11609,
      "method fully": 6951,
      "fully consider": 4303,
      "edges graph": 3123,
      "attention module": 891,
      "specifically propose": 10638,
      "attention mechanism": 890,
      "graphstructured data": 4741,
      "architecture named": 792,
      "graph data": 4731,
      "experiments benchmark": 3764,
      "benchmark datasets": 1115,
      "method outperforms": 6959,
      "models better": 7269,
      "empower large": 3293,
      "model perform": 7196,
      "answering large": 669,
      "model llm": 7178,
      "llm gained": 6412,
      "gained popularity": 4364,
      "achieved remarkable": 268,
      "results opendomain": 9919,
      "domainspecific scenarios": 3066,
      "specific knowledge": 10613,
      "attracted widespread": 901,
      "widespread attention": 12227,
      "benchmarks available": 1135,
      "provide benchmark": 9150,
      "answering qa": 672,
      "dataset named": 2503,
      "technical problems": 11328,
      "dataset contains": 2492,
      "addition propose": 376,
      "llm achieve": 6402,
      "achieve better": 240,
      "domainspecific tasks": 3067,
      "demonstrate approach": 2646,
      "model fusion": 7152,
      "framework outperforms": 4271,
      "commonly used": 1799,
      "llm retrieval": 6426,
      "retrieval methods": 9947,
      "chatgpt likely": 1578,
      "different methods": 2890,
      "emotional support": 3266,
      "like gpt": 6329,
      "capabilities language": 1341,
      "processing paper": 8909,
      "paper examines": 8225,
      "score human": 10157,
      "slightly different": 10501,
      "different human": 2883,
      "age gender": 494,
      "based language": 1042,
      "llms make": 6590,
      "understand capabilities": 11755,
      "capabilities limitations": 1346,
      "llms exhibited": 6526,
      "emergent incontext": 3258,
      "models solve": 7527,
      "solve complex": 10549,
      "propose effective": 9063,
      "effective efficient": 3138,
      "twostage framework": 11712,
      "boost reasoning": 1240,
      "reasoning abilities": 9402,
      "llms test": 6669,
      "demonstrations multiple": 2709,
      "query input": 9262,
      "llms effectively": 6511,
      "effectively efficiently": 3152,
      "method achieves": 6936,
      "terms accuracy": 11357,
      "accuracy efficiency": 232,
      "multitask instruction": 7676,
      "tuning llama": 11697,
      "preliminary study": 8708,
      "attracted substantial": 898,
      "academic industrial": 193,
      "fewshot zeroshot": 4041,
      "ability handle": 144,
      "tasks recent": 11267,
      "recent work": 9483,
      "data recently": 2451,
      "recently proposed": 9504,
      "exhibits impressive": 3670,
      "broad range": 1289,
      "range tasks": 9321,
      "performance llms": 8409,
      "explore capabilities": 3839,
      "capabilities llms": 1347,
      "scenarios choose": 10123,
      "data tasks": 2463,
      "data significantly": 2459,
      "insights future": 5366,
      "application evaluation": 695,
      "mental health": 6920,
      "increasing attention": 5227,
      "developing evaluating": 2831,
      "focus exploring": 4176,
      "scenarios evaluation": 10126,
      "evaluation experiments": 3554,
      "assessment findings": 847,
      "findings demonstrate": 4087,
      "demonstrate feasibility": 2657,
      "feasibility using": 4007,
      "impact prompt": 5081,
      "prompt designs": 8989,
      "user experience": 11910,
      "text classification": 11384,
      "promptbased data": 9002,
      "requires substantial": 9769,
      "computation resources": 1968,
      "recent efforts": 9464,
      "tasks practical": 11256,
      "area research": 798,
      "paper investigate": 8238,
      "llms achieve": 6444,
      "blackbox model": 1220,
      "model feature": 7150,
      "feature extractor": 4011,
      "data data": 2399,
      "using promptbased": 11964,
      "smaller parameter": 10516,
      "parameter size": 8280,
      "model extensive": 7147,
      "experiments text": 3805,
      "datasets approach": 2514,
      "performs par": 8471,
      "ai systems": 522,
      "annotated datasets": 644,
      "designed specific": 2766,
      "specific tasks": 10621,
      "tasks difficult": 11191,
      "active learning": 336,
      "learning mechanism": 6225,
      "cases address": 1409,
      "address limitations": 404,
      "limitations present": 6343,
      "learning prompt": 6238,
      "models conduct": 7282,
      "annotation process": 648,
      "process language": 8887,
      "exhibited remarkable": 3664,
      "finetuning models": 4136,
      "expensive timeconsuming": 3728,
      "timeconsuming obtain": 11479,
      "paper introduces": 8236,
      "introduces novel": 5550,
      "unsupervised method": 11858,
      "improves llms": 5149,
      "approach grounded": 744,
      "text quality": 11411,
      "generate text": 4469,
      "building insight": 1311,
      "dual roles": 3104,
      "student teacher": 10833,
      "llm generates": 6414,
      "generates answers": 4492,
      "model parameters": 7195,
      "using reinforcement": 11967,
      "tasks reasoning": 11266,
      "reasoning problems": 9434,
      "effectively improves": 3157,
      "translation tasks": 11642,
      "tasks furthermore": 11215,
      "models different": 7302,
      "different sizes": 2906,
      "prompts paper": 9039,
      "llms answer": 6454,
      "ask llms": 821,
      "llms provide": 6622,
      "answer conditioned": 657,
      "prompting strategy": 9025,
      "strategy produce": 10789,
      "instructionfollowing data": 5428,
      "opensource chat": 8056,
      "higher quality": 4884,
      "existing opensource": 3705,
      "chatgpts capability": 1611,
      "model publicly": 7206,
      "strengths weaknesses": 10797,
      "performance range": 8424,
      "range natural": 9318,
      "tasks ability": 11158,
      "ability generate": 143,
      "remains underexplored": 9664,
      "aims investigate": 551,
      "generation capabilities": 4520,
      "llms analysis": 6453,
      "factors influence": 3969,
      "small language": 10507,
      "models slms": 7526,
      "named entity": 7693,
      "entity recognition": 3427,
      "recognition relation": 9513,
      "various settings": 12093,
      "struggle complex": 10825,
      "analysis reveals": 626,
      "pivotal role": 8507,
      "instructions llms": 5439,
      "llms generate": 6545,
      "provides comprehensive": 9174,
      "generation abilities": 4512,
      "novel perspective": 7929,
      "utilizing llms": 12003,
      "llms data": 6491,
      "domains tasks": 3063,
      "including context": 5178,
      "context understanding": 2148,
      "understanding code": 11766,
      "generation language": 4540,
      "work aim": 12247,
      "data analysis": 2384,
      "propose framework": 9067,
      "tackle problems": 11087,
      "design taskspecific": 2756,
      "compare performance": 1836,
      "professional human": 8927,
      "gpt4 achieve": 4689,
      "achieve comparable": 242,
      "performance humans": 8396,
      "humans provide": 5021,
      "shed light": 10338,
      "technical report": 11329,
      "report large": 9714,
      "like llama": 6333,
      "performances various": 8461,
      "specific domains": 10610,
      "domainspecific knowledge": 3065,
      "problems paper": 8874,
      "domain knowledge": 3054,
      "training stage": 11584,
      "stage design": 10675,
      "model tackle": 7227,
      "practical issues": 8668,
      "alleviate hallucination": 586,
      "hallucination problem": 4795,
      "release data": 9620,
      "nlg evaluation": 7857,
      "generation nlg": 4556,
      "evaluation benchmarks": 3544,
      "benchmarks limited": 1141,
      "result poor": 9870,
      "forms evaluation": 4205,
      "issue paper": 5591,
      "novel method": 7926,
      "method named": 6958,
      "existing evaluation": 3685,
      "leverage large": 6276,
      "nlg tasks": 7858,
      "translation text": 11643,
      "image caption": 5059,
      "correlation human": 2248,
      "query reformulation": 9263,
      "existing methods": 3698,
      "models ability": 7252,
      "ability produce": 162,
      "question paper": 9277,
      "retrieval performance": 9948,
      "performance propose": 8422,
      "crucial aspect": 2327,
      "nlp research": 7869,
      "adequately addressed": 423,
      "including large": 5181,
      "remains largely": 9657,
      "largely unexplored": 6121,
      "model paper": 7193,
      "methods propose": 7004,
      "propose probabilistic": 9097,
      "addresses issue": 416,
      "demonstrate proposed": 2663,
      "realworld datasets": 9388,
      "finally analyze": 4070,
      "analyze performance": 633,
      "issue large": 5588,
      "language modelsllms": 5957,
      "chatgpt evaluator": 1556,
      "effective strategies": 3146,
      "human assistance": 4952,
      "responses chatgpt": 9850,
      "evaluation bias": 3545,
      "alignment human": 576,
      "human judgments": 4976,
      "human annotation": 4949,
      "research large": 9796,
      "research focuses": 9793,
      "enhancing performance": 3409,
      "performance existing": 8385,
      "existing knowledge": 3689,
      "llms limited": 6588,
      "aims evaluate": 545,
      "evaluate llms": 3509,
      "assessing ability": 842,
      "ability identify": 146,
      "introduce automated": 5535,
      "questions diverse": 9293,
      "diverse categories": 3013,
      "gpt3 instructgpt": 4681,
      "models demonstrate": 7296,
      "findings highlight": 4089,
      "capabilities models": 1348,
      "llms remarkable": 6633,
      "advancements field": 460,
      "llms explore": 6532,
      "behavioral characteristics": 1102,
      "behavioral patterns": 1103,
      "furthermore experiments": 4332,
      "llms study": 6663,
      "shedding light": 10343,
      "llms anticipate": 6455,
      "generation generative": 4534,
      "generative pretraining": 4618,
      "task aims": 11113,
      "response user": 9847,
      "user input": 11912,
      "reasoning process": 9435,
      "task challenging": 11118,
      "significant discrepancy": 10410,
      "user queries": 11916,
      "limited scale": 6358,
      "bridging gap": 1278,
      "text structured": 11415,
      "graphs paper": 4739,
      "limitations propose": 6345,
      "novel pretrained": 7930,
      "task specifically": 11146,
      "task pretrain": 11140,
      "model goal": 7155,
      "additionally propose": 384,
      "propose automatic": 9058,
      "large scale": 6118,
      "methods experimental": 6984,
      "baseline systems": 1071,
      "systems remarkable": 11067,
      "analysis demonstrates": 620,
      "task automation": 11116,
      "recent success": 9478,
      "shown promising": 10384,
      "completing tasks": 1892,
      "user instructions": 11913,
      "increasing number": 5228,
      "number tasks": 7953,
      "explore question": 3847,
      "framework facilitate": 4251,
      "users privacy": 11929,
      "generic knowledge": 4626,
      "evaluate proposed": 3513,
      "diverse scenarios": 3026,
      "llm chatgpt": 6404,
      "chatgpt bring": 1539,
      "data science": 2458,
      "questions large": 9294,
      "potential risks": 8634,
      "risks llms": 10021,
      "like gpt4": 6332,
      "traditional ai": 11516,
      "ai tools": 525,
      "llms specifically": 6660,
      "remarkable capabilities": 9669,
      "humanlevel performance": 5004,
      "directly used": 2953,
      "specialized domains": 10602,
      "explore potential": 3844,
      "llms gpt4": 6553,
      "results real": 9929,
      "demonstrate potential": 2662,
      "future advancements": 4346,
      "launch chatgpt": 6159,
      "employ chatgpt": 3284,
      "prompts responses": 9040,
      "question accuracy": 9265,
      "anomaly detection": 654,
      "detection based": 2802,
      "play critical": 8526,
      "reliability software": 9631,
      "software systems": 10538,
      "studies explored": 10841,
      "achieved notable": 267,
      "face limitations": 3947,
      "resource consumption": 9823,
      "detection framework": 2804,
      "framework referred": 4275,
      "accuracy response": 233,
      "log data": 6697,
      "chatgpt provide": 1590,
      "comparable human": 1821,
      "human experts": 4968,
      "reduce manual": 9544,
      "manual verification": 6823,
      "extensively evaluate": 3910,
      "baseline methods": 1067,
      "methods terms": 7016,
      "tuned models": 11686,
      "reliable evaluation": 9633,
      "challenges associated": 1477,
      "privacy protection": 8847,
      "response challenges": 9844,
      "challenges introduce": 1484,
      "superior model": 10974,
      "given llms": 4633,
      "evaluation ability": 3540,
      "models tuned": 7548,
      "avoiding potential": 995,
      "potential data": 8621,
      "data leakage": 2431,
      "crucial achieving": 2326,
      "intelligence existing": 5471,
      "existing approaches": 3678,
      "extremely large": 3940,
      "models gpt4": 7349,
      "zeroshot manner": 12317,
      "supervised learning": 10988,
      "train limited": 11527,
      "models remains": 7513,
      "remains uncertain": 9662,
      "models achieve": 7253,
      "address question": 412,
      "designed automatically": 2760,
      "generate diverse": 4445,
      "models minimal": 7466,
      "minimal human": 7050,
      "human intervention": 4974,
      "spanning 50": 10586,
      "distinct categories": 2999,
      "resulting models": 9876,
      "respectively finally": 9840,
      "finally evaluate": 4076,
      "evaluate ability": 3500,
      "ability models": 157,
      "unseen tools": 11850,
      "training experimental": 11552,
      "like gpt35": 6331,
      "novel task": 7933,
      "task propose": 11142,
      "new benchmark": 7809,
      "tabular data": 11079,
      "academic papers": 194,
      "introduce metrics": 5544,
      "metrics evaluate": 7025,
      "aims identify": 549,
      "modern large": 7566,
      "llms propose": 6620,
      "openais gpt4": 8040,
      "code benchmark": 1701,
      "benchmark publicly": 1127,
      "cognitive ability": 1755,
      "chatgpt shown": 1596,
      "cognitive abilities": 1754,
      "abilities different": 122,
      "different models": 2894,
      "different fields": 2882,
      "test results": 11369,
      "traditional metrics": 11521,
      "evaluating llms": 3529,
      "propose adaptive": 9055,
      "llm evaluation": 6408,
      "evaluation using": 3582,
      "dynamically adjusts": 3108,
      "questions difficulty": 9292,
      "models abilities": 7251,
      "abilities using": 130,
      "llms compared": 6479,
      "compared humans": 1849,
      "humans easily": 5017,
      "nlp models": 7868,
      "models aim": 7257,
      "diagnostic reports": 2856,
      "behaves like": 1099,
      "questions conduct": 9290,
      "llms aspects": 6460,
      "mathematical reasoning": 6865,
      "models significantly": 7525,
      "models using": 7556,
      "evaluating large": 3526,
      "models chinese": 7277,
      "specifically designed": 10626,
      "financial text": 4082,
      "availability data": 971,
      "developing effective": 2830,
      "effective text": 3147,
      "text processing": 11408,
      "advancements large": 462,
      "yielded remarkable": 12301,
      "performance natural": 8412,
      "tasks primarily": 11259,
      "analysis dataset": 619,
      "opensource llms": 8062,
      "llms using": 6676,
      "firmly believe": 4154,
      "serve valuable": 10297,
      "valuable resource": 12021,
      "tasks focus": 11213,
      "dataset publicly": 2506,
      "reasoning capacity": 9415,
      "multimodal comprehension": 7624,
      "study explore": 10852,
      "student model": 10832,
      "intermediate reasoning": 5514,
      "reasoning steps": 9437,
      "llms cot": 6489,
      "cot prompts": 2269,
      "present novel": 8719,
      "distillation method": 2994,
      "stateoftheart accuracy": 10702,
      "crossdomain generalization": 2318,
      "advancement large": 453,
      "llms led": 6579,
      "regarding potential": 9579,
      "llms extract": 6534,
      "financial texts": 4083,
      "development chinese": 2835,
      "provide rigorous": 9166,
      "efficacy various": 3183,
      "specialized domain": 10601,
      "news text": 7853,
      "models generative": 7343,
      "generative llm": 4601,
      "pretrained llm": 8758,
      "finetuned llm": 4115,
      "extraction large": 3929,
      "comparative analysis": 1830,
      "improving llms": 5160,
      "llms performance": 6607,
      "llms evaluated": 6521,
      "benchmark following": 1119,
      "existing systems": 3711,
      "performance human": 8395,
      "human beings": 4954,
      "reasoning methods": 9430,
      "rely external": 9643,
      "structures paper": 10822,
      "highly effective": 4903,
      "pretraining task": 8797,
      "models help": 7350,
      "achieves stateoftheart": 301,
      "different pretrained": 2899,
      "general language": 4412,
      "testing tasks": 11379,
      "era large": 3452,
      "chatgpt comparison": 1543,
      "emotion recognition": 3264,
      "research topic": 9808,
      "states current": 10735,
      "current works": 2364,
      "datasets lack": 2535,
      "enhance reliability": 3396,
      "annotations paper": 650,
      "contrast previous": 2172,
      "takes step": 11100,
      "providing explanations": 9183,
      "introduce benchmark": 5536,
      "metrics observe": 7030,
      "observe necessity": 7988,
      "multimodal large": 7632,
      "longstanding challenge": 6714,
      "understanding capabilities": 11764,
      "capabilities recent": 1356,
      "multimodal llm": 7639,
      "legal large": 6258,
      "bases large": 1081,
      "llms shown": 6648,
      "shown potential": 10382,
      "potential revolutionize": 8633,
      "tasks various": 11299,
      "various domains": 12058,
      "large models": 6109,
      "data quality": 2450,
      "carefully designed": 1402,
      "overcome problem": 8181,
      "legal data": 6257,
      "effectively reduce": 3161,
      "relying solely": 9645,
      "enhance ability": 3386,
      "capabilities large": 1342,
      "models opensourced": 7481,
      "models crucial": 7291,
      "highly capable": 4900,
      "ai models": 518,
      "work present": 12259,
      "dataset consists": 2489,
      "generative language": 4595,
      "culture values": 2339,
      "context generation": 2141,
      "quality control": 9239,
      "coverage high": 2285,
      "effectiveness dataset": 3169,
      "dataset detecting": 2495,
      "model bias": 7115,
      "chinese large": 1628,
      "certain extent": 1444,
      "avoid generating": 990,
      "research opportunities": 9801,
      "data large": 2429,
      "recent research": 9475,
      "given rise": 4640,
      "framework combines": 4243,
      "structure learning": 10818,
      "leverage power": 6280,
      "statistical analysis": 10738,
      "build novel": 1307,
      "learning introduce": 6219,
      "set prompts": 10311,
      "data demonstrate": 2400,
      "demonstrate significant": 2664,
      "critical challenges": 2310,
      "pioneering study": 8501,
      "llms contain": 6487,
      "emphasizing need": 3272,
      "human values": 4990,
      "model typically": 7233,
      "llm responses": 6425,
      "aligning llms": 571,
      "generating responses": 4509,
      "generated llm": 4481,
      "experiments shown": 3801,
      "comparable results": 1825,
      "enhance performance": 3392,
      "alignment chatgpt": 573,
      "study recent": 10868,
      "numerous tasks": 7964,
      "based given": 1038,
      "given text": 4642,
      "text considering": 11385,
      "remarkable abilities": 9666,
      "abilities various": 132,
      "provide preliminary": 9160,
      "task generating": 11131,
      "variety prompting": 12047,
      "explore chatgpts": 3840,
      "chatgpts ability": 1610,
      "chatgpt analyzing": 1537,
      "reveal chatgpt": 9966,
      "chatgpt zeroshot": 1602,
      "prompting performance": 9021,
      "performance gap": 8389,
      "corresponding stateoftheart": 2251,
      "stateoftheart model": 10718,
      "sentiment classification": 10272,
      "learning better": 6196,
      "structured data": 10820,
      "data forms": 2413,
      "present despite": 8717,
      "large pretrained": 6113,
      "domains chatgpt": 3057,
      "common knowledge": 1794,
      "data remains": 2453,
      "work identify": 12253,
      "identify crucial": 5046,
      "research challenges": 9777,
      "data pretraining": 2442,
      "work folds": 12252,
      "pretraining dubbed": 8777,
      "propose implement": 9070,
      "vision natural": 12141,
      "extensive empirical": 3888,
      "performance supervised": 8435,
      "paper introduce": 8234,
      "dataset aimed": 2477,
      "questionanswer qa": 9283,
      "qa pairs": 9228,
      "safety measures": 10082,
      "dataset provides": 2505,
      "development deployment": 2836,
      "deployment llms": 2725,
      "project page": 8956,
      "model outputs": 7192,
      "stepbystep reasoning": 10751,
      "design environment": 2748,
      "alignment safe": 580,
      "improve training": 5138,
      "training stability": 11583,
      "opensource implementations": 8057,
      "significant challenges": 10406,
      "llms alignment": 6452,
      "given natural": 4634,
      "language questions": 5973,
      "prompt learning": 8993,
      "llms emerged": 6513,
      "emerged recent": 3244,
      "prompts lead": 9036,
      "llms understand": 6673,
      "input question": 5353,
      "generate corresponding": 4443,
      "faces challenges": 3950,
      "existing work": 3715,
      "prompts llms": 9038,
      "semantic gap": 10234,
      "prompting method": 9018,
      "related given": 9601,
      "given question": 4639,
      "questions propose": 9298,
      "propose strategies": 9103,
      "leverage llms": 6279,
      "generate executable": 4446,
      "design dynamic": 2747,
      "previously generated": 8823,
      "strong baseline": 10806,
      "models comprehensive": 7281,
      "comprehensive overview": 1944,
      "llms recently": 6628,
      "recently demonstrated": 9492,
      "capabilities natural": 1350,
      "tasks success": 11284,
      "success llms": 10919,
      "encompass diverse": 3334,
      "context length": 2143,
      "alignment training": 582,
      "training datasets": 11548,
      "rapid development": 9337,
      "llm research": 6424,
      "overview recent": 8196,
      "recent developments": 9463,
      "systematic treatment": 11051,
      "existing literature": 3695,
      "models datasets": 7294,
      "broader research": 1294,
      "researchers practitioners": 9815,
      "insights extensive": 5365,
      "existing works": 3716,
      "domain adaptation": 3052,
      "action recognition": 329,
      "findings study": 4097,
      "generate logic": 4455,
      "specifically models": 10637,
      "models predictions": 7492,
      "measures consistency": 6886,
      "compared baseline": 1839,
      "framework enhance": 4249,
      "potential challenges": 8619,
      "llms knowledge": 6574,
      "terms top1": 11365,
      "fundamental challenging": 4318,
      "aspect natural": 827,
      "gap propose": 4385,
      "benchmark evaluate": 1116,
      "tasks covering": 11184,
      "understanding translation": 11787,
      "contain rich": 2124,
      "analysis design": 621,
      "test suite": 11373,
      "models learn": 7378,
      "based transformer": 1061,
      "llms results": 6636,
      "consistently improves": 2095,
      "datasets pretrained": 2542,
      "teaching large": 11321,
      "legal professionals": 6261,
      "simple prompting": 10466,
      "models produce": 7498,
      "performed zeroshot": 8464,
      "gpt3 models": 4682,
      "results llms": 9910,
      "thought prompting": 11464,
      "enables model": 3312,
      "methods method": 7002,
      "method enables": 6950,
      "evolution large": 3602,
      "llms growing": 6556,
      "evaluation human": 3557,
      "increasingly important": 5232,
      "knowledge reasoning": 5698,
      "chinese context": 1623,
      "context paper": 2145,
      "chinese llms": 1631,
      "llms conduct": 6482,
      "conduct human": 2031,
      "evaluation findings": 3555,
      "llms perform": 6606,
      "automatic human": 938,
      "alignment different": 574,
      "different aspects": 2875,
      "user information": 11911,
      "information needs": 5307,
      "demonstrated exceptional": 2673,
      "exceptional capabilities": 3634,
      "generation knowledge": 4539,
      "knowledge inference": 5679,
      "research llms": 9800,
      "model evaluation": 7140,
      "models provide": 7501,
      "relevant information": 9628,
      "information llms": 5305,
      "challenges exist": 1479,
      "ethical considerations": 3493,
      "research chinese": 9779,
      "valuable insights": 12020,
      "paper provides": 8262,
      "enhancement llms": 3400,
      "open challenges": 8032,
      "factual knowledge": 3976,
      "models retrieval": 7518,
      "opendomain question": 8045,
      "require substantial": 9759,
      "solving wide": 10561,
      "world knowledge": 12279,
      "knowledge including": 5678,
      "tasks remains": 11271,
      "unclear llms": 11740,
      "llms able": 6442,
      "study present": 10860,
      "opendomain qa": 8044,
      "primary research": 8830,
      "research questions": 9804,
      "llms possess": 6612,
      "quality results": 9244,
      "evaluating models": 3530,
      "evaluation methods": 3566,
      "models paper": 7482,
      "novel approach": 7915,
      "overcoming limitations": 8183,
      "limitations previous": 6344,
      "previous methods": 8809,
      "various forms": 12068,
      "capabilities various": 1360,
      "llms providing": 6624,
      "abilities solve": 129,
      "complex problems": 1899,
      "editing large": 3127,
      "model large": 7170,
      "llms showcased": 6646,
      "showcased remarkable": 10361,
      "automatic prompt": 946,
      "leverages llms": 6290,
      "taking account": 11102,
      "process helps": 8884,
      "helps llms": 4856,
      "llms better": 6468,
      "better align": 1174,
      "thinking llms": 11453,
      "tasks experimental": 11203,
      "performance highquality": 8394,
      "exhibits notable": 3671,
      "prompt generation": 8991,
      "generation good": 4535,
      "outofdistribution ood": 8131,
      "plays vital": 8538,
      "vital role": 12158,
      "role enhancing": 10051,
      "ml models": 7086,
      "diverse natural": 3021,
      "existing research": 3707,
      "like bert": 6321,
      "bert roberta": 1157,
      "roberta gpt2": 10033,
      "scales pretraining": 10113,
      "pretraining objectives": 8792,
      "paper embarks": 8221,
      "empirical investigation": 3277,
      "llama series": 6392,
      "demonstrates superior": 2699,
      "detectors provide": 2814,
      "provide intriguing": 9158,
      "models new": 7475,
      "understanding llms": 11777,
      "sequence understanding": 10285,
      "understanding large": 11774,
      "shown impressive": 10380,
      "ability opendomain": 158,
      "input format": 5349,
      "prompts demonstrations": 9031,
      "tasks event": 11198,
      "event extraction": 3592,
      "extraction entity": 3928,
      "end present": 3351,
      "bilingual english": 1197,
      "model instructiontuned": 7167,
      "capable performing": 1378,
      "unseen domains": 11848,
      "conduct empirical": 2024,
      "empirical studies": 3280,
      "transfer tasks": 11597,
      "tasks model": 11245,
      "model accessible": 7099,
      "broad applications": 1288,
      "significantly boost": 10425,
      "models consistently": 7286,
      "achieve best": 238,
      "best results": 1169,
      "results different": 9897,
      "different benchmarks": 2877,
      "benchmarks recent": 1143,
      "zerofewshot learning": 12308,
      "learning chainofthought": 6198,
      "models present": 7493,
      "present paper": 8721,
      "paper comprehensively": 8212,
      "comprehensively investigate": 1952,
      "investigate llms": 5562,
      "aspects including": 831,
      "pose potential": 8585,
      "recommendation systems": 9521,
      "systems traditional": 11068,
      "methods usually": 7018,
      "recommendation results": 9520,
      "long tail": 6706,
      "users address": 11922,
      "address issues": 398,
      "general framework": 4405,
      "llm knowledge": 6419,
      "knowledge graphs": 5675,
      "graphs kg": 4738,
      "semantic representations": 10240,
      "order improve": 8109,
      "improve semantic": 5137,
      "semantic understanding": 10245,
      "use llms": 11891,
      "llms powerful": 6614,
      "rich semantic": 10006,
      "addition method": 375,
      "structural information": 10816,
      "various traditional": 12097,
      "traditional models": 11522,
      "framework significantly": 4276,
      "personalized recommendations": 8479,
      "field code": 4049,
      "ensemble learning": 3415,
      "llms prompting": 6619,
      "prompting recently": 9022,
      "abilities variety": 131,
      "llms existing": 6528,
      "paradigm requires": 8273,
      "substantial manual": 10895,
      "manual effort": 6819,
      "limitations specifically": 6348,
      "given fact": 4632,
      "based llm": 1047,
      "effect evaluation": 3135,
      "majority voting": 6786,
      "types tasks": 11719,
      "significant margin": 10414,
      "code publicly": 1729,
      "sequence generation": 10280,
      "generation large": 4541,
      "llms capable": 6473,
      "instruction finetuning": 5403,
      "task instruction": 11132,
      "instruction input": 5410,
      "selfattention mechanism": 10215,
      "mechanism llms": 6891,
      "llms models": 6593,
      "risk instruction": 10018,
      "instruction forgetting": 5408,
      "mitigate issue": 7069,
      "theoretical analysis": 11445,
      "models learning": 7379,
      "instructionfollowing capabilities": 5427,
      "approach consistently": 735,
      "data annotation": 2385,
      "notably method": 7908,
      "improves zeroshot": 5155,
      "research applications": 9775,
      "data models": 2436,
      "network architecture": 7785,
      "called attention": 1330,
      "paper large": 8240,
      "softmax regression": 10534,
      "regression problem": 9586,
      "regression function": 9585,
      "exhibit impressive": 3658,
      "learning abilities": 6184,
      "knowledge solving": 5702,
      "realworld tasks": 9394,
      "unleash potential": 11828,
      "enabling llms": 3318,
      "mechanism designed": 6890,
      "optimal solution": 8090,
      "dataset demonstrate": 2493,
      "10 improvement": 3,
      "diverse tasks": 3028,
      "api calls": 683,
      "highlighting effectiveness": 4896,
      "effectiveness efficiency": 3170,
      "social bias": 10525,
      "models recent": 7509,
      "prompting researchers": 9023,
      "explicit implicit": 3826,
      "bias propose": 1189,
      "llms known": 6576,
      "llms capabilities": 6472,
      "data generation": 2418,
      "generation using": 4587,
      "instrumental enabling": 5451,
      "various opendomain": 12087,
      "highquality instruction": 4911,
      "quality human": 9243,
      "models generate": 7341,
      "generate instruction": 4451,
      "work explore": 12250,
      "generate highquality": 4449,
      "various existing": 12065,
      "instruction generation": 5409,
      "generation methods": 4551,
      "novel strategies": 7932,
      "enhance quality": 3393,
      "models hope": 7351,
      "generating highquality": 4500,
      "models language": 7371,
      "using generative": 11946,
      "ai paper": 519,
      "using advanced": 11936,
      "advanced ai": 444,
      "tools like": 11500,
      "stable diffusion": 10671,
      "compared original": 1851,
      "models natural": 7471,
      "natural science": 7748,
      "field natural": 4050,
      "new capabilities": 7810,
      "tailored llms": 11097,
      "llms natural": 6595,
      "opensource llm": 8061,
      "llm incorporating": 6417,
      "scientific knowledge": 10152,
      "factual correctness": 3974,
      "model automating": 7110,
      "generation scientific": 4578,
      "eliminates need": 3221,
      "model explore": 7146,
      "training strategies": 11585,
      "models research": 7515,
      "showcases ability": 10363,
      "ability llm": 153,
      "despite great": 2780,
      "great advance": 4744,
      "models mllms": 7467,
      "instruction dataset": 5400,
      "dataset building": 2480,
      "makes current": 6792,
      "current mllms": 2357,
      "relatively low": 9615,
      "cost paper": 2260,
      "generation model": 4552,
      "dataset training": 2510,
      "enhance model": 3390,
      "model capability": 7119,
      "compared previous": 1852,
      "data collection": 2395,
      "data generated": 2416,
      "different types": 2912,
      "dataset based": 2479,
      "gpt4 generate": 4696,
      "data type": 2466,
      "correctness prompt": 2244,
      "prompt design": 8988,
      "generation results": 4577,
      "results previous": 9923,
      "propose interactive": 9074,
      "interactive prompt": 5498,
      "interaction human": 5489,
      "correctness generated": 2243,
      "general solution": 4417,
      "model instruction": 7166,
      "generation despite": 4527,
      "despite superior": 2789,
      "generate natural": 4459,
      "according given": 219,
      "given task": 4641,
      "models capture": 7274,
      "capture information": 1391,
      "language instructions": 5770,
      "knowledge language": 5682,
      "models finally": 7331,
      "efficient compared": 3193,
      "compared traditional": 1858,
      "models despite": 7301,
      "fewer parameters": 4026,
      "approach generate": 743,
      "models improves": 7356,
      "augmenting large": 922,
      "llms external": 6533,
      "external tools": 3917,
      "emerged promising": 3243,
      "promising approach": 8966,
      "learning task": 6245,
      "task trained": 11147,
      "llms learn": 6578,
      "learning model": 6229,
      "applications existing": 704,
      "methods train": 7017,
      "train model": 11528,
      "novel tool": 7937,
      "learning method": 6226,
      "use various": 11896,
      "propose iterative": 9075,
      "experiments conducted": 3769,
      "realworld settings": 9393,
      "settings demonstrate": 10317,
      "application scenarios": 701,
      "semantic alignment": 10227,
      "methods depend": 6982,
      "user intent": 11914,
      "research introduce": 9795,
      "benefits terms": 1151,
      "annotation method": 647,
      "model termed": 7228,
      "data introduce": 2427,
      "introduce effective": 5538,
      "prompt augmentation": 8986,
      "method accomplishes": 6933,
      "desired style": 2774,
      "multitask benchmark": 7675,
      "long context": 6704,
      "llms demonstrate": 6493,
      "demonstrate impressive": 2658,
      "performance language": 8402,
      "works proposed": 12275,
      "proposed methods": 9121,
      "methods improve": 6992,
      "improve llms": 5128,
      "context windows": 2149,
      "memory mechanisms": 6917,
      "rigorous evaluation": 10013,
      "datasets task": 2550,
      "average length": 983,
      "tasks code": 11172,
      "code completion": 1704,
      "standardized unified": 10686,
      "unified format": 11801,
      "evaluation llms": 3563,
      "llms comprehensive": 6481,
      "opensourced models": 8067,
      "compression technique": 1955,
      "understanding capability": 11765,
      "users express": 11925,
      "effective mental": 3141,
      "timeconsuming task": 11480,
      "leveraging capabilities": 6294,
      "recent advances": 9459,
      "advances large": 467,
      "models offers": 7479,
      "challenge paper": 1472,
      "capable analyzing": 1376,
      "application large": 696,
      "models field": 7330,
      "health support": 4838,
      "empowered large": 3296,
      "benchmark evaluation": 1118,
      "evaluation large": 3560,
      "emerged new": 3240,
      "address challenge": 386,
      "methods including": 6993,
      "including question": 5191,
      "based findings": 1031,
      "findings propose": 4094,
      "execution accuracy": 3651,
      "sets new": 10313,
      "various scenarios": 12092,
      "advantages disadvantages": 475,
      "hope work": 4934,
      "work provides": 12264,
      "deeper understanding": 2613,
      "model multimodal": 7185,
      "model mllm": 7182,
      "multimodal data": 7626,
      "data current": 2398,
      "individual pretrained": 5251,
      "specific subtasks": 10619,
      "llms integrate": 6569,
      "task realworld": 11143,
      "common practice": 1796,
      "inspired study": 5381,
      "results multiple": 9917,
      "result obtained": 9869,
      "performance mllm": 8410,
      "models parallel": 7485,
      "process input": 8886,
      "input data": 5348,
      "data generate": 2415,
      "study using": 10871,
      "sparked significant": 10589,
      "language capabilities": 5759,
      "modality alignment": 7096,
      "remains open": 9660,
      "used inputs": 11902,
      "data difficult": 2403,
      "issues propose": 5597,
      "encoder llm": 3327,
      "llm exhibits": 6409,
      "training process": 11579,
      "prompts llm": 9037,
      "llm generate": 6413,
      "endtoend manner": 3362,
      "demonstrate straightforward": 2665,
      "straightforward process": 10772,
      "extend capabilities": 3877,
      "opensource large": 8060,
      "human intentions": 4973,
      "unleash power": 11829,
      "power llms": 8649,
      "equips llms": 3448,
      "training multiple": 11572,
      "llms enabling": 6516,
      "seamless integration": 10170,
      "model apis": 7107,
      "unified way": 11807,
      "comprehensive framework": 1941,
      "framework proposed": 4273,
      "finally showcase": 4078,
      "gaining increasing": 4368,
      "attention potential": 893,
      "learning techniques": 6247,
      "expected results": 3726,
      "propose approach": 9056,
      "approach transform": 765,
      "llms traditional": 6670,
      "approach fewshot": 740,
      "fewshot incontext": 4031,
      "correct answer": 2234,
      "using technique": 11975,
      "experiments method": 3788,
      "method achieve": 6934,
      "achieve correct": 247,
      "method provides": 6963,
      "provides solution": 9179,
      "large number": 6112,
      "process model": 8891,
      "deep learningbased": 2601,
      "learningbased methods": 6250,
      "methods face": 6986,
      "face challenges": 3946,
      "domains lack": 3058,
      "application chatgpt": 694,
      "aims explore": 546,
      "knowledge largescale": 5689,
      "largescale corpora": 6128,
      "detection conduct": 2803,
      "detection task": 2810,
      "grounding large": 4760,
      "model agents": 7105,
      "automatic reasoning": 947,
      "reasoning planning": 9433,
      "planning capability": 8515,
      "semantic knowledge": 10236,
      "human world": 4991,
      "hinders applications": 4920,
      "existing studies": 3710,
      "studies try": 10847,
      "finetune llm": 4105,
      "utilize predefined": 11990,
      "bridge llms": 1275,
      "human efforts": 4961,
      "single task": 10487,
      "strengths llms": 10796,
      "llms autonomously": 6465,
      "framework automatically": 4237,
      "employs llm": 3291,
      "guidance successfully": 4779,
      "performance challenging": 8367,
      "tasks compared": 11179,
      "learning methods": 6227,
      "proving effectiveness": 9187,
      "generate responses": 4467,
      "responses given": 9854,
      "compared conventional": 1843,
      "translation quality": 11640,
      "linguistic features": 6370,
      "proved effective": 9143,
      "outcomes indicate": 8125,
      "mathematical problems": 6864,
      "studies typically": 10848,
      "models unable": 7550,
      "surpassing gpt4": 11018,
      "similar performance": 10455,
      "llms achieved": 6445,
      "remarkable success": 9686,
      "success nlp": 10923,
      "multimodal tasks": 7642,
      "tasks despite": 11189,
      "despite successes": 2788,
      "main challenges": 6770,
      "challenges remain": 1492,
      "developing llms": 2832,
      "computational cost": 1971,
      "paper report": 8265,
      "significantly reduce": 10448,
      "training cost": 11543,
      "strategy demonstrate": 10787,
      "existing evaluations": 3688,
      "potential impact": 8627,
      "achieves performance": 294,
      "explored use": 3855,
      "study propose": 10863,
      "propose tuningfree": 9106,
      "tuning parameters": 11700,
      "parameter tuning": 8282,
      "models static": 7533,
      "approach llm": 751,
      "various realworld": 12090,
      "existing llm": 3696,
      "methods mainly": 7000,
      "widely exists": 12218,
      "llms address": 6450,
      "proposed framework": 9116,
      "framework llms": 4268,
      "llms performances": 6608,
      "interaction llms": 5491,
      "furthermore proposed": 4338,
      "framework general": 4255,
      "evaluation method": 3565,
      "translation code": 11636,
      "generation demonstrate": 4526,
      "llmbased autonomous": 6433,
      "autonomous agents": 960,
      "handling diverse": 4806,
      "diverse data": 3014,
      "data learning": 2432,
      "efficient manner": 3197,
      "designed diverse": 2761,
      "despite success": 2787,
      "encounter limitations": 3339,
      "architecture design": 790,
      "prior knowledge": 8835,
      "propose use": 9110,
      "use large": 11887,
      "learning process": 6236,
      "diverse realworld": 3024,
      "node graph": 7886,
      "method dubbed": 6947,
      "performance different": 8380,
      "humanlike decisions": 5007,
      "pseudo data": 9196,
      "models lowresource": 7463,
      "serves cornerstone": 10302,
      "llms introduce": 6571,
      "construct highquality": 2113,
      "experiments using": 3807,
      "data domain": 2404,
      "methods requiring": 7009,
      "model scale": 7212,
      "efficiency furthermore": 3186,
      "furthermore method": 4335,
      "great potential": 4750,
      "models align": 7259,
      "previous research": 8811,
      "human preference": 4983,
      "finetuning step": 4146,
      "frozen llms": 4293,
      "llms directly": 6508,
      "introduce novel": 5546,
      "inference method": 5272,
      "pretrained llms": 8759,
      "llms evaluate": 6520,
      "generation ai": 4514,
      "ai safety": 520,
      "need extra": 7767,
      "gradient computation": 4714,
      "computation parameter": 1966,
      "parameter updates": 8283,
      "eliminating need": 3224,
      "results evaluated": 9899,
      "evaluated gpt4": 3518,
      "establishes new": 3483,
      "attack success": 877,
      "success rate": 10924,
      "draw inspiration": 3088,
      "integrating multiple": 5462,
      "tasks related": 11269,
      "errors resulting": 3470,
      "including contextual": 5179,
      "tasks achieve": 11159,
      "achieve objective": 257,
      "model offers": 7189,
      "seamlessly integrates": 10173,
      "context information": 2142,
      "inspired propose": 5376,
      "systems achieve": 11056,
      "text encoder": 11392,
      "text prompts": 11410,
      "utterances content": 12007,
      "68 relative": 94,
      "prompt given": 8992,
      "chaining large": 1453,
      "learning approaches": 6193,
      "stateoftheart large": 10711,
      "tool usage": 11492,
      "connecting large": 2070,
      "llms excel": 6523,
      "rely carefully": 9642,
      "carefully crafted": 1399,
      "crafted prompts": 2294,
      "process paper": 8893,
      "fast convergence": 4002,
      "approach allows": 730,
      "powerful language": 8656,
      "efficient optimization": 3198,
      "llms based": 6466,
      "respectively furthermore": 9841,
      "connecting llms": 2073,
      "inspire research": 5372,
      "increasingly crucial": 5231,
      "crucial efficiently": 2329,
      "including named": 5186,
      "dialogue systems": 2865,
      "systems recently": 11066,
      "achieved significant": 273,
      "nlp downstream": 7865,
      "tasks lack": 11233,
      "lack specialized": 5747,
      "proposed improve": 9117,
      "parameterefficient tuning": 8289,
      "different domains": 2880,
      "results tasks": 9932,
      "significant margins": 10415,
      "work provide": 12263,
      "provide insights": 9156,
      "technical terms": 11330,
      "model performs": 7199,
      "convolutional neural": 2219,
      "features entities": 4015,
      "incorporating predicted": 5217,
      "model significantly": 7217,
      "significantly improved": 10434,
      "datasets cover": 2521,
      "generate summaries": 4468,
      "develop new": 2824,
      "new datasets": 7816,
      "datasets conduct": 2520,
      "generation capability": 4521,
      "summarization tasks": 10962,
      "summaries generated": 10957,
      "models specifically": 7532,
      "factual consistency": 3973,
      "tasks surpassing": 11286,
      "reference summaries": 9554,
      "works field": 12273,
      "field text": 4054,
      "novel datasets": 7919,
      "chinese language": 1627,
      "propose comprehensive": 9061,
      "create largescale": 2298,
      "largescale chinese": 6127,
      "multiple domains": 7654,
      "ability existing": 141,
      "models explore": 7325,
      "limitations conduct": 6341,
      "conduct evaluations": 2027,
      "using different": 11943,
      "chatgpt results": 1594,
      "semantic features": 10233,
      "relatively good": 9614,
      "improved providing": 5141,
      "work serve": 12265,
      "serve essential": 10296,
      "textual context": 11437,
      "llms helpful": 6558,
      "information corresponding": 5291,
      "corresponding textual": 2252,
      "text representation": 11413,
      "application llms": 700,
      "knowledge improve": 5677,
      "representations llms": 9733,
      "network structure": 7792,
      "promising avenues": 8967,
      "combining llms": 1785,
      "advantage model": 473,
      "llms revolutionized": 6637,
      "revolutionized natural": 9985,
      "catastrophic forgetting": 1415,
      "achieve higher": 250,
      "text llm": 11404,
      "paper explored": 8228,
      "representation ability": 9726,
      "ability different": 139,
      "powerful large": 8658,
      "text prompt": 11409,
      "prompt dataset": 8987,
      "high quality": 4875,
      "different ways": 2913,
      "ways data": 12177,
      "curriculum learning": 2367,
      "experiments ablation": 3762,
      "augmentation methods": 918,
      "methods data": 6980,
      "multiplechoice questions": 7665,
      "explanations generated": 3823,
      "generated questions": 4484,
      "crucial step": 2332,
      "related concepts": 9599,
      "ensure quality": 3417,
      "gpt4 exhibited": 4695,
      "represent significant": 9724,
      "enhancing capabilities": 3405,
      "assistant large": 857,
      "demonstrated great": 2677,
      "framework named": 4270,
      "pretraining supervised": 8795,
      "pretraining dataset": 8775,
      "dataset pretraining": 2504,
      "dataset tailored": 2507,
      "tailored distinct": 11096,
      "instruction pairs": 5411,
      "llms augmented": 6462,
      "additional modules": 379,
      "tasks especially": 11194,
      "especially text": 3474,
      "text generative": 11398,
      "generative tasks": 4622,
      "leads high": 6173,
      "cost model": 2258,
      "online deployment": 8028,
      "address multiple": 407,
      "multiple nlp": 7658,
      "tasks order": 11251,
      "applications specifically": 712,
      "model capture": 7121,
      "twostage training": 11713,
      "training method": 11567,
      "tasks proposed": 11261,
      "performance based": 8364,
      "models various": 7558,
      "opensource language": 8058,
      "methods require": 7008,
      "specifically consider": 10623,
      "different data": 2878,
      "data sources": 2461,
      "leverage complementary": 6274,
      "costly human": 2264,
      "experiments standard": 3802,
      "standard benchmarks": 10683,
      "models use": 7554,
      "generalization performance": 4429,
      "finally conduct": 4072,
      "effectiveness robustness": 3177,
      "utilizing large": 11999,
      "strategies construct": 10776,
      "finetuning datasets": 4124,
      "datasets chinese": 2519,
      "finetune llms": 4106,
      "reasoning capability": 9414,
      "augment llms": 912,
      "objective subjective": 7975,
      "subjective dimensions": 10881,
      "quantitative qualitative": 9250,
      "qualitative results": 9234,
      "users diverse": 11924,
      "resources available": 9829,
      "llms presents": 6616,
      "lack domain": 5742,
      "domain expertise": 3053,
      "approach captures": 733,
      "nested structure": 7783,
      "pipeline achieves": 8503,
      "review essential": 9973,
      "current methods": 2356,
      "shown promise": 10383,
      "revolutionizing natural": 9990,
      "issues paper": 5595,
      "approach leverages": 750,
      "knowledge enhance": 5666,
      "use natural": 11892,
      "practical implementation": 8666,
      "models employ": 7311,
      "recommendations future": 9523,
      "reasoning path": 9432,
      "retrievalaugmented large": 9950,
      "extraordinary performance": 3937,
      "tasks question": 11264,
      "qa tasks": 9229,
      "knowledge existing": 5669,
      "generate reasoning": 4466,
      "approaches inherent": 769,
      "low quality": 6732,
      "quality generated": 9242,
      "llm easily": 6405,
      "interaction ir": 5490,
      "approach enables": 739,
      "selects appropriate": 10212,
      "answering datasets": 666,
      "datasets outperform": 2540,
      "answer accuracy": 656,
      "ai ability": 509,
      "leveraging diverse": 6295,
      "compared llms": 1850,
      "zeroshot fewshot": 12314,
      "llms incontext": 6564,
      "taskspecific finetuning": 11309,
      "errors llm": 3469,
      "llm predictions": 6423,
      "extent llms": 3913,
      "recognition capabilities": 9511,
      "tuning present": 11702,
      "present new": 8718,
      "carefully curated": 1401,
      "exam questions": 3611,
      "shows strong": 10395,
      "strong capabilities": 10808,
      "models gpt35": 7348,
      "nlp benchmarks": 7862,
      "using small": 11974,
      "practical perspective": 8669,
      "capability understanding": 1374,
      "release model": 9622,
      "domains remains": 3062,
      "paper evaluates": 8224,
      "models specialized": 7529,
      "certain domains": 1443,
      "processing ensure": 8902,
      "vertical domains": 12124,
      "learning research": 6239,
      "semantic communications": 10230,
      "models fms": 7334,
      "models increasingly": 7363,
      "research explored": 9789,
      "semantic extraction": 10232,
      "different levels": 2887,
      "computation memory": 1965,
      "study focuses": 10854,
      "universal knowledge": 11822,
      "study highlights": 10855,
      "comprehensive benchmark": 1927,
      "benchmark evaluating": 1117,
      "comprehensively evaluate": 1951,
      "hallucination detection": 4790,
      "domains llms": 3060,
      "discuss key": 2974,
      "analyze current": 632,
      "point future": 8555,
      "prompts code": 9030,
      "significantly advanced": 10423,
      "llms use": 6675,
      "community remains": 1812,
      "usefulness hand": 11907,
      "timeconsuming costly": 11477,
      "issue introduce": 5587,
      "designed enhance": 2762,
      "comprises components": 1957,
      "corpora demonstrate": 2231,
      "framework generate": 4256,
      "encoder large": 3324,
      "model series": 7215,
      "series llms": 10294,
      "indomain training": 5255,
      "enable llms": 3308,
      "learning despite": 6204,
      "fewshot ability": 4029,
      "llms standard": 6661,
      "paper raise": 8264,
      "instead using": 5392,
      "asks llms": 825,
      "llms create": 6490,
      "final output": 4067,
      "flexible framework": 4167,
      "icl chainofthought": 5035,
      "arithmetic reasoning": 805,
      "generation benchmarks": 4519,
      "learning strategy": 6243,
      "performance paper": 8420,
      "knowledge learned": 5691,
      "llms factual": 6535,
      "llms output": 6602,
      "output generation": 8166,
      "llms fewshot": 6536,
      "learning scenarios": 6241,
      "scenarios introduce": 10128,
      "framework improve": 4260,
      "proposed approaches": 9114,
      "autoregressive llms": 967,
      "gptstyle models": 4709,
      "answering tasks": 676,
      "outperforms strong": 8162,
      "context modeling": 2144,
      "reasoning llms": 9429,
      "wide spectrum": 12213,
      "social network": 10527,
      "network services": 7791,
      "contexts using": 2154,
      "using natural": 11960,
      "context reasoning": 2147,
      "finetuning model": 4135,
      "users requests": 11931,
      "users request": 11930,
      "stage does": 10676,
      "data help": 2419,
      "help llms": 4848,
      "llms reasoning": 6627,
      "reasoning large": 9424,
      "foundation language": 4219,
      "language technologies": 5980,
      "great success": 4751,
      "data training": 2465,
      "training llms": 11566,
      "impact code": 5076,
      "different stages": 2908,
      "results provide": 9926,
      "text significantly": 11414,
      "enhance llms": 3389,
      "general reasoning": 4415,
      "mixing strategy": 7081,
      "strategy code": 10786,
      "deepen understanding": 2610,
      "llms regarding": 6630,
      "source code": 10572,
      "satisfy users": 10097,
      "users information": 11926,
      "tasks important": 11220,
      "responses lack": 9856,
      "effectiveness llms": 3172,
      "issues present": 5596,
      "learning contrastive": 6202,
      "suit needs": 10950,
      "specifically construct": 10624,
      "reward function": 9995,
      "teach llms": 11316,
      "conducted experiments": 2042,
      "experiments typical": 3806,
      "typical applications": 11721,
      "consistency llms": 2091,
      "llms outputs": 6603,
      "prompts vulnerability": 9042,
      "vulnerability detection": 12168,
      "approaches lack": 770,
      "optimization llms": 8094,
      "semantic space": 10243,
      "technique solve": 11332,
      "attack strategies": 876,
      "outperforming existing": 8143,
      "foundational framework": 4231,
      "concerns potential": 2000,
      "llms requires": 6634,
      "dialogue dataset": 2861,
      "value alignment": 12023,
      "alignment llms": 578,
      "evaluate representative": 3515,
      "representative llms": 9737,
      "high level": 4872,
      "suggest llms": 10942,
      "based provided": 1058,
      "indicating potential": 5246,
      "nlp large": 7867,
      "performance limited": 8408,
      "input length": 5350,
      "pilot experiments": 8498,
      "improved performance": 5140,
      "insight propose": 5362,
      "relative improvement": 9612,
      "llms datasets": 6492,
      "achieve competitive": 244,
      "competitive results": 1879,
      "assessing quality": 843,
      "answers generated": 679,
      "generated ai": 4472,
      "used evaluate": 11899,
      "candidate answers": 1334,
      "mimic human": 7043,
      "manner specifically": 6816,
      "llms conducted": 6485,
      "conducted extensive": 2043,
      "experiments diverse": 3779,
      "rates models": 9349,
      "evaluations indicate": 3587,
      "diverse applications": 3012,
      "human labor": 4977,
      "knowledge design": 5660,
      "search space": 10180,
      "space search": 10581,
      "strategy paper": 10788,
      "gpt4 based": 4692,
      "design new": 2751,
      "gpt4 generative": 4697,
      "generates accurate": 4491,
      "natural language inference": 7714,
      "natural language understanding": 7739,
      "obtains new stateoftheart": 8001,
      "multilingual language models": 7618,
      "machine translation models": 6759,
      "language models propose": 5936,
      "radford et al": 9305,
      "et al 2018": 3490,
      "model experimental results": 7143,
      "experimental results model": 3749,
      "story generation generating": 10768,
      "language generation models": 5765,
      "pretrained language model": 8747,
      "language model gpt2": 5788,
      "pretraining experimental results": 8779,
      "method large language": 6957,
      "large language model": 6009,
      "significantly improves accuracy": 10436,
      "address problem propose": 410,
      "largescale language model": 6135,
      "terms automatic metrics": 11361,
      "automatic metrics human": 944,
      "metrics human evaluation": 7027,
      "emergence large language": 3252,
      "large language models": 6019,
      "language models llms": 5853,
      "deep reinforcement learning": 2607,
      "math word problems": 6862,
      "math word problem": 6861,
      "task natural language": 11135,
      "natural language processing": 7722,
      "based generative pretrained": 1035,
      "generative pretrained language": 4607,
      "demonstrate effectiveness proposed": 2654,
      "effectiveness proposed method": 3176,
      "proposed method benchmark": 9120,
      "results method consistently": 9912,
      "method consistently outperforms": 6945,
      "generative pretrained models": 4612,
      "pretrained language models": 8749,
      "language models gpt": 5838,
      "generative pretrained model": 4611,
      "conduct extensive experiments": 2030,
      "outperforms baseline models": 8147,
      "ablation studies conducted": 170,
      "table question answering": 11075,
      "based natural language": 1050,
      "autoregressive language models": 966,
      "stateoftheart results various": 10729,
      "large foundation models": 6007,
      "new stateoftheart results": 7841,
      "models end propose": 7315,
      "175 billion parameters": 30,
      "automatic speech recognition": 949,
      "masked language modeling": 6839,
      "word error rate": 12240,
      "release code model": 9619,
      "language models incontext": 5841,
      "models incontext learning": 7361,
      "explored recent years": 3854,
      "success natural language": 10921,
      "language models gpt2": 5839,
      "language model pretrained": 5798,
      "tasks unified texttotext": 11296,
      "unified texttotext format": 11806,
      "training objectives different": 11575,
      "language models bert": 5814,
      "language models used": 5951,
      "language models multiple": 5922,
      "models multiple tasks": 7470,
      "tasks large language": 11236,
      "language models achieved": 5810,
      "models achieved impressive": 7255,
      "language model external": 5785,
      "capabilities remains unclear": 1358,
      "language models perform": 5931,
      "tasks work introduce": 11302,
      "model best knowledge": 7113,
      "language model demonstrate": 5783,
      "performance wide range": 8456,
      "shows significant improvement": 10394,
      "data scarcity problem": 2457,
      "lack largescale highquality": 5746,
      "overcome limitation propose": 8179,
      "text data specifically": 11389,
      "facilitating future research": 3961,
      "future research field": 4356,
      "information large language": 5303,
      "graph neural networks": 4733,
      "neural networks gnns": 7804,
      "training large language": 11562,
      "paper propose efficient": 8253,
      "propose efficient effective": 9065,
      "effectiveness proposed approach": 3175,
      "numerous natural language": 7963,
      "natural language tasks": 7737,
      "language processing nlp": 5964,
      "processing nlp demonstrate": 8906,
      "success large language": 10915,
      "language models llm": 5852,
      "extensive experimental results": 3891,
      "experimental results public": 3756,
      "results public datasets": 9928,
      "performance stateoftheart approaches": 8433,
      "promising research direction": 8974,
      "natural language prompts": 7734,
      "language models limited": 5851,
      "address issue propose": 397,
      "preliminary evaluation chatgpt": 8707,
      "minor performance differences": 7058,
      "evaluating number benchmark": 3532,
      "number benchmark test": 7950,
      "benchmark test sets": 1132,
      "improves translation performance": 5154,
      "using publicly available": 11966,
      "nonlatin script languages": 7897,
      "generate multimodal content": 4457,
      "access external knowledge": 203,
      "external knowledge base": 3916,
      "llm improve performance": 6416,
      "language model based": 5780,
      "language model pretraining": 5801,
      "language understanding generation": 5985,
      "understanding generation tasks": 11773,
      "largescale pretrained language": 6141,
      "comparative study chatgpt": 1833,
      "chatgpt finetuned bert": 1563,
      "prior studies shown": 8838,
      "studies shown chatgpt": 10846,
      "understanding ability chatgpt": 11763,
      "falls short handling": 3993,
      "tasks large margin": 11239,
      "achieves comparable performance": 284,
      "existing large language": 3693,
      "reinforcement learning rl": 9596,
      "approach significantly improves": 763,
      "language understanding tasks": 5989,
      "demonstrated impressive performance": 2681,
      "impressive performance various": 5113,
      "performance various natural": 8448,
      "various natural language": 12080,
      "processing nlp tasks": 8907,
      "understanding reasoning capabilities": 11782,
      "language understanding nlu": 5987,
      "understanding nlu tasks": 11779,
      "language inference sentiment": 5769,
      "sentiment analysis tasks": 10271,
      "reinforcement learning human": 9593,
      "learning human feedback": 6215,
      "human feedback rlhf": 4970,
      "policy search problem": 8563,
      "problem reinforcement learning": 8870,
      "leverages large language": 6285,
      "prompting methods generate": 9020,
      "significantly outperform stateoftheart": 10443,
      "outperform stateoftheart baselines": 8138,
      "potential large language": 8629,
      "including natural language": 5190,
      "language processing computer": 5961,
      "processing computer vision": 8901,
      "language model gpt4": 5789,
      "diffusion models introducing": 2929,
      "including text images": 5196,
      "prompting large language": 9013,
      "language models large": 5844,
      "models large language": 7373,
      "language models demonstrated": 5821,
      "based observation propose": 1053,
      "observation propose novel": 7984,
      "various downstream tasks": 12064,
      "incontext learning performance": 5209,
      "grammatical error correction": 4725,
      "terms automatic evaluation": 11359,
      "automatic evaluation metrics": 937,
      "human evaluation quantitatively": 4964,
      "results demonstrate chatgpt": 9892,
      "artificial intelligence ai": 817,
      "chatgpt large language": 1577,
      "chat generative pretrained": 1525,
      "generative pretrained transformer": 4613,
      "language models empirical": 5827,
      "models empirical study": 7309,
      "performance large language": 8404,
      "language models based": 5813,
      "based instruction tuning": 1041,
      "instruction tuning different": 5417,
      "potential future research": 8624,
      "future research directions": 4355,
      "furthermore propose novel": 4337,
      "propose novel twostep": 9095,
      "problem large language": 8863,
      "models llms chatgpt": 7394,
      "llms chatgpt gpt4": 6475,
      "llms remains unclear": 6632,
      "problem paper propose": 8867,
      "remarkable potential various": 9683,
      "error correction gec": 3462,
      "using incontext learning": 11949,
      "experimental results human": 3745,
      "human evaluations demonstrate": 4967,
      "parameterefficient finetuning large": 8286,
      "finetuning large language": 4129,
      "models success large": 7536,
      "models llms like": 7428,
      "llms like gpt3": 6586,
      "parameterefficient finetuning peft": 8287,
      "comparable better performance": 1818,
      "llms different tasks": 6507,
      "math reasoning datasets": 6859,
      "datasets results demonstrate": 2548,
      "framework finetuning large": 4253,
      "llms downstream tasks": 6510,
      "systems large language": 11061,
      "solving math word": 10560,
      "ability large language": 150,
      "language models work": 5954,
      "models work propose": 7561,
      "models including gpt4": 7359,
      "provide detailed analysis": 9154,
      "power large language": 8646,
      "cell type annotation": 1437,
      "challenging task requires": 1506,
      "language models chatgpt": 5816,
      "ability chatgpt gpt4": 135,
      "pretrained transformer gpt4": 8768,
      "supervised finetuning sft": 10987,
      "proximal policy optimization": 9191,
      "policy optimization ppo": 8561,
      "propose novel learning": 9089,
      "align language model": 568,
      "recently large language": 9500,
      "llms like chatgpt": 6581,
      "like chatgpt demonstrated": 6323,
      "chatgpt demonstrated remarkable": 1550,
      "demonstrated remarkable performance": 2689,
      "variety natural language": 12044,
      "language processing tasks": 5971,
      "remains explored paper": 9656,
      "artificial general intelligence": 814,
      "general intelligence agi": 4407,
      "improve model performance": 5131,
      "language models diffusion": 5824,
      "models diffusion models": 7304,
      "generated large language": 4480,
      "generative large language": 4598,
      "models publicly available": 7505,
      "pretrained large language": 8755,
      "language models exponential": 5832,
      "models exponential growth": 7327,
      "electronic health records": 3211,
      "poses significant challenge": 8592,
      "processing nlp techniques": 8908,
      "language models plms": 5933,
      "challenges future directions": 1482,
      "finally discuss existing": 4074,
      "discuss existing challenges": 2972,
      "field artificial intelligence": 4047,
      "generative pretrained transformers": 4616,
      "pretrained transformers gpt": 8770,
      "models llms demonstrated": 7400,
      "llms demonstrated remarkable": 6499,
      "information retrieval ir": 5314,
      "code reproduce results": 1733,
      "reproduce results available": 9747,
      "promising results generating": 8976,
      "achieving competitive performance": 314,
      "code models available": 1723,
      "empowering large language": 3302,
      "human evaluation results": 4965,
      "gpt4 automatic evaluation": 4691,
      "data public httpsgithubcomnlpxucanwizardlm": 2447,
      "language models like": 5848,
      "models like chatgpt": 7381,
      "task converts natural": 11121,
      "converts natural language": 2215,
      "models llms work": 7461,
      "work natural language": 12258,
      "natural language generation": 7712,
      "outperforms stateoftheart models": 8161,
      "demonstrates strong generalization": 2698,
      "capacity largescale language": 1384,
      "largescale language models": 6136,
      "address limitation propose": 403,
      "finetuning experimental results": 4126,
      "deep neural networks": 2604,
      "neural networks dnns": 7803,
      "quantitatively evaluate performance": 9254,
      "promising performance various": 8971,
      "performance various tasks": 8454,
      "tasks conduct extensive": 11182,
      "incontext learning icl": 5207,
      "chatgpt exhibits strong": 1560,
      "remains challenging task": 9652,
      "machine learning tasks": 6756,
      "machine learning ml": 6754,
      "aim bridge gap": 537,
      "using large language": 11952,
      "machine translation mt": 6760,
      "using deep learning": 11942,
      "models llms gpt3": 7424,
      "llms gpt3 chatgpt": 6552,
      "bridge gap llms": 1272,
      "network large language": 7789,
      "tasks target task": 11289,
      "achieves competitive performance": 287,
      "paper propose simple": 8258,
      "propose simple efficient": 9100,
      "simple efficient approach": 10463,
      "demonstrate superiority proposed": 2669,
      "instructions instruction tuning": 5437,
      "different model sizes": 2893,
      "facilitate future research": 3955,
      "future research release": 4357,
      "human quality evaluation": 4987,
      "knowledge large language": 5685,
      "language models lms": 5919,
      "empirical results demonstrate": 3279,
      "results demonstrate method": 9894,
      "method significantly improves": 6966,
      "general artificial intelligence": 4402,
      "reasoning ability crucial": 9404,
      "various nlp tasks": 12086,
      "unclear chatgpt performs": 11739,
      "paper conduct comprehensive": 8215,
      "conduct comprehensive evaluation": 2023,
      "approaches large language": 772,
      "varying difficulty levels": 12101,
      "chain thought cot": 1450,
      "comparable stateoftheart methods": 1827,
      "knowledge graph construction": 5674,
      "propose simple effective": 9099,
      "comprehensive experimental results": 1939,
      "code datasets available": 1713,
      "language models previous": 5935,
      "knowledgeintensive tasks paper": 5718,
      "tasks paper propose": 11253,
      "paper propose new": 8255,
      "propose new paradigm": 9083,
      "various language models": 12073,
      "commonsense reasoning tasks": 1804,
      "models llms exhibit": 7413,
      "blackbox language model": 1219,
      "experiments demonstrate effectiveness": 3773,
      "demonstrate effectiveness method": 2653,
      "data instruction tuning": 2425,
      "instruction tuning large": 5418,
      "tuning large language": 11694,
      "models llms gained": 7419,
      "adaptation large language": 355,
      "models llms downstream": 7406,
      "downstream tasks finetuning": 3083,
      "millions billions parameters": 7041,
      "performance specific task": 8430,
      "instruction tuning data": 5415,
      "powerful capabilities text": 8654,
      "paper presents novel": 8250,
      "novel transformer architecture": 7939,
      "experiments benchmark datasets": 3765,
      "results method outperforms": 9913,
      "method outperforms stateoftheart": 6960,
      "empower large language": 3294,
      "language model perform": 5797,
      "question answering large": 9271,
      "answering large language": 670,
      "language model llm": 5793,
      "model llm gained": 7179,
      "attracted widespread attention": 902,
      "question answering qa": 9273,
      "addition propose new": 377,
      "achieve better performance": 241,
      "extensive experiments demonstrate": 3899,
      "experiments demonstrate approach": 3772,
      "llms like gpt": 6585,
      "language processing paper": 5968,
      "models llms exhibited": 7415,
      "emergent incontext learning": 3259,
      "decision making process": 2574,
      "downstream tasks extensive": 3081,
      "tasks extensive experiments": 11210,
      "datasets method achieves": 2538,
      "multitask instruction tuning": 7677,
      "broad range tasks": 1290,
      "tasks conduct experiments": 11181,
      "exploring potential chatgpt": 3862,
      "findings demonstrate feasibility": 4088,
      "smaller parameter size": 10517,
      "model extensive experiments": 7148,
      "significantly outperforms stateoftheart": 10447,
      "active learning mechanism": 337,
      "address limitations present": 405,
      "language models conduct": 5819,
      "llms exhibited remarkable": 6527,
      "remarkable performance various": 9678,
      "nlp tasks finetuning": 7875,
      "expensive timeconsuming obtain": 3729,
      "paper introduces novel": 8237,
      "using reinforcement learning": 11968,
      "model publicly available": 7207,
      "performance range natural": 8425,
      "range natural language": 9319,
      "small language models": 10508,
      "language models slms": 5947,
      "named entity recognition": 7694,
      "entity recognition relation": 3429,
      "recognition relation extraction": 9514,
      "llms generate reasonable": 6546,
      "tasks including context": 11223,
      "understanding code generation": 11767,
      "results gpt4 achieve": 9905,
      "achieve comparable performance": 243,
      "report large language": 9715,
      "language generation nlg": 5766,
      "address issue paper": 396,
      "leverage large language": 6277,
      "machine translation text": 6762,
      "translation text summarization": 11644,
      "demonstrate method effectively": 2661,
      "method effectively improve": 6949,
      "answer question paper": 660,
      "datasets demonstrate effectiveness": 2524,
      "including large language": 5182,
      "remains largely unexplored": 9658,
      "experiments demonstrate proposed": 3776,
      "significantly outperforms existing": 10445,
      "outperforms existing methods": 8153,
      "issue large language": 5589,
      "large language modelsllms": 6104,
      "research large language": 9797,
      "current research focuses": 2360,
      "models llms remarkable": 7443,
      "response user input": 9848,
      "language model small": 5802,
      "natural language text": 7738,
      "limitations propose novel": 6346,
      "methods experimental results": 6985,
      "recent success large": 9479,
      "questions large language": 9295,
      "paper explore potential": 8227,
      "experimental results real": 3758,
      "play critical role": 8527,
      "avoiding potential data": 996,
      "potential data leakage": 8622,
      "smaller language models": 10514,
      "training experimental results": 11553,
      "experimental results demonstrate": 3740,
      "models like gpt35": 7383,
      "propose new benchmark": 9082,
      "modern large language": 7567,
      "code benchmark publicly": 1702,
      "like chatgpt shown": 6328,
      "abilities different models": 123,
      "evaluating large language": 3527,
      "language models chinese": 5817,
      "recent advancements large": 9456,
      "advancements large language": 463,
      "yielded remarkable performance": 12302,
      "performance natural language": 8413,
      "paper propose novel": 8256,
      "sentiment analysis dataset": 10270,
      "existing opensource llms": 3706,
      "reasoning ability large": 9405,
      "intermediate reasoning steps": 5515,
      "rapid advancement large": 9334,
      "advancement large language": 454,
      "models llms led": 7427,
      "pretrained models help": 8765,
      "achieves stateoftheart performance": 302,
      "different pretrained models": 2900,
      "general language understanding": 4414,
      "era large language": 3453,
      "metrics observe necessity": 7031,
      "multimodal large language": 7633,
      "code data available": 1706,
      "legal large language": 6259,
      "knowledge bases large": 5656,
      "bases large language": 1082,
      "models llms shown": 7450,
      "llms shown potential": 6651,
      "language models crucial": 5820,
      "generative language models": 4596,
      "chinese large language": 1629,
      "align human values": 566,
      "exhibited remarkable abilities": 3665,
      "provide preliminary evaluation": 9161,
      "large pretrained models": 6117,
      "propose novel framework": 9087,
      "computer vision natural": 1983,
      "vision natural language": 12142,
      "language models significant": 5945,
      "given natural language": 4635,
      "natural language questions": 7735,
      "learning large language": 6222,
      "models llms emerged": 7409,
      "baseline models comprehensive": 1070,
      "models llms recently": 7441,
      "llms recently demonstrated": 6629,
      "demonstrated remarkable capabilities": 2686,
      "remarkable capabilities natural": 9670,
      "capabilities natural language": 1351,
      "comprehensive overview recent": 1945,
      "models llms generate": 7423,
      "model performance compared": 7198,
      "aspect natural language": 828,
      "bridge gap propose": 1274,
      "datasets pretrained models": 2543,
      "teaching large language": 11322,
      "chain thought prompting": 1451,
      "knowledge reasoning abilities": 5699,
      "alignment human values": 577,
      "conduct human evaluation": 2032,
      "automatic human evaluation": 939,
      "llms demonstrated exceptional": 6496,
      "language models retrieval": 5942,
      "opendomain question answering": 8046,
      "chatgpt demonstrated impressive": 1549,
      "wide range tasks": 12211,
      "existing evaluation methods": 3686,
      "overcoming limitations previous": 8184,
      "solve complex problems": 10550,
      "editing large language": 3128,
      "language model large": 5790,
      "model large language": 7171,
      "models llms showcased": 7448,
      "llms showcased remarkable": 6647,
      "helps llms better": 4857,
      "llms conduct extensive": 6483,
      "tasks experimental results": 11204,
      "experimental results indicate": 3746,
      "plays vital role": 8539,
      "diverse natural language": 3022,
      "bert roberta gpt2": 1158,
      "understanding large language": 11775,
      "llms shown impressive": 6650,
      "nlp tasks llms": 7877,
      "tasks event extraction": 11199,
      "bilingual english chinese": 1198,
      "data model size": 2435,
      "significantly boost performance": 10426,
      "achieve best results": 239,
      "benchmarks recent years": 1144,
      "incontext learning chainofthought": 5206,
      "paper comprehensively investigate": 8213,
      "users address issues": 11923,
      "knowledge graphs kg": 5676,
      "rich semantic information": 10007,
      "existing methods usually": 3700,
      "code publicly available": 1730,
      "generation large language": 4542,
      "sequence generation tasks": 10281,
      "risk instruction forgetting": 10019,
      "mitigate issue propose": 7070,
      "significantly improves zeroshot": 10439,
      "improves zeroshot performance": 5156,
      "data models trained": 2439,
      "llms exhibit impressive": 6525,
      "incontext learning abilities": 5205,
      "language models recent": 5938,
      "generate instruction data": 4452,
      "machine learning models": 6755,
      "language models natural": 5924,
      "eliminates need manual": 3222,
      "achieves stateoftheart results": 305,
      "despite great advance": 2781,
      "language models mllms": 5921,
      "cost paper propose": 2261,
      "despite superior performance": 2790,
      "superior performance large": 10978,
      "language models generate": 5837,
      "generate natural language": 4460,
      "natural language instructions": 7716,
      "knowledge language models": 5683,
      "effective efficient compared": 3139,
      "language models despite": 5823,
      "augmenting large language": 923,
      "extensive experiments conducted": 3898,
      "long context understanding": 6705,
      "models llms demonstrate": 7398,
      "llms demonstrate impressive": 6494,
      "impressive performance language": 5112,
      "works proposed methods": 12276,
      "tasks code completion": 11173,
      "evaluation llms comprehensive": 3564,
      "comprehensive evaluation llms": 1935,
      "recent advances large": 9460,
      "advances large language": 468,
      "language models offers": 5926,
      "application large language": 697,
      "language models field": 5835,
      "mental health support": 6922,
      "empowered large language": 3297,
      "evaluation large language": 3561,
      "emerged new paradigm": 3241,
      "address challenge paper": 388,
      "based findings propose": 1032,
      "language model multimodal": 5796,
      "language model mllm": 5795,
      "individual pretrained models": 5252,
      "address issues propose": 400,
      "spoken language understanding": 10663,
      "gaining increasing attention": 4369,
      "models llms traditional": 7458,
      "fewshot incontext learning": 4032,
      "shown promising results": 10385,
      "framework based chatgpt": 4239,
      "knowledge largescale corpora": 5690,
      "grounding large language": 4761,
      "models llms powerful": 7437,
      "performance challenging tasks": 8368,
      "previous studies typically": 8819,
      "models llms achieved": 7389,
      "llms achieved remarkable": 6446,
      "achieved remarkable success": 271,
      "remarkable success nlp": 9687,
      "multimodal tasks despite": 7643,
      "high computational cost": 4869,
      "achieves performance comparable": 295,
      "recent studies explored": 9477,
      "language models static": 5948,
      "various realworld tasks": 12091,
      "evaluate ability llms": 3501,
      "issues propose novel": 5598,
      "llmbased autonomous agents": 6434,
      "use large language": 11888,
      "models llms introduce": 7426,
      "gradient computation parameter": 4715,
      "results evaluated gpt4": 9900,
      "attack success rate": 878,
      "processing tasks related": 8913,
      "tasks including contextual": 11224,
      "chaining large language": 1454,
      "stateoftheart large language": 10712,
      "connecting large language": 2071,
      "models llms excel": 7412,
      "carefully crafted prompts": 1400,
      "remarkable capabilities various": 9673,
      "capabilities various tasks": 1364,
      "various tasks including": 12095,
      "including named entity": 5187,
      "llms achieved significant": 6447,
      "nlp downstream tasks": 7866,
      "furthermore evaluate performance": 4331,
      "demonstrates superior performance": 2700,
      "outperforms existing models": 8154,
      "convolutional neural network": 2220,
      "llms generate summaries": 6548,
      "ability existing models": 142,
      "different methods including": 2891,
      "models llms revolutionized": 7444,
      "llms revolutionized natural": 6639,
      "revolutionized natural language": 9986,
      "powerful large language": 8659,
      "different ways data": 2914,
      "ways data augmentation": 12178,
      "experiments ablation studies": 3763,
      "data augmentation methods": 2387,
      "capabilities large language": 1343,
      "assistant large language": 858,
      "llms demonstrated great": 6497,
      "demonstrated great potential": 2678,
      "pretraining supervised finetuning": 8796,
      "language models better": 5815,
      "nlp tasks especially": 7873,
      "tasks especially text": 11195,
      "cost model training": 2259,
      "multiple nlp tasks": 7659,
      "language models various": 5952,
      "opensource language models": 8059,
      "models like llama": 7384,
      "present novel framework": 8720,
      "extensive experiments standard": 3906,
      "code data models": 1707,
      "data models publicly": 2437,
      "utilizing large language": 12000,
      "models llms provide": 7440,
      "objective subjective dimensions": 7976,
      "quantitative qualitative results": 9251,
      "models llms presents": 7439,
      "process paper introduces": 8894,
      "llms shown promise": 6652,
      "revolutionizing natural language": 9991,
      "use natural language": 11893,
      "language models employ": 5829,
      "recommendations future research": 9524,
      "retrievalaugmented large language": 9951,
      "question answering datasets": 9268,
      "datasets outperform stateoftheart": 2541,
      "llms incontext learning": 6565,
      "instruction tuning present": 5421,
      "large models gpt4": 6110,
      "exceptional capabilities various": 3635,
      "capabilities various domains": 1361,
      "various domains remains": 12060,
      "existing large models": 3694,
      "foundation models fms": 4224,
      "previous research explored": 8812,
      "research explored use": 9790,
      "comprehensive benchmark evaluating": 1928,
      "various domains llms": 12059,
      "models llms use": 7459,
      "address issue introduce": 395,
      "encoder large language": 3325,
      "icl chainofthought cot": 5036,
      "code generation benchmarks": 1715,
      "achieves comparable results": 285,
      "data code available": 2393,
      "fewshot learning scenarios": 4034,
      "question answering tasks": 9275,
      "outperforms strong baselines": 8163,
      "using natural language": 11961,
      "reasoning large language": 9425,
      "impact code data": 5077,
      "source code model": 10574,
      "code model parameters": 1721,
      "users information needs": 11927,
      "address issues present": 399,
      "llms generate responses": 6547,
      "effectively improve performance": 3156,
      "improve performance llms": 5133,
      "prompts vulnerability detection": 9043,
      "evaluate representative llms": 3516,
      "building insight propose": 1312,
      "conducted extensive experiments": 2044,
      "search space search": 10181,
      "model experimental results model": 7144,
      "pretrained language model gpt2": 8748,
      "terms automatic metrics human": 11362,
      "automatic metrics human evaluation": 945,
      "emergence large language models": 3253,
      "large language models llms": 6047,
      "task natural language processing": 11136,
      "based generative pretrained language": 1036,
      "generative pretrained language model": 4608,
      "demonstrate effectiveness proposed method": 2656,
      "generative pretrained language models": 4609,
      "language models incontext learning": 5842,
      "success natural language processing": 10922,
      "tasks unified texttotext format": 11297,
      "pretrained language models bert": 8751,
      "language models multiple tasks": 5923,
      "tasks large language models": 11237,
      "large language models achieved": 6020,
      "information large language models": 5304,
      "graph neural networks gnns": 4734,
      "training large language models": 11563,
      "natural language processing nlp": 7726,
      "language processing nlp demonstrate": 5965,
      "success large language models": 10916,
      "large language models llm": 6046,
      "experimental results public datasets": 3757,
      "large language models limited": 6045,
      "evaluating number benchmark test": 3533,
      "number benchmark test sets": 7951,
      "largescale pretrained language model": 6142,
      "prior studies shown chatgpt": 8839,
      "demonstrated impressive performance various": 2682,
      "impressive performance various natural": 5114,
      "performance various natural language": 8449,
      "various natural language processing": 12081,
      "language processing nlp tasks": 5966,
      "natural language understanding nlu": 7742,
      "language understanding nlu tasks": 5988,
      "natural language inference sentiment": 7715,
      "reinforcement learning human feedback": 9594,
      "learning human feedback rlhf": 6216,
      "leverages large language models": 6287,
      "potential large language models": 8630,
      "natural language processing computer": 7723,
      "language processing computer vision": 5962,
      "prompting large language models": 9014,
      "large language models large": 6043,
      "language models large language": 5845,
      "models large language models": 7374,
      "large language models demonstrated": 6028,
      "terms automatic evaluation metrics": 11360,
      "chat generative pretrained transformer": 1526,
      "large language models empirical": 6033,
      "language models empirical study": 5828,
      "performance large language models": 8405,
      "large language models based": 6022,
      "potential future research directions": 8625,
      "problem large language models": 8864,
      "language models llms chatgpt": 5859,
      "models llms chatgpt gpt4": 7395,
      "grammatical error correction gec": 4726,
      "finetuning large language models": 4130,
      "models success large language": 7537,
      "language models llms like": 5890,
      "models llms like gpt3": 7434,
      "systems large language models": 11062,
      "large language models perform": 6091,
      "ability large language models": 151,
      "large language models work": 6101,
      "language models work propose": 5955,
      "power large language models": 8647,
      "large language models chatgpt": 6023,
      "generative pretrained transformer gpt4": 4615,
      "proximal policy optimization ppo": 9192,
      "recently large language models": 9501,
      "models llms like chatgpt": 7429,
      "like chatgpt demonstrated remarkable": 6324,
      "chatgpt demonstrated remarkable performance": 1551,
      "variety natural language processing": 12045,
      "natural language processing tasks": 7733,
      "artificial general intelligence agi": 815,
      "large language models diffusion": 6030,
      "language models diffusion models": 5825,
      "generative large language models": 4599,
      "pretrained large language models": 8756,
      "large language models exponential": 6036,
      "language models exponential growth": 5833,
      "language processing nlp techniques": 5967,
      "pretrained language models plms": 8753,
      "finally discuss existing challenges": 4075,
      "field artificial intelligence ai": 4048,
      "generative pretrained transformers gpt": 4617,
      "language models llms demonstrated": 5865,
      "models llms demonstrated remarkable": 7403,
      "code reproduce results available": 1734,
      "empowering large language models": 3303,
      "language models like chatgpt": 5849,
      "task converts natural language": 11122,
      "language models llms work": 5918,
      "capacity largescale language models": 1385,
      "largescale language models llms": 6137,
      "deep neural networks dnns": 2605,
      "using large language models": 11955,
      "language models llms gpt3": 5886,
      "models llms gpt3 chatgpt": 7425,
      "network large language models": 7790,
      "propose simple efficient approach": 9101,
      "leverages large language model": 6286,
      "knowledge large language models": 5687,
      "approaches large language models": 773,
      "knowledgeintensive tasks paper propose": 5719,
      "demonstrate effectiveness proposed approach": 2655,
      "language models llms exhibit": 5876,
      "experiments demonstrate effectiveness method": 3774,
      "instruction tuning large language": 5419,
      "tuning large language models": 11696,
      "language models llms gained": 5882,
      "adaptation large language models": 356,
      "language models llms downstream": 5869,
      "models llms downstream tasks": 7407,
      "results method outperforms stateoftheart": 9914,
      "question answering large language": 9272,
      "large language model llm": 6015,
      "language model llm gained": 5794,
      "extensive experiments demonstrate approach": 3900,
      "models llms like gpt": 7433,
      "language models llms exhibited": 5878,
      "downstream tasks extensive experiments": 3082,
      "large language models conduct": 6026,
      "models llms exhibited remarkable": 7416,
      "remarkable performance various natural": 9679,
      "llms demonstrated remarkable performance": 6500,
      "performance range natural language": 8426,
      "natural language understanding generation": 7740,
      "language understanding generation tasks": 5986,
      "small language models slms": 10509,
      "named entity recognition relation": 7696,
      "entity recognition relation extraction": 3430,
      "report large language models": 9716,
      "natural language generation nlg": 7713,
      "leverage large language models": 6278,
      "machine translation text summarization": 6763,
      "extensive experiments demonstrate proposed": 3903,
      "issue large language models": 5590,
      "research large language models": 9798,
      "language models llms remarkable": 5902,
      "recent success large language": 9480,
      "questions large language models": 9296,
      "avoiding potential data leakage": 997,
      "language models like gpt35": 5850,
      "modern large language models": 7568,
      "llms like chatgpt shown": 6584,
      "evaluating large language models": 3528,
      "large language models chinese": 6024,
      "recent advancements large language": 9457,
      "advancements large language models": 464,
      "performance natural language processing": 8414,
      "reasoning ability large language": 9406,
      "rapid advancement large language": 9335,
      "advancement large language models": 455,
      "language models llms led": 5889,
      "era large language models": 3454,
      "multimodal large language model": 7634,
      "legal large language model": 6260,
      "knowledge bases large language": 5657,
      "bases large language models": 1083,
      "language models llms shown": 5908,
      "models llms shown potential": 7453,
      "large language models crucial": 6027,
      "extensive experiments demonstrate effectiveness": 3901,
      "chinese large language models": 1630,
      "provide preliminary evaluation chatgpt": 9162,
      "computer vision natural language": 1984,
      "vision natural language processing": 12143,
      "large language models significant": 6098,
      "learning large language models": 6223,
      "language models llms emerged": 5872,
      "language models llms recently": 5900,
      "models llms recently demonstrated": 7442,
      "demonstrated remarkable capabilities natural": 2687,
      "remarkable capabilities natural language": 9671,
      "capabilities natural language processing": 1352,
      "language models llms generate": 5885,
      "teaching large language models": 11323,
      "models llms demonstrated exceptional": 7401,
      "large language models retrieval": 6097,
      "large language model large": 6013,
      "language model large language": 5791,
      "model large language models": 7172,
      "language models llms showcased": 5906,
      "models llms showcased remarkable": 7449,
      "llms conduct extensive experiments": 6484,
      "diverse natural language processing": 3023,
      "understanding large language models": 11776,
      "models llms shown impressive": 7452,
      "generation large language models": 4543,
      "significantly improves zeroshot performance": 10440,
      "models llms exhibit impressive": 7414,
      "large language models recent": 6094,
      "large language models natural": 6088,
      "multimodal large language models": 7637,
      "large language models mllms": 6086,
      "despite superior performance large": 2791,
      "superior performance large language": 10979,
      "large language models generate": 6040,
      "augmenting large language models": 924,
      "language models llms demonstrate": 5863,
      "models llms demonstrate impressive": 7399,
      "recent advances large language": 9461,
      "advances large language models": 469,
      "application large language models": 698,
      "large language models field": 6039,
      "empowered large language models": 3299,
      "evaluation large language models": 3562,
      "large language model multimodal": 6018,
      "large language model mllm": 6017,
      "using large language model": 11953,
      "language models llms traditional": 5915,
      "language models llms powerful": 5896,
      "language models llms achieved": 5854,
      "models llms achieved remarkable": 7390,
      "achieved remarkable success nlp": 272,
      "address issues propose novel": 401,
      "use large language models": 11889,
      "language models llms introduce": 5888,
      "chaining large language models": 1455,
      "stateoftheart large language models": 10713,
      "connecting large language models": 2072,
      "language models llms excel": 5875,
      "paper propose novel framework": 8257,
      "including named entity recognition": 5188,
      "models llms achieved significant": 7391,
      "language models llms revolutionized": 5903,
      "models llms revolutionized natural": 7446,
      "llms revolutionized natural language": 6640,
      "revolutionized natural language processing": 9987,
      "different ways data augmentation": 2915,
      "capabilities large language models": 1344,
      "assistant large language model": 859,
      "nlp tasks especially text": 7874,
      "code data models publicly": 1708,
      "data models publicly available": 2438,
      "utilizing large language models": 12001,
      "language models llms provide": 5899,
      "language models llms presents": 5898,
      "models llms shown promise": 7454,
      "revolutionizing natural language processing": 9992,
      "various natural language tasks": 12084,
      "exceptional capabilities various domains": 3636,
      "including large language models": 5183,
      "language models llms use": 5916,
      "encoder large language model": 3326,
      "tasks experimental results demonstrate": 11205,
      "reasoning large language models": 9426,
      "source code model parameters": 10575,
      "large language model based": 6010,
      "terms automatic metrics human evaluation": 11363,
      "emergence large language models llms": 3254,
      "based generative pretrained language model": 1037,
      "natural language processing nlp demonstrate": 7727,
      "success large language models llm": 10917,
      "evaluating number benchmark test sets": 3534,
      "demonstrated impressive performance various natural": 2683,
      "impressive performance various natural language": 5115,
      "performance various natural language processing": 8450,
      "various natural language processing nlp": 12082,
      "natural language processing nlp tasks": 7728,
      "natural language understanding nlu tasks": 7743,
      "reinforcement learning human feedback rlhf": 9595,
      "natural language processing computer vision": 7724,
      "prompting large language models large": 9015,
      "large language models large language": 6044,
      "language models large language models": 5846,
      "large language models empirical study": 6034,
      "models large language models llms": 7375,
      "large language models llms chatgpt": 6050,
      "language models llms chatgpt gpt4": 5860,
      "models success large language models": 7538,
      "success large language models llms": 10918,
      "large language models llms like": 6069,
      "language models llms like gpt3": 5893,
      "large language models work propose": 6102,
      "recently large language models llms": 9502,
      "language models llms like chatgpt": 5891,
      "like chatgpt demonstrated remarkable performance": 6325,
      "variety natural language processing tasks": 12046,
      "large language models diffusion models": 6031,
      "large language models exponential growth": 6037,
      "natural language processing nlp techniques": 7729,
      "large language models llms demonstrated": 6052,
      "language models llms demonstrated remarkable": 5868,
      "training large language models llms": 11564,
      "large language models llms work": 6084,
      "large language models llms gpt3": 6066,
      "language models llms gpt3 chatgpt": 5887,
      "using large language models llms": 11957,
      "instruction tuning large language models": 5420,
      "large language models llms gained": 6063,
      "large language models llms downstream": 6053,
      "language models llms downstream tasks": 5870,
      "large language model llm gained": 6016,
      "language models llms like gpt": 5892,
      "large language models llms exhibited": 6060,
      "language models llms exhibited remarkable": 5879,
      "remarkable performance various natural language": 9680,
      "models llms demonstrated remarkable performance": 7404,
      "natural language understanding generation tasks": 7741,
      "named entity recognition relation extraction": 7697,
      "generative pretrained language models plms": 4610,
      "large language models llms remarkable": 6077,
      "recent success large language models": 9481,
      "modern large language models llms": 7569,
      "models llms like chatgpt shown": 7432,
      "recent advancements large language models": 9458,
      "advancements large language models llms": 465,
      "performance natural language processing tasks": 8415,
      "reasoning ability large language models": 9407,
      "ability large language models llms": 152,
      "rapid advancement large language models": 9336,
      "advancement large language models llms": 456,
      "large language models llms led": 6068,
      "multimodal large language model llm": 7635,
      "knowledge bases large language models": 5658,
      "bases large language models llms": 1084,
      "large language models llms shown": 6080,
      "language models llms shown potential": 5911,
      "large language models llms exhibit": 6059,
      "various natural language processing tasks": 12083,
      "computer vision natural language processing": 1985,
      "large language models llms emerged": 6055,
      "large language models llms recently": 6076,
      "language models llms recently demonstrated": 5901,
      "demonstrated remarkable capabilities natural language": 2688,
      "pretrained large language models llms": 8757,
      "large language models llms generate": 6065,
      "language models llms demonstrated exceptional": 5866,
      "large language model large language": 6014,
      "language model large language models": 5792,
      "model large language models llms": 7173,
      "large language models llms showcased": 6079,
      "language models llms showcased remarkable": 5907,
      "language models llms shown impressive": 5910,
      "power large language models llms": 8648,
      "generation large language models large": 4544,
      "language models llms exhibit impressive": 5877,
      "multimodal large language models mllms": 7638,
      "despite superior performance large language": 2792,
      "large language models llms demonstrate": 6051,
      "language models llms demonstrate impressive": 5864,
      "recent advances large language models": 9462,
      "multimodal large language model mllm": 7636,
      "large language models llms traditional": 6081,
      "large language models llms powerful": 6072,
      "large language models llms achieved": 6048,
      "language models llms achieved remarkable": 5855,
      "use large language models llms": 11890,
      "large language models llms introduce": 6067,
      "stateoftheart large language models llms": 10714,
      "large language models llms excel": 6058,
      "language models llms achieved significant": 5856,
      "large language models llms revolutionized": 6078,
      "language models llms revolutionized natural": 5905,
      "models llms revolutionized natural language": 7447,
      "llms revolutionized natural language processing": 6641,
      "code data models publicly available": 1709,
      "utilizing large language models llms": 12002,
      "large language models llms provide": 6075,
      "large language models llms presents": 6074,
      "language models llms shown promise": 5912,
      "performance various natural language tasks": 8451,
      "large language models llms use": 6082,
      "reasoning large language models llms": 9427,
      "captioning": 1388,
      "bertbased": 1159,
      "generators": 4624,
      "coco": 1698,
      "identical": 5041,
      "sound": 10570,
      "ending": 3358,
      "bbc": 1090,
      "classified": 1656,
      "assuming": 867,
      "invalid": 5557,
      "implicitly": 5095,
      "clip": 1677,
      "gpu": 4710,
      "head": 4832,
      "cv": 2375,
      "se": 10168,
      "competent": 1872,
      "singular": 10489,
      "normalization": 7902,
      "philosophy": 8489,
      "block": 1228,
      "combine": 1780,
      "imagenet": 5064,
      "traffic": 11525,
      "practices": 8675,
      "tracking": 11512,
      "5th": 84,
      "mrr": 7601,
      "visionandlanguage": 12145,
      "huggingface": 4947,
      "repository": 9721,
      "object": 7966,
      "scene": 10134,
      "abstractive": 184,
      "53": 80,
      "rouge2": 10060,
      "51": 77,
      "rougel": 10061,
      "locations": 6695,
      "bidirectional": 1193,
      "visionlanguage": 12146,
      "frameworks": 4279,
      "fidelity": 4043,
      "quantization": 9257,
      "formulate": 4208,
      "145": 22,
      "million": 7037,
      "driving": 3099,
      "piece": 8494,
      "spatial": 10594,
      "norms": 7904,
      "quantized": 9259,
      "standards": 10687,
      "confined": 2054,
      "motion": 7589,
      "gpt23": 4678,
      "trainingfree": 11589,
      "magic": 6767,
      "plugandplay": 8547,
      "offtheshelf": 8021,
      "involve": 5573,
      "operation": 8075,
      "computationally": 1976,
      "speedup": 10652,
      "frequency": 4285,
      "recover": 9533,
      "drop": 3100,
      "dramatically": 3085,
      "masks": 6843,
      "expect": 3724,
      "respect": 9831,
      "dalle": 2378,
      "video": 12128,
      "hierarchical": 4864,
      "write": 12284,
      "modal": 7093,
      "stronger": 10813,
      "tells": 11342,
      "pair": 8201,
      "presenting": 8727,
      "vectors": 12107,
      "altered": 603,
      "close": 1679,
      "benefiting": 1149,
      "bart": 1019,
      "edit": 3124,
      "languageimage": 5992,
      "exhaustive": 3655,
      "viewpoint": 12131,
      "shifts": 10348,
      "sacrifices": 10077,
      "weights": 12198,
      "contained": 2126,
      "kullbackleibler": 5725,
      "divergence": 3007,
      "kld": 5646,
      "id": 5037,
      "lines": 6366,
      "sparser": 10592,
      "suitable": 10952,
      "patches": 8322,
      "directional": 2940,
      "objects": 7981,
      "proximity": 9193,
      "detected": 2799,
      "webscale": 12190,
      "arises": 802,
      "branch": 1263,
      "complement": 1883,
      "forming": 4203,
      "tunes": 11687,
      "adhere": 425,
      "musical": 7690,
      "producing": 8920,
      "controllability": 2188,
      "medical": 6894,
      "dialog": 2857,
      "licensing": 6307,
      "deeplearning": 2614,
      "understandable": 11760,
      "attracting": 903,
      "interface": 5508,
      "competency": 1871,
      "sending": 10256,
      "opens": 8053,
      "narrowing": 7703,
      "transferring": 11601,
      "location": 6694,
      "prototype": 9137,
      "dimension": 2932,
      "uniformly": 11810,
      "reject": 9597,
      "academia": 189,
      "vlms": 12160,
      "featuring": 4017,
      "accountability": 223,
      "compress": 1953,
      "fed": 4018,
      "imperfect": 5085,
      "nextgeneration": 7854,
      "kinds": 5644,
      "gaps": 4387,
      "mean": 6877,
      "strongly": 10814,
      "probe": 8857,
      "accuracies": 229,
      "suitability": 10951,
      "lowlevel": 6739,
      "localization": 6689,
      "abundant": 187,
      "exploitation": 3830,
      "map": 6826,
      "simultaneous": 10480,
      "robots": 10040,
      "naturallanguage": 7750,
      "usages": 11883,
      "navigation": 7755,
      "entails": 3421,
      "intention": 5481,
      "finer": 4102,
      "engines": 3378,
      "supporting": 11002,
      "multiview": 7688,
      "scenes": 10137,
      "neglect": 7778,
      "weigh": 12195,
      "prototypes": 9138,
      "scoring": 10160,
      "crossmodel": 2321,
      "entail": 3420,
      "inferential": 5279,
      "mere": 6925,
      "patch": 8320,
      "deception": 2569,
      "surgery": 11010,
      "exponentially": 3865,
      "unidirectional": 11798,
      "robotic": 10038,
      "expands": 3721,
      "tokenizer": 11487,
      "publically": 9204,
      "holistic": 4929,
      "lowrank": 6741,
      "adaption": 363,
      "unexpected": 11793,
      "harder": 4808,
      "demo": 2641,
      "narrow": 7701,
      "dense": 2712,
      "tremendous": 11661,
      "indispensable": 5247,
      "thousands": 11467,
      "hours": 4943,
      "diagnose": 2851,
      "speed": 10651,
      "blip2": 1227,
      "rationales": 9355,
      "resourceintensive": 9826,
      "processor": 8914,
      "refer": 9552,
      "term": 11354,
      "acquiring": 323,
      "act": 325,
      "equipping": 3446,
      "exploits": 3833,
      "distills": 2997,
      "competing": 1873,
      "easier": 3112,
      "fulfilling": 4298,
      "preservation": 8735,
      "formation": 4198,
      "spatiotemporal": 10595,
      "inconsistent": 5202,
      "severe": 10325,
      "obviously": 8003,
      "affected": 488,
      "humanmachine": 5012,
      "undergone": 11747,
      "diagnoses": 2852,
      "ad": 346,
      "largest": 6147,
      "minigpt4": 7048,
      "unbiased": 11733,
      "transparency": 11649,
      "website": 12191,
      "accelerated": 196,
      "immense": 5072,
      "treating": 11654,
      "flexibly": 4168,
      "managed": 6806,
      "customization": 2370,
      "noteworthy": 7911,
      "60": 86,
      "hallucinate": 4788,
      "accessed": 204,
      "reviewed": 9975,
      "timedependent": 11481,
      "triplets": 11674,
      "frame": 4233,
      "attribute": 904,
      "decomposes": 2587,
      "fuse": 4339,
      "highfidelity": 4887,
      "communicate": 1806,
      "vivid": 12159,
      "preserve": 8736,
      "start": 10690,
      "refined": 9562,
      "repeated": 9701,
      "collections": 1775,
      "enriched": 3412,
      "activate": 332,
      "interference": 5510,
      "tree": 11657,
      "urgent": 11875,
      "decoderonly": 2582,
      "conditions": 2018,
      "failing": 3979,
      "sequential": 10290,
      "temperature": 11343,
      "opt175b": 8086,
      "85": 105,
      "mass": 6844,
      "gain": 4359,
      "134x": 18,
      "paving": 8331,
      "openvocabulary": 8068,
      "cold": 1765,
      "contents": 2138,
      "robotics": 10039,
      "launched": 6160,
      "showed": 10367,
      "finegained": 4099,
      "manipulations": 6814,
      "confirmed": 2056,
      "prohibitively": 8952,
      "excessive": 3643,
      "affordable": 491,
      "routing": 10066,
      "holds": 4927,
      "assistants": 860,
      "restricts": 9867,
      "websites": 12192,
      "ensures": 3418,
      "audio": 910,
      "creative": 2304,
      "exacerbates": 3606,
      "adversaries": 482,
      "evade": 3498,
      "subtly": 10908,
      "seek": 10194,
      "llava": 6400,
      "examination": 3612,
      "live": 6385,
      "layer": 6163,
      "bag": 1012,
      "aggregate": 502,
      "questionandanswer": 9279,
      "mode": 7097,
      "select": 10205,
      "tackles": 11088,
      "parse": 8303,
      "variable": 12029,
      "feedforward": 4024,
      "prohibitive": 8951,
      "inaccessible": 5165,
      "assemble": 832,
      "overhead": 8188,
      "obvious": 8002,
      "redundancy": 9550,
      "deploys": 2726,
      "box": 1258,
      "coordinates": 2224,
      "taskaware": 11153,
      "formatting": 4200,
      "upsurge": 11873,
      "supervising": 10992,
      "astonishing": 871,
      "counter": 2275,
      "redundant": 9551,
      "localizing": 6691,
      "easy": 3114,
      "ego4d": 3205,
      "attempts": 887,
      "reformulating": 9573,
      "45": 69,
      "bloomz": 1233,
      "ct": 2334,
      "bypassing": 1321,
      "chatglm6b": 1533,
      "competition": 1874,
      "clouds": 1690,
      "extensible": 3881,
      "threefold": 11469,
      "productivity": 8923,
      "programs": 8940,
      "regions": 9583,
      "leaving": 6253,
      "unaffected": 11729,
      "movie": 7597,
      "movies": 7598,
      "captivating": 1389,
      "transitions": 11631,
      "fitting": 4158,
      "platform": 8521,
      "47": 70,
      "openworld": 8071,
      "wrong": 12287,
      "larger": 6122,
      "date": 2556,
      "exceptionally": 3641,
      "03": 0,
      "wellestablished": 12201,
      "77": 97,
      "poems": 8551,
      "rising": 10016,
      "hotspot": 4937,
      "brain": 1260,
      "trace": 11509,
      "delineate": 2631,
      "akin": 555,
      "body": 1235,
      "links": 6376,
      "referring": 9558,
      "iv": 5606,
      "absent": 180,
      "plugin": 8548,
      "chains": 1465,
      "surfaces": 11007,
      "overly": 8192,
      "succinct": 10934,
      "array": 807,
      "methodologies": 6972,
      "ignore": 5050,
      "instruct": 5394,
      "compliance": 1910,
      "generalizes": 4432,
      "adjust": 428,
      "visualization": 12156,
      "designer": 2767,
      "beginners": 1093,
      "lighting": 6317,
      "hindering": 4918,
      "display": 2986,
      "meticulously": 7021,
      "choices": 1637,
      "lasting": 6150,
      "multiscale": 7670,
      "interested": 5504,
      "landmarks": 5755,
      "inherit": 5329,
      "raises": 9308,
      "computations": 1977,
      "onthefly": 8029,
      "gradientbased": 4717,
      "planting": 8520,
      "seed": 10193,
      "prominence": 8960,
      "confident": 2051,
      "recipe": 9508,
      "positions": 8596,
      "dependency": 2716,
      "abstraction": 183,
      "days": 2558,
      "64": 89,
      "v100": 12009,
      "gpus": 4712,
      "clicks": 1673,
      "comprehending": 1922,
      "synthesizes": 11041,
      "harnesses": 4818,
      "underscore": 11750,
      "marked": 6832,
      "surge": 11008,
      "predominantly": 8698,
      "hypothesize": 5033,
      "presence": 8712,
      "quantify": 9246,
      "added": 370,
      "kept": 5626,
      "confidential": 2052,
      "multidimensional": 7606,
      "outstanding": 8169,
      "textrich": 11422,
      "contributed": 2180,
      "native": 7706,
      "suffering": 10937,
      "false": 3994,
      "guides": 4785,
      "promotes": 8982,
      "practicality": 8672,
      "studying": 10872,
      "approximate": 780,
      "95": 116,
      "suggestions": 10947,
      "soon": 10562,
      "weaker": 12181,
      "simulating": 10475,
      "learner": 6180,
      "assesses": 840,
      "shot": 10358,
      "auc": 909,
      "941": 115,
      "concerned": 1997,
      "cooperate": 2222,
      "respective": 9834,
      "bootstrap": 1246,
      "unlocking": 11840,
      "gptassisted": 4703,
      "turns": 11706,
      "926": 112,
      "illustrating": 5057,
      "sense": 10257,
      "rigorously": 10014,
      "annotators": 652,
      "segmenting": 10203,
      "completely": 1889,
      "lexicons": 6304,
      "exceptions": 3642,
      "brief": 1279,
      "decouple": 2590,
      "enrich": 3411,
      "aggregation": 505,
      "distinguishes": 3002,
      "aggregated": 503,
      "cast": 1410,
      "sc": 10101,
      "latency": 6152,
      "mlm": 7089,
      "modelbased": 7241,
      "recovery": 9535,
      "promoted": 8981,
      "nonexistent": 7894,
      "pinpoint": 8499,
      "gating": 4395,
      "tedious": 11341,
      "11": 8,
      "instructionbased": 5425,
      "mixtureofexpert": 7083,
      "astounding": 872,
      "moe": 7582,
      "constrains": 2109,
      "unification": 11799,
      "quantifying": 9247,
      "attributing": 907,
      "categorize": 1417,
      "deficiencies": 2620,
      "everincreasing": 3595,
      "wild": 12231,
      "progressive": 8949,
      "soft": 10532,
      "enduring": 3366,
      "handles": 4804,
      "stimulated": 10756,
      "cohesive": 1763,
      "confronted": 2062,
      "graphic": 4736,
      "engagement": 3369,
      "overlooking": 8191,
      "initialization": 5334,
      "html": 4944,
      "cc": 1434,
      "completed": 1887,
      "transparent": 11650,
      "maps": 6828,
      "inferior": 5280,
      "supplementing": 10996,
      "flow": 4169,
      "extractors": 3934,
      "unsolved": 11851,
      "vulnerabilities": 12166,
      "surrogate": 11022,
      "mislead": 7061,
      "22": 44,
      "26": 49,
      "86": 106,
      "ernie": 3457,
      "bot": 1249,
      "defenses": 2619,
      "proliferation": 8959,
      "claims": 1643,
      "nsfw": 7944,
      "substituting": 10902,
      "sections": 10189,
      "assessments": 848,
      "hazards": 4830,
      "inadequacy": 5168,
      "golden": 4657,
      "499": 73,
      "opinion": 8078,
      "iqa": 5582,
      "imprecise": 5106,
      "untapped": 11861,
      "potentials": 8643,
      "moving": 7599,
      "speaking": 10597,
      "collaborative": 1768,
      "suggested": 10943,
      "cortex": 2256,
      "datacentric": 2472,
      "insightful": 5363,
      "garnered": 4388,
      "scarce": 10116,
      "laborious": 5737,
      "factchecking": 3965,
      "extant": 3875,
      "image captioning": 5060,
      "excellent results": 3629,
      "results downstream": 9898,
      "new method": 7826,
      "results benchmark": 9881,
      "generate new": 4461,
      "different words": 2916,
      "model used": 7236,
      "tasks natural": 11247,
      "proposes new": 9125,
      "words sentences": 12245,
      "search optimal": 10178,
      "method tackle": 6968,
      "main contribution": 6771,
      "propose method": 9078,
      "analysis visual": 629,
      "vision language": 12138,
      "text image": 11399,
      "implicitly model": 5096,
      "focus chinese": 4174,
      "model called": 7118,
      "contrastive learning": 2177,
      "adopts simple": 439,
      "building large": 1313,
      "negative samples": 7776,
      "gpu resources": 4711,
      "dataset called": 2481,
      "transformer transformer": 11615,
      "transformer models": 11614,
      "vision cv": 12137,
      "works focus": 12274,
      "transformer model": 11613,
      "rich information": 10004,
      "methods study": 7012,
      "improves stateoftheart": 5152,
      "benchmarks including": 1139,
      "based pretrained": 1054,
      "language transformers": 5983,
      "boosts performance": 1245,
      "language vision": 5990,
      "natural languagebased": 7745,
      "language description": 5761,
      "new challenge": 7811,
      "jointly train": 5611,
      "train stateoftheart": 11530,
      "vision models": 12140,
      "design training": 2757,
      "training strategy": 11586,
      "experiments verify": 3811,
      "verify effectiveness": 12114,
      "method achieved": 6935,
      "using language": 11950,
      "systems code": 11057,
      "learning pretrained": 6235,
      "sequencetosequence model": 10288,
      "answer questions": 661,
      "models need": 7474,
      "model t5": 7226,
      "task based": 11117,
      "including masked": 5185,
      "models multimodal": 7468,
      "abstractive summarization": 185,
      "extract essential": 3921,
      "essential information": 3477,
      "data internet": 2426,
      "recently largescale": 9503,
      "largescale generative": 6131,
      "shown effective": 10374,
      "research gap": 9794,
      "information paper": 5308,
      "present simple": 8722,
      "effective method": 3142,
      "task using": 11149,
      "original text": 8120,
      "results best": 9883,
      "best model": 1165,
      "surpasses prior": 11015,
      "conduct thorough": 2036,
      "thorough ablation": 11455,
      "effectiveness various": 3178,
      "fusion methods": 4343,
      "conventional methods": 2197,
      "generated samples": 4487,
      "visionlanguage pretraining": 12147,
      "greatly improved": 4754,
      "tasks largescale": 11240,
      "largescale pretraining": 6144,
      "texttoimage synthesis": 11431,
      "pretraining framework": 8783,
      "quantization models": 9258,
      "generation text": 4584,
      "texttoimage generation": 11428,
      "generation process": 4565,
      "endtoend training": 3365,
      "largescale dataset": 6129,
      "million chinese": 7038,
      "aims generate": 547,
      "number training": 7956,
      "significantly increase": 10441,
      "introduce lightweight": 5542,
      "number trainable": 7954,
      "design novel": 2752,
      "decoder gpt2": 2581,
      "training framework": 11556,
      "results conducted": 9889,
      "benchmarks reveal": 1146,
      "models contain": 7288,
      "compared stateoftheart": 1853,
      "highly challenging": 4901,
      "tackle challenges": 11083,
      "challenges propose": 1489,
      "transformer gpt": 11611,
      "unsupervised manner": 11857,
      "generation remains": 4573,
      "open question": 8034,
      "semantically related": 10249,
      "does involve": 3046,
      "task zeroshot": 11151,
      "stateoftheart method": 10716,
      "image text": 5063,
      "great breakthroughs": 4746,
      "performance drop": 8382,
      "solve problem": 10552,
      "information using": 5320,
      "training phase": 11578,
      "respect various": 9832,
      "reasonable results": 9398,
      "adversarial loss": 479,
      "challenges potential": 1487,
      "computation cost": 1964,
      "available models": 976,
      "learners recent": 6182,
      "tasks making": 11244,
      "multimodal foundation": 7628,
      "new unified": 7846,
      "modeling framework": 7245,
      "tasks strong": 11281,
      "multimodal understanding": 7644,
      "tasks demonstrates": 11187,
      "code pretrained": 1726,
      "training work": 11588,
      "using automatic": 11937,
      "generation recently": 4572,
      "tasks number": 11249,
      "number studies": 7952,
      "model text": 7229,
      "image processing": 5061,
      "address problems": 411,
      "performance proposed": 8423,
      "model using": 7237,
      "results proposed": 9924,
      "multilingual text": 7620,
      "stateoftheart performances": 10723,
      "tasks suggesting": 11285,
      "models code": 7278,
      "largescale datasets": 6130,
      "models recently": 7511,
      "recently gained": 9495,
      "gained significant": 4365,
      "multimodal models": 7641,
      "models intuitive": 7368,
      "leverage pretrained": 6281,
      "semantically consistent": 10248,
      "text descriptions": 11390,
      "bert gpt2": 1154,
      "gpt2 bart": 4672,
      "processing task": 8911,
      "terms bleu": 11364,
      "model better": 7114,
      "better understand": 1183,
      "contrastive languageimage": 2176,
      "models zeroshot": 7562,
      "ability pretrained": 161,
      "specifically use": 10640,
      "kullbackleibler divergence": 5726,
      "divergence kld": 3008,
      "tasks achieves": 11160,
      "achieves higher": 289,
      "indistribution id": 5249,
      "achieves superior": 306,
      "superior robustness": 10981,
      "surpasses previous": 11013,
      "models nlp": 7476,
      "performance textonly": 8438,
      "selfsupervised training": 10225,
      "retrieval generation": 9945,
      "input text": 5355,
      "approach generally": 741,
      "generally applied": 4435,
      "using retrieved": 11969,
      "results approach": 9878,
      "performance bert": 8366,
      "bart t5": 1020,
      "outperform competitive": 8135,
      "competitive baselines": 1876,
      "baselines tasks": 1077,
      "tasks codes": 11177,
      "codes data": 1740,
      "data publicly": 2448,
      "object detection": 7969,
      "diverse knowledge": 3019,
      "object categories": 7968,
      "encoderdecoder architecture": 3329,
      "achieves best": 279,
      "text information": 11401,
      "motivated propose": 7594,
      "directly generate": 2948,
      "natural question": 7747,
      "framework leverages": 4267,
      "data ii": 2420,
      "studies demonstrate": 10838,
      "model specifically": 7221,
      "techniques including": 11336,
      "sequence length": 10282,
      "generation time": 4586,
      "evaluation demonstrates": 3553,
      "linear complexity": 6365,
      "provides novel": 9178,
      "model code": 7124,
      "providing valuable": 9184,
      "medical knowledge": 6901,
      "medical licensing": 6902,
      "processing images": 8903,
      "making challenging": 6797,
      "significant success": 10421,
      "integrating llms": 5461,
      "llms enhance": 6518,
      "llms medical": 6591,
      "medical domain": 6899,
      "capability existing": 1368,
      "models create": 7290,
      "language interface": 5771,
      "capabilities domains": 1340,
      "showing great": 10369,
      "inputs outputs": 5359,
      "end build": 3347,
      "model information": 7164,
      "chatgpt opens": 1584,
      "achieved great": 264,
      "narrowing gap": 7704,
      "current visual": 2363,
      "methods designed": 6983,
      "models lack": 7370,
      "forms pretraining": 4206,
      "pretraining downstream": 8776,
      "tasks explore": 11207,
      "learning generative": 6212,
      "pretrained masked": 8760,
      "model achieve": 7101,
      "achieves excellent": 288,
      "human instructions": 4971,
      "drawn widespread": 3094,
      "models vlms": 7559,
      "construct new": 2114,
      "method propose": 6961,
      "variational autoencoder": 12035,
      "comprehensive analyses": 1925,
      "results terms": 9933,
      "image quality": 5062,
      "findings contribute": 4086,
      "generating natural": 4503,
      "language descriptions": 5762,
      "guidance given": 4777,
      "control signals": 2186,
      "novel promptbased": 7931,
      "prompts different": 9032,
      "different kinds": 2884,
      "inspired recent": 5377,
      "denoising autoencoders": 2711,
      "intermediate layers": 5513,
      "suggesting potential": 10945,
      "models mainstream": 7464,
      "segmentation object": 10202,
      "object localization": 7971,
      "direct use": 2935,
      "performance unsupervised": 8441,
      "unsupervised settings": 11860,
      "tackle issues": 11085,
      "architectures extensive": 794,
      "based large": 1043,
      "technology enables": 11340,
      "including semantic": 5193,
      "semantic text": 10244,
      "understand natural": 11758,
      "provide guidance": 9155,
      "based generated": 1033,
      "language navigation": 5958,
      "opens new": 8054,
      "significant attention": 10404,
      "remarkable progress": 9684,
      "information present": 5310,
      "generation leverages": 4547,
      "analysis capabilities": 618,
      "llms gpt": 6550,
      "design prompts": 2755,
      "information textual": 5317,
      "classification problem": 1654,
      "effectively generates": 3153,
      "offering new": 8011,
      "new perspective": 7830,
      "methods commonly": 6978,
      "scene representation": 10136,
      "thorough experiments": 11457,
      "ones different": 8024,
      "task settings": 11145,
      "visual grounding": 12149,
      "knowledge text": 5707,
      "linguistic knowledge": 6371,
      "knowledge different": 5661,
      "text features": 11393,
      "performance benchmarks": 8365,
      "learning systems": 6244,
      "attention paid": 892,
      "examples different": 3619,
      "adversarial samples": 481,
      "address gap": 391,
      "patch generation": 8321,
      "reasoning visual": 9440,
      "visual question": 12152,
      "answering image": 667,
      "tasks require": 11273,
      "processing models": 8904,
      "advancements gpt": 461,
      "endtoend trainable": 3364,
      "generate coherent": 4441,
      "mimicking human": 7045,
      "human thought": 4989,
      "understanding question": 11780,
      "publically available": 9205,
      "analysis furthermore": 622,
      "multimodal abilities": 7623,
      "foundation llm": 4221,
      "llm visual": 6430,
      "frozen llm": 4292,
      "lowrank adaption": 6743,
      "adaption lora": 364,
      "multiturn conversation": 7683,
      "conversation ability": 2201,
      "makes possible": 6793,
      "instructiontuned models": 5443,
      "models evaluation": 7318,
      "online demo": 8026,
      "demo available": 2642,
      "practical value": 8671,
      "models struggle": 7534,
      "perform poorly": 8355,
      "network based": 7786,
      "detection performance": 2808,
      "performance demonstrating": 8376,
      "llms developing": 6504,
      "reduce cost": 9543,
      "key factors": 5631,
      "simple highly": 10464,
      "significantly speed": 10451,
      "data compared": 2396,
      "intriguing findings": 5532,
      "rationales provided": 9357,
      "network designed": 7787,
      "various human": 12069,
      "llms contains": 6488,
      "information evaluate": 5293,
      "benchmarks demonstrating": 1137,
      "inspired success": 5382,
      "performance work": 8457,
      "representation facilitates": 9727,
      "summarization method": 10959,
      "experiments public": 3793,
      "baselines furthermore": 1073,
      "small datasets": 10506,
      "datasets limited": 2536,
      "emerged popular": 3242,
      "produce highquality": 8916,
      "input prompts": 5352,
      "annotate new": 641,
      "new dataset": 7815,
      "transfer knowledge": 11595,
      "knowledge distillation": 5662,
      "experiments integrating": 3783,
      "llms popular": 6611,
      "popular pretrained": 8578,
      "models understand": 7551,
      "concise natural": 2003,
      "language image": 5767,
      "better user": 1185,
      "style transfer": 10874,
      "exceptional ability": 3633,
      "computational resources": 1975,
      "resources training": 9830,
      "directly applying": 2946,
      "remains difficult": 9653,
      "challenging paper": 1503,
      "utilizes generative": 11993,
      "employ sampling": 3285,
      "previous solutions": 8813,
      "understanding systems": 11784,
      "broad spectrum": 1291,
      "hallucination large": 4792,
      "models inspired": 7365,
      "abilities large": 124,
      "llms improving": 6561,
      "performance complex": 8375,
      "complex multimodal": 1898,
      "tend generate": 11349,
      "systematic study": 11050,
      "conduct evaluation": 2026,
      "suffer severe": 10936,
      "humanmachine interaction": 5013,
      "model medical": 7180,
      "pretrained vision": 8771,
      "largescale medical": 6138,
      "questionanswering dataset": 9285,
      "best models": 1166,
      "struggle solve": 10829,
      "languageimage pretraining": 5993,
      "achieve goal": 248,
      "pretrained image": 8744,
      "model achieves": 7103,
      "sota performance": 10568,
      "performance zeroshot": 8458,
      "evaluation approach": 3541,
      "methods generating": 6991,
      "highly correlated": 4902,
      "compared human": 1847,
      "evaluation models": 3569,
      "validate effectiveness": 12012,
      "stateoftheart sota": 10731,
      "previous evaluation": 8808,
      "generation prompts": 4566,
      "project website": 8957,
      "immense potential": 5073,
      "range applications": 9317,
      "applications field": 705,
      "framework provides": 4274,
      "tasks language": 11234,
      "experiments proposed": 3790,
      "model set": 7216,
      "new baseline": 7808,
      "benchmark large": 1121,
      "samples evaluating": 10090,
      "evaluating performance": 3535,
      "hallucination generate": 4791,
      "samples propose": 10091,
      "suggest chatgpt": 10941,
      "existing llms": 3697,
      "great challenges": 4748,
      "experiments prove": 3791,
      "models empirically": 7310,
      "pretraining methods": 8789,
      "making large": 6800,
      "gap narrowed": 4380,
      "instructiontuning dataset": 5446,
      "obtain intriguing": 7995,
      "obtain new": 7996,
      "human language": 4978,
      "incorporate knowledge": 5212,
      "including chatgpt": 5176,
      "results highlight": 9907,
      "method code": 6943,
      "models pretrained": 7495,
      "generalization capabilities": 4427,
      "method zeroshot": 6971,
      "different perspectives": 2896,
      "significantly boosts": 10427,
      "chainofthought method": 1462,
      "demonstrated effectiveness": 2672,
      "models shown": 7521,
      "problem data": 8860,
      "aigc technology": 531,
      "core idea": 2228,
      "diverse models": 3020,
      "achieve controllable": 246,
      "make attempt": 6788,
      "finally present": 4077,
      "codes available": 1739,
      "coherent text": 1762,
      "ai assistant": 512,
      "specifically start": 10639,
      "factual errors": 3975,
      "paper make": 8241,
      "supervised manner": 10989,
      "stage propose": 10677,
      "instruction prompts": 5413,
      "prompts activate": 9029,
      "finetuned large": 4114,
      "training model": 11569,
      "model develop": 7135,
      "achieve promising": 259,
      "enhance reasoning": 3394,
      "shown excellent": 10376,
      "excellent performance": 3628,
      "contrast large": 2168,
      "llms emerge": 6512,
      "model zeroshot": 7240,
      "prompt llm": 8995,
      "llm inference": 6418,
      "final result": 4068,
      "urgent need": 11876,
      "taskspecific lack": 11310,
      "lack comprehensive": 5741,
      "gpt demonstrated": 4667,
      "capabilities pretrained": 1355,
      "token sequence": 11486,
      "unified framework": 11802,
      "evaluate efficacy": 3507,
      "datasets experimental": 2531,
      "object location": 7972,
      "vision tasks": 12144,
      "tasks example": 11200,
      "work aims": 12248,
      "tasks visual": 11300,
      "interaction world": 5492,
      "random guessing": 9312,
      "achieve humanlevel": 251,
      "achieving performance": 315,
      "performance gain": 8387,
      "dataset available": 2478,
      "development large": 2839,
      "models enabled": 7312,
      "paving way": 8332,
      "novel techniques": 7936,
      "intelligence paper": 5475,
      "unlike conventional": 11832,
      "specific object": 10616,
      "object names": 7973,
      "openvocabulary object": 8069,
      "object detectors": 7970,
      "detectors perform": 2813,
      "perform reasoning": 8356,
      "reasoning context": 9418,
      "users instructions": 11928,
      "object based": 7967,
      "autonomous driving": 961,
      "provide inspiration": 9157,
      "detection systems": 2809,
      "latent space": 6154,
      "methods limited": 6999,
      "zeroshot reasoning": 12321,
      "perform complex": 8352,
      "opening new": 8052,
      "approach outperforms": 754,
      "outperforms previous": 8157,
      "recently growing": 9498,
      "capability large": 1369,
      "prohibitively expensive": 8953,
      "multimodal instructions": 7631,
      "llm called": 6403,
      "science question": 10148,
      "demonstrate competitive": 2649,
      "training efficiency": 11550,
      "multimodal llms": 7640,
      "llms integration": 6570,
      "holds great": 4928,
      "medical advice": 6895,
      "diverse domains": 3015,
      "provide reliable": 9165,
      "reliable medical": 9634,
      "advice additionally": 484,
      "generation performance": 4560,
      "model leverages": 7177,
      "recent large": 9465,
      "multimodal inputs": 7630,
      "brings emergent": 1285,
      "newly proposed": 7850,
      "tuning dataset": 11690,
      "covers wide": 2289,
      "tasks text": 11292,
      "data different": 2402,
      "data image": 2421,
      "robustness large": 10047,
      "unprecedented performance": 11843,
      "response generation": 9845,
      "safety concerns": 10081,
      "high success": 4876,
      "scene based": 10135,
      "based text": 1060,
      "text use": 11418,
      "model synthesize": 7225,
      "conditioned input": 2017,
      "way finally": 12175,
      "tasks using": 11298,
      "evaluation demonstrate": 3552,
      "utilization large": 11985,
      "limited number": 6355,
      "framework tailored": 4277,
      "specifically leverage": 10634,
      "fewshot prompt": 4036,
      "learning based": 6194,
      "codes publicly": 1746,
      "publicly accessible": 9209,
      "given texts": 4643,
      "single perspective": 10486,
      "framework employs": 4247,
      "various perspectives": 12088,
      "sentence multiple": 10263,
      "framework effectively": 4246,
      "achieving stateoftheart": 317,
      "popular datasets": 8572,
      "temporal information": 11347,
      "techniques improve": 11335,
      "use pretrained": 11894,
      "llms augment": 6461,
      "alleviate problem": 588,
      "semantic consistency": 10231,
      "model use": 7234,
      "use tools": 11895,
      "gpt4 shown": 4698,
      "shown great": 10378,
      "models typically": 7549,
      "data address": 2382,
      "llama opt": 6391,
      "instructionfollowing dataset": 5429,
      "prompting advanced": 9007,
      "lowrank adaptation": 6742,
      "adaptation lora": 358,
      "optimization approach": 8093,
      "llms solve": 6658,
      "enables zeroshot": 3313,
      "tools code": 11498,
      "realistic images": 9375,
      "methods fail": 6987,
      "score measuring": 10158,
      "given prompt": 4637,
      "measuring likelihood": 6888,
      "reward functions": 9996,
      "guide model": 4782,
      "texttoimage models": 11429,
      "benchmark proposed": 1126,
      "semantic similarity": 10242,
      "input prompt": 5351,
      "performance improved": 8397,
      "require additional": 9754,
      "memory overhead": 6918,
      "tasks inspired": 11226,
      "model inference": 7163,
      "learning approach": 6192,
      "module obtain": 7579,
      "search algorithm": 10175,
      "plms achieve": 8544,
      "representative plms": 9738,
      "plms bert": 8545,
      "instruction prompt": 5412,
      "introduce extra": 5540,
      "images text": 5067,
      "recently shown": 9506,
      "promising potential": 8972,
      "generated answers": 4473,
      "requirements propose": 9764,
      "analyses demonstrate": 616,
      "learning community": 6201,
      "achieve new": 254,
      "variety benchmarks": 12041,
      "ai model": 517,
      "model conduct": 7126,
      "high memory": 4873,
      "memory computational": 6913,
      "large model": 6108,
      "visual perception": 12150,
      "propose enhance": 9066,
      "taking advantage": 11103,
      "new learning": 7824,
      "knowledge extracted": 5671,
      "models utilized": 7557,
      "descriptions pretrained": 2742,
      "pretrained encoder": 8743,
      "representations learned": 9732,
      "learn better": 6177,
      "higher accuracy": 4880,
      "segment model": 10199,
      "systems like": 11063,
      "model sam": 7211,
      "model image": 7160,
      "ability downstream": 140,
      "detection paper": 2807,
      "models presents": 7494,
      "challenge propose": 1473,
      "embedding space": 3232,
      "ai assistants": 513,
      "precise information": 8680,
      "current datasets": 2349,
      "ego4d dataset": 3206,
      "models especially": 7317,
      "understanding generating": 11771,
      "promote development": 8980,
      "wellknown chinese": 12203,
      "diversity quality": 3032,
      "chinese benchmarks": 1622,
      "conduct indepth": 2033,
      "research develop": 9782,
      "develop better": 2823,
      "achieves new": 290,
      "stateoftheart result": 10725,
      "instruction understanding": 5423,
      "representations textual": 9735,
      "challenging address": 1494,
      "utilizes large": 11995,
      "synthetic text": 11046,
      "effectively mitigates": 3159,
      "effectiveness versatility": 3179,
      "bypassing need": 1322,
      "generalpurpose foundation": 4437,
      "coherent accurate": 1761,
      "specific focus": 10611,
      "approach introduce": 747,
      "model model": 7183,
      "model utilizes": 7238,
      "components model": 1915,
      "crucial factors": 2330,
      "parameterefficient training": 8288,
      "prediction task": 8692,
      "framework benchmark": 4241,
      "achieving artificial": 309,
      "point clouds": 8554,
      "point cloud": 8553,
      "experiments validate": 3808,
      "provide primary": 9163,
      "observations analysis": 7986,
      "codes datasets": 1744,
      "text instructions": 11403,
      "perform tasks": 8358,
      "productivity paper": 8924,
      "highlevel textual": 4889,
      "dataset constructed": 2490,
      "instructions generated": 5434,
      "model chatgpt": 7122,
      "specific regions": 10618,
      "single forward": 10483,
      "forward pass": 4213,
      "instructions despite": 5432,
      "limited data": 6350,
      "model enhanced": 7138,
      "ability recently": 164,
      "advanced large": 446,
      "straightforward effective": 10771,
      "data despite": 2401,
      "widely explored": 12219,
      "model capable": 7120,
      "language general": 5763,
      "framework achieve": 4236,
      "ability specifically": 165,
      "model designed": 7134,
      "unified multilingual": 11803,
      "data including": 2423,
      "conversations humans": 2207,
      "effective multilingual": 3144,
      "natural languages": 7746,
      "fully automated": 4302,
      "using simple": 11972,
      "text inputs": 11402,
      "surpassing existing": 11017,
      "leverage chatgpt": 6273,
      "model new": 7187,
      "seamlessly fitting": 10172,
      "textual information": 11438,
      "module seamlessly": 7580,
      "dataset terms": 2509,
      "complex realworld": 1900,
      "dominant role": 3070,
      "available large": 975,
      "large multimodal": 6111,
      "models building": 7270,
      "embodied artificial": 3236,
      "current evaluation": 2350,
      "evaluation metric": 3567,
      "wrong answers": 12288,
      "evaluation framework": 3556,
      "light developing": 6315,
      "llms key": 6572,
      "key idea": 5632,
      "capacity llms": 1386,
      "fully exploited": 4306,
      "limited domain": 6351,
      "13b parameters": 20,
      "success general": 10912,
      "general domains": 4404,
      "scenarios limited": 10131,
      "diagnosis relies": 2854,
      "paper study": 8268,
      "realworld medical": 9390,
      "medical dialogue": 6898,
      "model complete": 7125,
      "performs exceptionally": 8470,
      "dataset code": 2483,
      "entity linking": 3426,
      "mainly focus": 6773,
      "require finetuning": 9755,
      "adapt llms": 349,
      "offtheshelf language": 8022,
      "llm perform": 6422,
      "emergent abilities": 3256,
      "evaluation paper": 3571,
      "avoid data": 988,
      "manually designed": 6825,
      "existing mllms": 3702,
      "directions subsequent": 2944,
      "llms brain": 6469,
      "summarize recent": 10965,
      "recent progress": 9471,
      "applications including": 706,
      "akin human": 556,
      "data largescale": 2430,
      "model handle": 7159,
      "handle multiple": 4802,
      "specifically employ": 10628,
      "vector quantization": 12106,
      "tokens building": 11490,
      "specific language": 10614,
      "questionandanswer tasks": 9280,
      "performances multiple": 8460,
      "motion prediction": 7590,
      "prediction motion": 8691,
      "model addition": 7104,
      "addition existing": 373,
      "instructions performing": 5440,
      "referring expression": 9559,
      "generation work": 4589,
      "step artificial": 10745,
      "ability dialogue": 138,
      "performance furthermore": 8386,
      "chains thoughts": 1467,
      "model dataset": 7130,
      "finetuning multimodal": 4137,
      "enhances performance": 3403,
      "transforms raw": 11629,
      "understanding response": 11783,
      "document understanding": 3043,
      "models tend": 7543,
      "understanding evaluation": 11770,
      "models capabilities": 7271,
      "propose instruction": 9073,
      "new conversational": 7814,
      "model supports": 7223,
      "allows users": 598,
      "process obtain": 8892,
      "result shows": 9871,
      "create better": 2297,
      "broader range": 1293,
      "perception reasoning": 8347,
      "offer comprehensive": 8007,
      "comprehensive evaluations": 1936,
      "evaluations models": 3588,
      "incorporating human": 5216,
      "evaluation pipeline": 3572,
      "similar benchmarks": 10454,
      "variety evaluation": 12042,
      "robust evaluation": 10042,
      "evaluating various": 3537,
      "better evaluating": 1177,
      "generation paper": 4558,
      "develop scalable": 2826,
      "scalable approach": 10104,
      "dataset large": 2498,
      "furthermore introduce": 4334,
      "model demonstrates": 7133,
      "generation research": 4576,
      "medical data": 6897,
      "remains limited": 9659,
      "limited paper": 6356,
      "dialogue model": 2863,
      "dialogue data": 2860,
      "exhibits excellent": 3668,
      "incorporating visual": 5219,
      "models make": 7465,
      "scale language": 10107,
      "gradientbased methods": 4718,
      "methods various": 7019,
      "tasks fewshot": 11211,
      "fewshot settings": 4040,
      "settings furthermore": 10318,
      "learning different": 6205,
      "emergence incontext": 3249,
      "model present": 7200,
      "emergent ability": 3257,
      "compared blip2": 1842,
      "generation compared": 4524,
      "textual representations": 11440,
      "semantics consistent": 10253,
      "able perform": 178,
      "pretraining instruction": 8785,
      "study emphasizes": 10851,
      "instructions leading": 5438,
      "endtoend multimodal": 3363,
      "provides flexible": 9175,
      "based existing": 1030,
      "furthermore design": 4329,
      "capabilities demonstrated": 1339,
      "current methodologies": 2355,
      "datasets training": 2553,
      "datasets exhibit": 2530,
      "generative capabilities": 4594,
      "mitigate limitations": 7071,
      "novel data": 7918,
      "harnesses power": 4823,
      "conducted various": 2047,
      "datasets using": 2554,
      "using opensource": 11962,
      "surge generative": 11009,
      "current benchmarks": 2347,
      "novel llmbased": 7925,
      "dataset task": 2508,
      "direction release": 2938,
      "gpt4 significantly": 4699,
      "models leading": 7377,
      "english data": 3381,
      "data collected": 2394,
      "model demonstrated": 7132,
      "significant advantages": 10403,
      "generation question": 4569,
      "gap present": 4384,
      "generating questionanswer": 4506,
      "questionanswer pairs": 9282,
      "dataset designed": 2494,
      "answering openended": 671,
      "outstanding performance": 8170,
      "generation various": 4588,
      "metrics outperforming": 7032,
      "current stateoftheart": 2362,
      "sota models": 10567,
      "text detection": 11391,
      "rich world": 10009,
      "tasks context": 11183,
      "explored work": 3856,
      "performance individual": 8400,
      "recently significant": 9507,
      "lowresource nature": 6747,
      "effective training": 3148,
      "strong multilingual": 10810,
      "build large": 1303,
      "achieve stateoftheart": 261,
      "stateoftheart opensource": 10720,
      "opensource performance": 8064,
      "performance chinese": 8370,
      "model weights": 7239,
      "instructiontuning data": 5445,
      "remains challenge": 9650,
      "current leading": 2351,
      "generate data": 4444,
      "tasks worth": 11304,
      "false information": 3995,
      "framework enables": 4248,
      "data ensure": 2406,
      "generation quality": 4568,
      "diverse highquality": 3016,
      "success existing": 10911,
      "existing visual": 3714,
      "tuning methods": 11699,
      "qualitative analysis": 9233,
      "data released": 2452,
      "evaluation based": 3542,
      "low cost": 6729,
      "privacy preservation": 8846,
      "furthermore analyze": 4325,
      "helpful suggestions": 4853,
      "despite strong": 2786,
      "strong abilities": 10804,
      "common objects": 1795,
      "design prompt": 2754,
      "multiturn dialogues": 7687,
      "impressive fewshot": 5110,
      "task previous": 11141,
      "methods suffer": 7013,
      "insufficient knowledge": 5453,
      "model novel": 7188,
      "respective strengths": 9835,
      "uses llm": 11933,
      "final answer": 4066,
      "results datasets": 9890,
      "datasets prove": 2544,
      "models exhibit": 7320,
      "models extend": 7328,
      "involving multiple": 5581,
      "training introduce": 11560,
      "furthermore construct": 4326,
      "dialogue turns": 2867,
      "reasoning task": 9438,
      "common sense": 1797,
      "llm effectively": 6406,
      "dataset comprising": 2487,
      "evaluation traditional": 3581,
      "human annotators": 4951,
      "semantic segmentation": 10241,
      "novel object": 7928,
      "inference time": 5276,
      "practical scenarios": 8670,
      "issues work": 5600,
      "proposes novel": 9126,
      "inspired human": 5375,
      "human cognition": 4958,
      "class names": 1647,
      "strategies designed": 10777,
      "target object": 11106,
      "datasets attribute": 2515,
      "generation instruction": 4537,
      "finetuning techniques": 4151,
      "exhibits superior": 3673,
      "task address": 11112,
      "accomplish task": 213,
      "fully exploit": 4305,
      "knowledge generate": 5672,
      "methods consistently": 6979,
      "consistently significantly": 2098,
      "large ai": 6001,
      "model empowered": 7137,
      "semantic ambiguity": 10228,
      "potential solutions": 8636,
      "framework present": 4272,
      "effectively addresses": 3150,
      "finally apply": 4071,
      "generative adversarial": 4592,
      "state information": 10696,
      "approach effectively": 737,
      "mitigates impact": 7075,
      "demonstrate superior": 2666,
      "contrastive instruction": 2175,
      "method better": 6941,
      "better instruction": 1179,
      "tuning method": 11698,
      "tuning extensive": 11692,
      "gating mechanism": 4396,
      "exhibit superior": 3660,
      "quality code": 9238,
      "training samples": 11580,
      "method improve": 6953,
      "improve prompt": 5134,
      "incorporating pretrained": 5218,
      "model context": 7128,
      "generated llms": 4482,
      "llms underexplored": 6672,
      "introduce pretrained": 5547,
      "baseline code": 1065,
      "manipulation tasks": 6813,
      "tasks models": 11246,
      "complexity diversity": 1909,
      "mixtureofexpert moe": 7084,
      "generate large": 4453,
      "dataset using": 2511,
      "form specifically": 4195,
      "llms suffer": 6666,
      "llms previous": 6617,
      "finetuning process": 4142,
      "process llms": 8888,
      "task essential": 11125,
      "task visual": 11150,
      "datasets obtain": 2539,
      "dataset method": 2501,
      "revolutionized field": 9983,
      "larger language": 6123,
      "encoder decoder": 3323,
      "models release": 7512,
      "release dataset": 9621,
      "challenges paper": 1486,
      "entities target": 3424,
      "key insight": 5634,
      "harnessing capabilities": 4825,
      "framework framework": 4254,
      "plays significant": 8536,
      "significant role": 10418,
      "methods primarily": 7003,
      "optimization task": 8096,
      "generation code": 4522,
      "enhance semantic": 3397,
      "code code": 1703,
      "completed code": 1888,
      "highly interpretable": 4905,
      "performance 50": 8361,
      "improvements multiple": 5146,
      "learning finetune": 6209,
      "visual programming": 12151,
      "training performance": 11577,
      "employing finetuning": 3288,
      "significant performance": 10416,
      "overall task": 8174,
      "task performance": 11138,
      "distill knowledge": 2992,
      "extensive comprehensive": 3886,
      "experimental evaluations": 3737,
      "achieve substantial": 262,
      "substantial performance": 10896,
      "performance improvement": 8398,
      "methods large": 6997,
      "large margins": 6107,
      "provide valuable": 9168,
      "process method": 8890,
      "information loss": 5306,
      "capable generating": 1377,
      "experiments highlight": 3781,
      "text modalities": 11405,
      "security risks": 10192,
      "work study": 12268,
      "design corresponding": 2746,
      "models dalle": 7293,
      "generate highly": 4448,
      "concerns regarding": 2001,
      "nsfw content": 7945,
      "subjective objective": 10882,
      "specialized models": 10603,
      "systematically evaluate": 11053,
      "evaluate potential": 3512,
      "attributes measure": 906,
      "specifically design": 10625,
      "evaluation abilities": 3539,
      "pipeline harnesses": 8504,
      "harnesses large": 4819,
      "information introduce": 5300,
      "model gpt": 7156,
      "language semantics": 5975,
      "information code": 5289,
      "building ai": 1309,
      "existing detectors": 3684,
      "weak generalization": 12180,
      "llms garnered": 6541,
      "garnered widespread": 4391,
      "applications various": 713,
      "content generated": 2136,
      "method automatically": 6938,
      "automatically constructing": 951,
      "stateoftheart results benchmark": 10727,
      "results benchmark datasets": 9882,
      "generative pretraining transformer": 4620,
      "autoregressive language model": 965,
      "tasks natural language": 11248,
      "paper proposes new": 8260,
      "paper propose method": 8254,
      "contrastive learning framework": 2178,
      "computer vision cv": 1982,
      "significantly improves stateoftheart": 10438,
      "based pretrained language": 1055,
      "transformerbased language model": 11618,
      "experiments verify effectiveness": 3812,
      "text generation tasks": 11397,
      "information paper present": 5309,
      "present simple effective": 8723,
      "simple effective method": 10460,
      "conduct thorough ablation": 2037,
      "thorough ablation studies": 11456,
      "generation text generation": 4585,
      "task aims generate": 11114,
      "number training data": 7957,
      "number trainable parameters": 7955,
      "challenges propose novel": 1490,
      "pretrained transformer gpt": 8767,
      "generation remains open": 4574,
      "remains open question": 9661,
      "convolutional neural networks": 2221,
      "experiments demonstrate method": 3775,
      "multimodal foundation model": 7629,
      "results demonstrate potential": 9895,
      "code pretrained models": 1727,
      "using automatic human": 11938,
      "language model text": 5804,
      "experimental results proposed": 3754,
      "models code available": 7279,
      "recently gained significant": 9496,
      "language processing task": 5970,
      "solve problem propose": 10553,
      "kullbackleibler divergence kld": 5727,
      "models nlp tasks": 7477,
      "shown impressive performance": 10381,
      "approach generally applied": 742,
      "outperform competitive baselines": 8136,
      "codes data publicly": 1742,
      "data publicly available": 2449,
      "results demonstrate proposed": 9896,
      "achieves best results": 280,
      "propose novel approach": 9085,
      "ablation studies demonstrate": 171,
      "studies demonstrate effectiveness": 10839,
      "demonstrate effectiveness approach": 2652,
      "model specifically designed": 7222,
      "llms medical domain": 6592,
      "medical domain knowledge": 6900,
      "achieved great success": 265,
      "models natural language": 7472,
      "learning generative pretrained": 6213,
      "drawn widespread attention": 3095,
      "language models vlms": 5953,
      "experimental results terms": 3759,
      "generating natural language": 4504,
      "natural language descriptions": 7711,
      "end propose novel": 3353,
      "propose novel promptbased": 9093,
      "architectures extensive experiments": 795,
      "extensive experiments ablation": 3894,
      "based large language": 1044,
      "including semantic text": 5194,
      "understand natural language": 11759,
      "natural language navigation": 7721,
      "gained significant attention": 4366,
      "achieved remarkable progress": 270,
      "work propose new": 12261,
      "conduct thorough experiments": 2039,
      "achieves superior performance": 307,
      "examples different tasks": 3620,
      "address gap propose": 392,
      "gap propose novel": 4386,
      "visual question answering": 12153,
      "question answering image": 9269,
      "answering image captioning": 668,
      "surpasses previous methods": 11014,
      "language processing models": 5963,
      "llms demonstrated impressive": 6498,
      "lowrank adaption lora": 6744,
      "results model outperforms": 9916,
      "multiturn conversation ability": 7684,
      "online demo available": 8027,
      "extensive experiments benchmark": 3896,
      "performance demonstrating effectiveness": 8377,
      "simple highly effective": 10465,
      "work propose novel": 12262,
      "extensive experiments public": 3905,
      "limited training data": 6360,
      "semantic understanding reasoning": 10246,
      "concise natural language": 2004,
      "better user experience": 1186,
      "utilizes generative pretrained": 11994,
      "foundation models large": 4225,
      "abilities large language": 125,
      "experiment results demonstrate": 3735,
      "model medical domain": 7181,
      "language models design": 5822,
      "achieves sota performance": 300,
      "language models paper": 5927,
      "models paper introduces": 7483,
      "compared human evaluation": 1848,
      "llms work present": 6680,
      "vision language models": 12139,
      "benchmark large language": 1122,
      "making large language": 6801,
      "language models pretrained": 5934,
      "propose novel method": 9091,
      "significantly boosts performance": 10428,
      "models shown promising": 7522,
      "address limitations propose": 406,
      "natural language description": 7710,
      "paper make attempt": 8242,
      "experimental results multiple": 3751,
      "enhance reasoning ability": 3395,
      "shown excellent performance": 10377,
      "contrast large language": 2169,
      "models llms emerge": 7408,
      "language model zeroshot": 5805,
      "novel framework called": 7922,
      "llms natural language": 6596,
      "datasets experimental results": 2532,
      "reasoning capabilities llms": 9413,
      "development large language": 2840,
      "paper introduce new": 8235,
      "openvocabulary object detectors": 8070,
      "zeroshot reasoning ability": 12322,
      "approach outperforms previous": 755,
      "language models recently": 5940,
      "capability large language": 1370,
      "science question answering": 10149,
      "demonstrate competitive performance": 2650,
      "tackle challenges introduce": 11084,
      "medical advice additionally": 6896,
      "source code available": 10573,
      "recent large language": 9466,
      "instruction tuning dataset": 5416,
      "covers wide range": 2290,
      "codes data models": 1741,
      "data image text": 2422,
      "human evaluation demonstrate": 4963,
      "utilization large language": 11986,
      "models demonstrated remarkable": 7298,
      "achieving stateoftheart performance": 318,
      "address challenges propose": 390,
      "extensive experiments method": 3904,
      "model use tools": 7235,
      "chatgpt gpt4 shown": 1570,
      "shown great potential": 10379,
      "language models significantly": 5946,
      "models achieved remarkable": 7256,
      "representative plms bert": 9739,
      "multimodal understanding capability": 7645,
      "achieve new stateoftheart": 255,
      "consistently improves performance": 2096,
      "segment model sam": 10200,
      "inspired recent success": 5378,
      "language models especially": 5831,
      "achieves new stateoftheart": 291,
      "new stateoftheart result": 7840,
      "challenging address challenges": 1495,
      "utilizes large language": 11996,
      "llms work propose": 6681,
      "achieving artificial general": 310,
      "extensive experiments validate": 3907,
      "experiments validate effectiveness": 3809,
      "codes datasets available": 1745,
      "powerful language models": 8657,
      "language model chatgpt": 5781,
      "single forward pass": 10484,
      "advanced large language": 447,
      "capabilities various nlp": 1362,
      "despite great success": 2782,
      "publicly available large": 9212,
      "embodied artificial intelligence": 3237,
      "llms key idea": 6573,
      "achieved significant success": 274,
      "success general domains": 10913,
      "realworld medical dialogue": 9391,
      "language model complete": 5782,
      "dataset code models": 2484,
      "code models publicly": 1724,
      "methods mainly focus": 7001,
      "framework based llms": 4240,
      "avoid data leakage": 989,
      "summarize recent progress": 10966,
      "akin human language": 557,
      "achieves stateoftheart performances": 303,
      "stateoftheart performances multiple": 10724,
      "motion prediction motion": 7591,
      "range tasks including": 9322,
      "step artificial general": 10746,
      "gap paper proposes": 4383,
      "models llms using": 7460,
      "tasks code models": 11175,
      "training data evaluation": 11546,
      "dataset large language": 2499,
      "representation learning model": 9729,
      "exhibits excellent performance": 3669,
      "language models make": 5920,
      "scale language models": 10108,
      "emergence incontext learning": 3250,
      "experiments conducted various": 3770,
      "existing evaluation metrics": 3687,
      "propose novel llmbased": 9090,
      "future research direction": 4352,
      "research direction release": 9785,
      "direction release code": 2939,
      "chatgpt gpt4 significantly": 1571,
      "chinese english data": 1625,
      "generation question answering": 4570,
      "generating questionanswer pairs": 4507,
      "stateoftheart sota models": 10732,
      "rich world knowledge": 10010,
      "diverse highquality data": 3017,
      "code data released": 1710,
      "achieved remarkable performance": 269,
      "scenarios involving multiple": 10130,
      "superior performance existing": 10977,
      "generation instruction following": 4538,
      "large ai models": 6002,
      "effectively mitigates impact": 3160,
      "demonstrate superior performance": 2667,
      "tasks work propose": 11303,
      "generate large number": 4454,
      "hallucination large language": 4793,
      "performance various nlp": 8452,
      "larger language models": 6124,
      "future research area": 4351,
      "harnessing capabilities large": 4826,
      "plays significant role": 8537,
      "existing methods primarily": 3699,
      "generation code generation": 4523,
      "code generation task": 1718,
      "comprehensive experimental evaluations": 1938,
      "evaluations demonstrate method": 3586,
      "substantial performance improvement": 10897,
      "outperforms compared stateoftheart": 8151,
      "compared stateoftheart methods": 1854,
      "texttoimage models dalle": 11430,
      "harnesses large language": 4820,
      "language model gpt": 5787,
      "models llms garnered": 7420,
      "applications various domains": 714,
      "new stateoftheart results benchmark": 7842,
      "stateoftheart results benchmark datasets": 10728,
      "present simple effective method": 8724,
      "conduct thorough ablation studies": 2038,
      "generative pretrained transformer gpt": 4614,
      "generation remains open question": 4575,
      "extensive experiments demonstrate method": 3902,
      "natural language processing task": 7732,
      "codes data publicly available": 1743,
      "experimental results demonstrate proposed": 3743,
      "ablation studies demonstrate effectiveness": 172,
      "models natural language processing": 7473,
      "generating natural language descriptions": 4505,
      "extensive experiments ablation studies": 3895,
      "address gap propose novel": 393,
      "visual question answering image": 12154,
      "question answering image captioning": 9270,
      "models llms demonstrated impressive": 7402,
      "experimental results model outperforms": 3750,
      "extensive experiments benchmark datasets": 3897,
      "foundation models large language": 4226,
      "abilities large language models": 126,
      "large language models paper": 6089,
      "language models paper introduces": 5928,
      "models paper introduces novel": 7484,
      "benchmark large language models": 1123,
      "making large language models": 6802,
      "based natural language instructions": 1051,
      "large language models pretrained": 6092,
      "end propose novel method": 3354,
      "models shown promising results": 7523,
      "given natural language description": 4636,
      "contrast large language models": 2170,
      "language models llms emerge": 5871,
      "propose novel framework called": 9088,
      "development large language models": 2841,
      "zeroshot reasoning ability large": 12323,
      "large language models recently": 6096,
      "capability large language models": 1371,
      "recent large language models": 9467,
      "models demonstrated remarkable capabilities": 7299,
      "llms chatgpt gpt4 shown": 6476,
      "inspired recent success large": 5379,
      "large language models especially": 6035,
      "utilizes large language models": 11997,
      "achieving artificial general intelligence": 311,
      "extensive experiments validate effectiveness": 3908,
      "large language model chatgpt": 6011,
      "advanced large language models": 448,
      "capabilities various nlp tasks": 1363,
      "dataset code models publicly": 2485,
      "code models publicly available": 1725,
      "achieves stateoftheart performances multiple": 304,
      "wide range tasks including": 12212,
      "step artificial general intelligence": 10747,
      "language models llms using": 5917,
      "tuning large language model": 11695,
      "dataset large language models": 2500,
      "large language models make": 6085,
      "future research direction release": 4353,
      "research direction release code": 9786,
      "based large language models": 1045,
      "knowledge large language model": 5686,
      "demonstrates superior performance existing": 2701,
      "tasks code models available": 11176,
      "hallucination large language models": 4794,
      "performance various nlp tasks": 8453,
      "harnessing capabilities large language": 4827,
      "harnesses large language models": 4821,
      "language models llms garnered": 5883,
      "new stateoftheart results benchmark datasets": 7843,
      "using large language models large": 11956,
      "leverages large language models llms": 6288,
      "capabilities large language models llms": 1345,
      "visual question answering image captioning": 12155,
      "language models llms demonstrated impressive": 5867,
      "knowledge large language models llms": 5688,
      "foundation models large language models": 4227,
      "language models paper introduces novel": 5929,
      "tasks large language models llms": 11238,
      "benchmark large language models large": 1124,
      "advances large language models llms": 470,
      "contrast large language models llms": 2171,
      "large language models llms emerge": 6054,
      "development large language models llms": 2842,
      "zeroshot reasoning ability large language": 12324,
      "recent large language models llm": 9468,
      "inspired recent success large language": 5380,
      "recent large language models llms": 9469,
      "dataset code models publicly available": 2486,
      "era large language models llms": 3455,
      "large language models llms using": 6083,
      "future research direction release code": 4354,
      "harnesses large language models llms": 4822,
      "large language models llms garnered": 6064,
      "python": 9224,
      "section": 10188,
      "classifies": 1658,
      "adjustable": 429,
      "accommodating": 210,
      "catering": 1422,
      "validated": 12013,
      "tabletop": 11077,
      "executing": 3649,
      "longhorizon": 6711,
      "robot": 10037,
      "relieve": 9640,
      "acquisition": 324,
      "burden": 1317,
      "involvement": 5575,
      "simulator": 10477,
      "lights": 6318,
      "uncommon": 11741,
      "twin": 11709,
      "15k": 25,
      "environmental": 3437,
      "perturbations": 8484,
      "violations": 12134,
      "continuously": 2163,
      "monitor": 7585,
      "shorter": 10356,
      "infeasible": 5268,
      "rgb": 10002,
      "achievable": 236,
      "longtail": 6715,
      "30": 52,
      "sr": 10667,
      "lifelike": 6311,
      "witnessed": 12234,
      "stimulating": 10758,
      "reused": 9963,
      "scattered": 10119,
      "reusing": 9964,
      "accumulation": 228,
      "max": 6872,
      "traverse": 11651,
      "tag": 11091,
      "load": 6685,
      "lifting": 6313,
      "humanrobot": 5015,
      "deploy": 2721,
      "highresolution": 4913,
      "salient": 10084,
      "branches": 1264,
      "prioritizing": 8842,
      "48": 72,
      "want": 12170,
      "unfamiliar": 11796,
      "equivalent": 3450,
      "matches": 6853,
      "operate": 8072,
      "knowledgedriven": 5712,
      "overfitting": 8186,
      "significant strides": 10420,
      "llm model": 6420,
      "python programs": 9225,
      "planning action": 8514,
      "input types": 5356,
      "tasks different": 11190,
      "different scenarios": 2905,
      "outperformed stateoftheart": 8141,
      "embodied ai": 3235,
      "longhorizon tasks": 6712,
      "introduce efficient": 5539,
      "efficient training": 3199,
      "training approach": 11540,
      "closed loop": 1681,
      "task extracting": 11128,
      "building blocks": 1310,
      "multistep reasoning": 7673,
      "connecting human": 2069,
      "suboptimal results": 10886,
      "data acquisition": 2381,
      "human involvement": 4975,
      "existing open": 3704,
      "methods achieve": 6975,
      "tasks realworld": 11265,
      "environments agents": 3439,
      "diverse training": 3029,
      "better generalization": 1178,
      "agents focus": 500,
      "propose benchmark": 9059,
      "benchmark named": 1125,
      "multitask setting": 7679,
      "realistic scenarios": 9376,
      "simulator contains": 10478,
      "tasks recently": 11268,
      "llms unified": 6674,
      "understand execute": 11756,
      "end work": 3355,
      "generated chatgpt": 4474,
      "propose general": 9068,
      "reasoning levels": 9428,
      "different llms": 2888,
      "llms encode": 6517,
      "work explored": 12251,
      "tasks generate": 11216,
      "physical world": 8491,
      "llms play": 6610,
      "various complex": 12055,
      "success rates": 10925,
      "task completion": 11119,
      "task planning": 11139,
      "planning large": 8517,
      "successfully complete": 10931,
      "generation complex": 4525,
      "lack information": 5743,
      "realistic world": 9377,
      "dataset containing": 2491,
      "action plans": 328,
      "designed prompts": 2765,
      "llms inference": 6567,
      "results generated": 9902,
      "complex environments": 1896,
      "potential using": 8638,
      "llm understand": 6429,
      "analyze ability": 631,
      "ability reason": 163,
      "complex scenarios": 1903,
      "systems face": 11059,
      "performance limitations": 8407,
      "solve problems": 10554,
      "employing llm": 3289,
      "closer human": 1688,
      "poses challenges": 8590,
      "llms great": 6554,
      "environment paper": 3436,
      "previous stateoftheart": 8816,
      "2023 competition": 41,
      "dialog history": 2858,
      "state tracking": 10698,
      "30 absolute": 53,
      "respectively code": 9838,
      "rl methods": 10025,
      "methods taskspecific": 7015,
      "previous approaches": 8806,
      "continual knowledge": 2158,
      "pretrained knowledge": 8745,
      "real world": 9373,
      "engineering paper": 3376,
      "enhance effectiveness": 3387,
      "efficacy proposed": 3182,
      "tasks resulting": 11275,
      "feature maps": 4012,
      "scenarios challenging": 10122,
      "process experiments": 8883,
      "features improve": 4016,
      "outperform baseline": 8133,
      "develop powerful": 2825,
      "approach involves": 749,
      "modeling tasks": 7249,
      "finetuned downstream": 4111,
      "feature engineering": 4010,
      "outperforming previous": 8144,
      "widely adopted": 12216,
      "nature human": 7753,
      "leveraging large": 6299,
      "abilities propose": 128,
      "significant advantage": 10402,
      "various applications including": 12052,
      "propose benchmark named": 9060,
      "like chatgpt gpt4": 6327,
      "conduct comprehensive analysis": 2022,
      "models llms encode": 7411,
      "various complex tasks": 12056,
      "experimental results generated": 3744,
      "explore potential using": 3845,
      "providing valuable insights": 9185,
      "llms great potential": 6555,
      "outperforms previous stateoftheart": 8158,
      "models llms existing": 7417,
      "outperform baseline methods": 8134,
      "models recent advancements": 7510,
      "leveraging large language": 6300,
      "natural language processing paper": 7730,
      "llms like chatgpt gpt4": 6583,
      "natural language understanding tasks": 7744,
      "language models llms encode": 5874,
      "language models llms existing": 5880,
      "language models recent advancements": 5939,
      "leveraging large language models": 6301,
      "models llms like chatgpt gpt4": 7431,
      "large language models llms encode": 6057,
      "using large language model llm": 11954,
      "large language models llms existing": 6061,
      "large language models recent advancements": 6095,
      "personalize": 8477,
      "instantiate": 5388,
      "editor": 3129,
      "trustworthiness": 11679,
      "authenticity": 927,
      "lastly": 6151,
      "acquired": 322,
      "amazon": 608,
      "beauty": 1091,
      "disparity": 2985,
      "executed": 3648,
      "rtx": 10068,
      "3090": 55,
      "llama7b": 6396,
      "devoted": 2850,
      "describing": 2738,
      "widelystudied": 12221,
      "fairness": 3984,
      "note": 7910,
      "protocol": 9135,
      "groundtruth": 4762,
      "simulators": 10479,
      "endeavors": 3357,
      "rating": 9350,
      "analyzes": 636,
      "2000": 36,
      "humancentered": 4996,
      "reliably": 9635,
      "let": 6267,
      "browsing": 1297,
      "clicking": 1672,
      "influential": 5287,
      "chatting": 1613,
      "profiling": 8933,
      "playing": 8533,
      "giving": 4645,
      "orthogonal": 8122,
      "prospects": 9130,
      "actively": 338,
      "streamline": 10791,
      "restricting": 9865,
      "promotion": 8984,
      "platforms": 8522,
      "degrade": 2627,
      "profile": 8932,
      "lifelong": 6312,
      "received": 9444,
      "ecommerce": 3116,
      "workflow": 12269,
      "satisfying": 10098,
      "multidomain": 7607,
      "card": 1396,
      "diverse information": 3018,
      "success various": 10926,
      "offering potential": 8012,
      "overcome limitations": 8180,
      "meet users": 6905,
      "instructions guide": 5435,
      "content generation": 2137,
      "showing promising": 10370,
      "witnessed significant": 12235,
      "recommendation methods": 9519,
      "recently emergence": 9493,
      "emergence chatgpt": 3246,
      "conversational models": 2204,
      "thoroughly investigated": 11460,
      "investigated paper": 5565,
      "knowledge acquired": 5650,
      "unlike traditional": 11835,
      "explore use": 3848,
      "evaluate quality": 3514,
      "provided information": 9171,
      "researchers explore": 9813,
      "chatgpt improve": 1574,
      "performance diverse": 8381,
      "learning involves": 6220,
      "training tasks": 11587,
      "domains limited": 3059,
      "highly efficient": 4904,
      "rtx 3090": 10069,
      "following large": 4187,
      "attracted attention": 897,
      "attention research": 894,
      "industry communities": 5264,
      "progress large": 8945,
      "models considering": 7285,
      "experiments tasks": 3804,
      "baselines including": 1074,
      "tasks approach": 11166,
      "approach sheds": 758,
      "obtain accurate": 7994,
      "led emergence": 6255,
      "contain social": 2125,
      "avoid potential": 991,
      "directly use": 2952,
      "novel benchmark": 7916,
      "benchmark called": 1112,
      "code dataset": 1711,
      "powerful conversational": 8655,
      "utilization chatgpt": 11984,
      "evaluation protocol": 3574,
      "interactive evaluation": 5496,
      "llms named": 6594,
      "user simulators": 11918,
      "experiments publicly": 3794,
      "notable improvements": 7906,
      "improvements compared": 5145,
      "deeper comprehension": 2612,
      "new opportunities": 7828,
      "opportunities paper": 8083,
      "chatgpt paper": 1586,
      "based different": 1029,
      "paper discusses": 8220,
      "opportunities improvement": 8082,
      "efficiency transparency": 3189,
      "generation based": 4517,
      "significantly improve": 10433,
      "knowledge models": 5694,
      "models improve": 7355,
      "generate realistic": 4464,
      "based user": 1062,
      "user preferences": 11915,
      "challenging problem": 1504,
      "human cognitive": 4959,
      "achieve humanlike": 252,
      "humanlike intelligence": 5008,
      "autonomous agent": 959,
      "playing intervention": 8534,
      "models survey": 7540,
      "match users": 6851,
      "applications natural": 708,
      "survey research": 11029,
      "training inference": 11558,
      "finegrained taxonomy": 4101,
      "key challenges": 5628,
      "finally summarize": 4079,
      "discuss future": 2973,
      "models novel": 7478,
      "propose train": 9105,
      "model evaluate": 7139,
      "rl method": 10024,
      "preferences particular": 8702,
      "experiments largescale": 3786,
      "exploring large": 3859,
      "tasks demonstrating": 11188,
      "demonstrating exceptional": 2704,
      "framework harnesses": 4259,
      "models analyze": 7260,
      "leverages llm": 6289,
      "understand behavior": 11754,
      "comprehensive dataset": 1930,
      "models provides": 7502,
      "provides valuable": 9180,
      "growing field": 4768,
      "offer practical": 8009,
      "llms utilizing": 6677,
      "leveraging llms": 6302,
      "generation fewshot": 4531,
      "alleviate limitation": 587,
      "generation llms": 4548,
      "information users": 5319,
      "specifically extract": 10631,
      "models generating": 7342,
      "highquality generated": 4910,
      "experiments large": 3785,
      "problem llms": 8865,
      "extract useful": 3922,
      "useful information": 11905,
      "augmentation technique": 919,
      "specifically develop": 10627,
      "training dataset": 11547,
      "experiments realworld": 3796,
      "public dataset": 9201,
      "models capability": 7272,
      "ai agent": 510,
      "models excel": 7319,
      "leveraging extensive": 6296,
      "despite ability": 2778,
      "tasks providing": 11263,
      "engaging conversations": 3372,
      "llms lack": 6577,
      "finetuning llms": 4133,
      "task execution": 11126,
      "llms experimental": 6529,
      "search engines": 10177,
      "data multiple": 2440,
      "shared parameters": 10332,
      "tasks taskspecific": 11291,
      "taskspecific parameters": 11312,
      "llm extract": 6410,
      "trained jointly": 11534,
      "achieves better": 281,
      "mobile applications": 7092,
      "aigenerated content aigc": 534,
      "showing promising results": 10371,
      "recently emergence chatgpt": 9494,
      "thoroughly investigated paper": 11461,
      "incontext learning involves": 5208,
      "instruction following large": 5405,
      "following large language": 4188,
      "language model empowered": 5784,
      "recent progress large": 9472,
      "progress large language": 8946,
      "approach sheds light": 759,
      "avoid potential risks": 992,
      "novel benchmark called": 7917,
      "llms shown great": 6649,
      "experiments publicly available": 3795,
      "applications natural language": 709,
      "language models novel": 5925,
      "exploring large language": 3860,
      "novel framework harnesses": 7923,
      "language models analyze": 5811,
      "provides valuable insights": 9181,
      "field natural language": 4051,
      "rapid development large": 9338,
      "extract useful information": 3923,
      "llms address issue": 6451,
      "llms propose novel": 6621,
      "zeroshot fewshot settings": 12315,
      "data augmentation technique": 2388,
      "natural language interface": 7717,
      "llms experimental results": 6530,
      "achieves better performance": 282,
      "instruction following large language": 5406,
      "following large language model": 4189,
      "large language model empowered": 6012,
      "recent progress large language": 9473,
      "progress large language models": 8947,
      "models llms shown great": 7451,
      "empowered large language model": 3298,
      "applications natural language processing": 710,
      "large language models analyze": 6021,
      "field natural language processing": 4052,
      "rapid development large language": 9339,
      "datasets demonstrate effectiveness proposed": 2525,
      "instruction following large language model": 5407,
      "recent progress large language models": 9474,
      "progress large language models llms": 8948,
      "language models llms shown great": 5909,
      "revolutionized natural language processing tasks": 9988,
      "rapid development large language models": 9340,
      "influenced": 5286,
      "society": 10530,
      "nn": 7884,
      "belief": 1106,
      "status": 10741,
      "biological": 1209,
      "conjectures": 2064,
      "pack": 8198,
      "pieces": 8496,
      "articulate": 811,
      "knowing": 5648,
      "delivers": 2634,
      "trials": 11665,
      "outlining": 8128,
      "bots": 1250,
      "accomplishing": 216,
      "stimulate": 10755,
      "exploratory": 3837,
      "collective": 1776,
      "exert": 3654,
      "organized": 8116,
      "worlds": 12280,
      "roleplaying": 10053,
      "overseeing": 8193,
      "compensating": 1869,
      "custom": 2368,
      "unavailable": 11732,
      "gathers": 4394,
      "accumulates": 227,
      "accomplishment": 217,
      "dynamics": 3109,
      "collaboratively": 1769,
      "positive": 8597,
      "proactive": 8851,
      "ais": 554,
      "hinges": 4921,
      "modularity": 7577,
      "selfplay": 10222,
      "populationbased": 8582,
      "isolated": 5585,
      "delivering": 2633,
      "encompasses": 3335,
      "classroom": 1661,
      "economics": 3118,
      "journey": 5613,
      "scholars": 10145,
      "tasksolving": 11306,
      "couples": 2281,
      "tasks study": 11283,
      "finetuning llm": 4132,
      "experiments involving": 3784,
      "ai tasks": 523,
      "new research": 7837,
      "understanding deep": 11769,
      "does need": 3047,
      "models constructed": 7287,
      "intelligence large": 5472,
      "perspective paper": 8481,
      "problems current": 8872,
      "intelligent agents": 5477,
      "knowledge acquisition": 5651,
      "trials errors": 11666,
      "directions field": 2942,
      "traditional tasks": 11523,
      "enabling efficient": 3315,
      "lack systematic": 5748,
      "systematic research": 11048,
      "possess enhanced": 8600,
      "publicly released": 9213,
      "datasets research": 2545,
      "language large": 5773,
      "llms enabled": 6515,
      "ai agents": 511,
      "presents challenges": 8729,
      "play crucial": 8528,
      "crucial role": 2331,
      "baseline evaluate": 1066,
      "development advanced": 2834,
      "recent surge": 9482,
      "applying large": 724,
      "growing demand": 4767,
      "finetuning specific": 4144,
      "models generalization": 7338,
      "stateoftheart language": 10708,
      "claude primarily": 1663,
      "primarily accessible": 8825,
      "accessible api": 206,
      "tasks inference": 11225,
      "informed decisions": 5323,
      "decisions empirical": 2578,
      "learning potential": 6234,
      "multiagent collaboration": 7605,
      "agents autonomous": 498,
      "spectrum tasks": 10647,
      "propose multiagent": 9080,
      "furthermore delve": 4327,
      "discuss possible": 2975,
      "negative ones": 7774,
      "discuss potential": 2976,
      "research current": 9781,
      "high degree": 4871,
      "facilitating seamless": 3962,
      "evaluations conducted": 3584,
      "average improvement": 982,
      "inspire future": 5370,
      "research focus": 9792,
      "limited knowledge": 6353,
      "significantly human": 10432,
      "human learning": 4979,
      "vast amounts": 12104,
      "humanlevel intelligence": 5003,
      "present comprehensive": 8715,
      "perspective specifically": 8482,
      "propose unified": 9108,
      "science engineering": 10147,
      "agents based": 499,
      "present challenges": 8714,
      "repository relevant": 9722,
      "interaction framework": 5488,
      "emulate human": 3306,
      "human behaviors": 4953,
      "cognitive architecture": 1756,
      "address present": 408,
      "model contains": 7127,
      "experiments indicate": 3782,
      "settings open": 10319,
      "open source": 8035,
      "incomplete information": 5199,
      "language communication": 5760,
      "parameters llms": 8297,
      "language modelbased": 5806,
      "agents handling": 501,
      "language knowledge": 5772,
      "tool use": 11493,
      "crucial component": 2328,
      "methods providing": 7006,
      "offering valuable": 8013,
      "researchers field": 9814,
      "simple tasks": 10467,
      "innovative framework": 5345,
      "generating multiple": 4502,
      "plans agents": 8519,
      "various benchmarks": 12054,
      "generates coherent": 4493,
      "solutions existing": 10546,
      "new perspectives": 7831,
      "tackling complex": 11090,
      "project available": 8955,
      "results various tasks": 9938,
      "fewshot zeroshot learning": 4042,
      "general intelligence large": 4408,
      "intelligence large language": 5473,
      "lack systematic research": 5749,
      "natural language large": 7718,
      "language large language": 5774,
      "models llms enabled": 7410,
      "play crucial role": 8529,
      "applying large language": 725,
      "stateoftheart language models": 10709,
      "claude primarily accessible": 1664,
      "primarily accessible api": 8826,
      "accessible api calls": 207,
      "inspire future research": 5371,
      "demonstrated remarkable potential": 2691,
      "paper present comprehensive": 8245,
      "present comprehensive survey": 8716,
      "future directions field": 4349,
      "settings open source": 10320,
      "natural language communication": 7709,
      "tuning parameters llms": 11701,
      "offering valuable insights": 8014,
      "general intelligence large language": 4409,
      "intelligence large language models": 5474,
      "natural language large language": 7719,
      "language large language models": 5775,
      "language models llms enabled": 5873,
      "applying large language models": 726,
      "stateoftheart language models like": 10710,
      "claude primarily accessible api": 1665,
      "primarily accessible api calls": 8827,
      "llms demonstrated remarkable potential": 6501,
      "paper present comprehensive survey": 8246,
      "general intelligence large language models": 4410,
      "natural language large language models": 7720,
      "language large language models llms": 5776,
      "large language models llms enabled": 6056,
      "claude primarily accessible api calls": 1666,
      "models llms demonstrated remarkable potential": 7405,
      "generation large language models llms": 4545,
      "distributed": 3004,
      "minutes": 7059,
      "dota": 3073,
      "champions": 1508,
      "2019": 38,
      "enormous": 3410,
      "combinations": 1779,
      "pool": 8566,
      "mastered": 6849,
      "treesearch": 11660,
      "skillfully": 10497,
      "actor": 341,
      "biologically": 1210,
      "conjunction": 2065,
      "deterministic": 2820,
      "plausibility": 8523,
      "tradeoff": 11513,
      "solvers": 10557,
      "ushered": 11934,
      "drawbacks": 3090,
      "introduction": 5552,
      "understood": 11788,
      "decreasing": 2592,
      "stochastic": 10759,
      "casts": 1411,
      "theorem": 11443,
      "fit": 4157,
      "damage": 2379,
      "determining": 2819,
      "optimally": 8091,
      "bounded": 1256,
      "satisfied": 10095,
      "cumulative": 2341,
      "transition": 11630,
      "parameterized": 8290,
      "interacts": 5501,
      "compound": 1920,
      "clipping": 1678,
      "exceeds": 3625,
      "envision": 3440,
      "reuse": 9962,
      "replay": 9710,
      "harm": 4813,
      "uniform": 11809,
      "remedy": 9689,
      "475": 71,
      "cpu": 2291,
      "srl": 10668,
      "libraries": 6305,
      "deepmind": 2616,
      "dataflows": 2474,
      "unifies": 11808,
      "optimizations": 8097,
      "massively": 6848,
      "reproduces": 9748,
      "5x": 85,
      "a100": 120,
      "inherently": 5328,
      "connection": 2074,
      "major challenge": 6784,
      "given black": 4631,
      "black box": 1216,
      "learning specifically": 6242,
      "learning deep": 6203,
      "control tasks": 2187,
      "state representation": 10697,
      "policy gradient": 8559,
      "current approaches": 2346,
      "approaches tackling": 776,
      "new generation": 7821,
      "approach introduces": 748,
      "performance theoretically": 8439,
      "theoretically prove": 11447,
      "human players": 4982,
      "handle complex": 4801,
      "challenges current": 1478,
      "survey recent": 11028,
      "real time": 9372,
      "field ai": 4045,
      "key problem": 5635,
      "value function": 12024,
      "especially complex": 3472,
      "different popular": 2897,
      "leading efficient": 6171,
      "efficient learning": 3196,
      "sequence modeling": 10283,
      "gpt series": 4670,
      "power modern": 8650,
      "unlike prior": 11834,
      "benchmarks results": 1145,
      "compared strong": 1855,
      "successfully applied": 10930,
      "joint probability": 5609,
      "making better": 6795,
      "better use": 1184,
      "perform experiments": 8353,
      "hybrid model": 5029,
      "model improves": 7161,
      "better balance": 1175,
      "learning algorithms": 6190,
      "paper envision": 8222,
      "information transfer": 5318,
      "need attention": 7764,
      "learning basic": 6195,
      "paper analyze": 8211,
      "propose uniform": 9109,
      "scalable training": 10105,
      "remedy issue": 9690,
      "experiments results": 3798,
      "hybrid methods": 5028,
      "research recent": 9805,
      "serving rich": 10306,
      "methods achieving": 6976,
      "remarkable improvement": 9675,
      "demonstrating superior": 2705,
      "training single": 11582,
      "process massive": 8889,
      "data train": 2464,
      "largescale training": 6145,
      "implementation details": 5088,
      "single machine": 10485,
      "speedup compared": 10653,
      "design choices": 2745,
      "academic community": 192,
      "llm framework": 6411,
      "design framework": 2749,
      "llms potentially": 6613,
      "encounter difficulties": 3338,
      "tasks common": 11178,
      "approach mitigating": 752,
      "significant computational": 10407,
      "information generated": 5299,
      "compared strong baselines": 1856,
      "making better use": 6796,
      "high success rates": 4877,
      "remedy issue propose": 9691,
      "research recent years": 9806,
      "paper present novel": 8247,
      "significant computational resources": 10408,
      "sphere": 10655,
      "traditionally": 11524,
      "handful": 4799,
      "harmonized": 4816,
      "scrutinize": 10166,
      "designers": 2768,
      "plm": 8541,
      "car": 1395,
      "relevancy": 9626,
      "delta": 2635,
      "55": 81,
      "differs": 2920,
      "record": 9530,
      "briefly": 1280,
      "vice": 12125,
      "versa": 12117,
      "comparably": 1828,
      "half": 4787,
      "resulted": 9873,
      "utmost": 12004,
      "determination": 2816,
      "associations": 864,
      "ingredients": 5326,
      "accelerating": 197,
      "formulas": 4207,
      "emission": 3262,
      "screen": 10162,
      "40000": 66,
      "disciplines": 2956,
      "validates": 12014,
      "18": 32,
      "humidity": 5024,
      "root": 10058,
      "rmse": 10028,
      "literature survey": 6381,
      "proven beneficial": 9145,
      "advance artificial": 441,
      "models applied": 7261,
      "finetuning pretrained": 4140,
      "effective finetuning": 3140,
      "finetuning approaches": 4121,
      "transformerbased models": 11623,
      "approaches directly": 768,
      "tuning techniques": 11703,
      "adapt downstream": 348,
      "tasks effectively": 11192,
      "improve generalization": 5125,
      "processing related": 8910,
      "vice versa": 12126,
      "performs comparably": 8468,
      "substantial progress": 10898,
      "requirements paper": 9763,
      "challenge introduce": 1470,
      "comprehensive instruction": 1943,
      "aims improve": 550,
      "experiments llms": 3787,
      "enhancing large": 3406,
      "improve interpretability": 5127,
      "better accomplish": 1173,
      "challenge conversational": 1469,
      "knowledge enhancement": 5668,
      "recent advancement": 9449,
      "models openais": 7480,
      "gpt4 demonstrates": 4694,
      "dataset achieving": 2476,
      "possibility leveraging": 8604,
      "human supervision": 4988,
      "text recently": 11412,
      "field nlp": 4053,
      "utilize llms": 11989,
      "exploration llms": 3835,
      "specific prompt": 10617,
      "multiple downstream": 7655,
      "prediction tasks": 8693,
      "human intelligence": 4972,
      "advance artificial intelligence": 442,
      "bridge gap paper": 1273,
      "finetuning pretrained models": 4141,
      "remarkable performance gains": 9677,
      "large pretrained model": 6116,
      "models generalization ability": 7339,
      "improve generalization ability": 5126,
      "language processing related": 5969,
      "address challenge introduce": 387,
      "recent advancement large": 9450,
      "llms revolutionized field": 6638,
      "revolutionized field nlp": 9984,
      "multiple downstream tasks": 7656,
      "downstream tasks experimental": 3079,
      "based pretrained language model": 1056,
      "natural language processing related": 7731,
      "recent advancement large language": 9451,
      "models llms revolutionized field": 7445,
      "downstream tasks experimental results": 3080,
      "recent advancement large language models": 9452,
      "language models llms revolutionized field": 5904,
      "program": 8936,
      "acceptance": 200,
      "partially": 8307,
      "apart": 681,
      "triggered": 11669,
      "percentage": 8345,
      "reorder": 9697,
      "candidates": 1336,
      "lyra": 6750,
      "reduction": 9549,
      "ignoring": 5052,
      "functional": 4311,
      "adds": 419,
      "discrimination": 2966,
      "repair": 9698,
      "codex": 1750,
      "fix": 4159,
      "desirable": 2771,
      "derive": 2731,
      "repairing": 9699,
      "detects": 2815,
      "erroneous": 3458,
      "repairs": 9700,
      "40": 64,
      "fatal": 4004,
      "deduction": 2594,
      "bugs": 1300,
      "looking": 6720,
      "trick": 11667,
      "requirement": 9761,
      "87": 107,
      "93": 113,
      "42": 67,
      "app": 686,
      "52": 79,
      "122": 12,
      "equivalence": 3449,
      "estimates": 3486,
      "builtin": 1316,
      "prompttuning": 9044,
      "checking": 1615,
      "nl": 7855,
      "847": 104,
      "120": 11,
      "come": 1786,
      "debugging": 2564,
      "motivation": 7595,
      "participants": 8308,
      "70": 95,
      "33": 59,
      "humanllm": 5011,
      "edited": 3125,
      "dl": 3038,
      "bad": 1011,
      "bottleneck": 1251,
      "fulfill": 4297,
      "facts": 3971,
      "adequate": 421,
      "nuances": 7947,
      "differential": 2918,
      "subtle": 10907,
      "versions": 12121,
      "inferring": 5282,
      "31": 57,
      "resemble": 9818,
      "maximum": 6875,
      "cots": 2272,
      "intuitively": 5556,
      "assurance": 870,
      "heavy": 4844,
      "iterating": 5602,
      "actionable": 330,
      "71": 96,
      "36": 61,
      "specifications": 10642,
      "algorithmic": 562,
      "tracing": 11510,
      "provenance": 9147,
      "scrutiny": 10167,
      "bit": 1214,
      "strings": 10802,
      "respecting": 9833,
      "preserved": 8737,
      "guaranteeing": 4773,
      "begins": 1095,
      "manager": 6808,
      "corrects": 2245,
      "severity": 10326,
      "compiling": 1882,
      "manipulate": 6810,
      "offloading": 8020,
      "bringing": 1283,
      "spent": 10654,
      "repetitive": 9703,
      "away": 1001,
      "burgeoning": 1318,
      "strength": 10792,
      "promptingbased": 9026,
      "toolaugmented": 11494,
      "rest": 9861,
      "coarsetofine": 1697,
      "paves": 8330,
      "day": 2557,
      "advocate": 485,
      "connects": 2076,
      "experienced": 3731,
      "company": 1815,
      "plagiarism": 8510,
      "detrimental": 2821,
      "elaborating": 3208,
      "ethically": 3496,
      "emphasis": 3268,
      "reusable": 9961,
      "chatbased": 1528,
      "humanauthored": 4995,
      "chatgptgenerated": 1607,
      "secure": 10190,
      "devising": 2849,
      "attribution": 908,
      "rephrase": 9704,
      "unannotated": 11731,
      "corrupted": 2255,
      "trains": 11590,
      "expansions": 3723,
      "continue": 2159,
      "grow": 4765,
      "preparation": 8709,
      "summarizes": 10967,
      "wireless": 12233,
      "nuanced": 7946,
      "consultation": 2119,
      "started": 10691,
      "engages": 3370,
      "breaking": 1267,
      "validating": 12015,
      "resolution": 9819,
      "alleviates": 590,
      "unveils": 11865,
      "hints": 4922,
      "characterize": 1519,
      "mitigated": 7073,
      "runtime": 10076,
      "uncovered": 11743,
      "eda": 3120,
      "67b": 92,
      "plugins": 8549,
      "trust": 11678,
      "concrete": 2010,
      "stack": 10672,
      "overflow": 8187,
      "decade": 2566,
      "chatgpt4": 1605,
      "chatgpt35": 1603,
      "evident": 3599,
      "programaided": 8937,
      "backbones": 1003,
      "consumed": 2120,
      "llmintegrated": 6439,
      "attackers": 880,
      "smart": 10520,
      "blockchain": 1229,
      "week": 12194,
      "hour": 4942,
      "62": 88,
      "applicationspecific": 715,
      "decide": 2570,
      "30k": 56,
      "reproducing": 9749,
      "compatibility": 1866,
      "mitigation": 7077,
      "unintended": 11814,
      "gpt35turbo": 4686,
      "minimizes": 7052,
      "formidable": 4201,
      "granularities": 4727,
      "sampled": 10087,
      "multiperspective": 7648,
      "selfconsistency": 10217,
      "frequent": 4286,
      "generation benchmark": 4518,
      "datasets significant": 2549,
      "programming language": 8939,
      "methods support": 7014,
      "multiple models": 7657,
      "tasks introduce": 11227,
      "models best": 7268,
      "various models": 12078,
      "exact matching": 3609,
      "provides new": 9177,
      "current mainstream": 2354,
      "time paper": 11474,
      "different previous": 2901,
      "optimal model": 8089,
      "automatically generating": 955,
      "computational linguistics": 1973,
      "software engineering": 10536,
      "approaches model": 774,
      "models largescale": 7376,
      "programs paper": 8942,
      "experiments code": 3767,
      "tasks demonstrate": 11186,
      "comparing stateoftheart": 1861,
      "programs programs": 8943,
      "automatically generated": 954,
      "fix patterns": 4160,
      "data future": 2414,
      "testing repairing": 11378,
      "unstructured text": 11854,
      "blackbox settings": 1222,
      "set novel": 10310,
      "additionally framework": 382,
      "public benchmark": 9200,
      "leverage existing": 6275,
      "zeroshot setting": 12326,
      "models important": 7354,
      "model robustness": 7210,
      "widely applied": 12217,
      "consists components": 2101,
      "original input": 8119,
      "generation learning": 4546,
      "important research": 5102,
      "generation different": 4529,
      "pretraining finetuning": 8781,
      "finetuning paradigm": 4138,
      "academia industry": 190,
      "existing benchmarks": 3680,
      "proposed including": 9118,
      "assess models": 836,
      "models compared": 7280,
      "assess performance": 837,
      "approaches proposed": 775,
      "trained scratch": 11537,
      "efficiency model": 3187,
      "making difficult": 6799,
      "existing deep": 3683,
      "surpassing stateoftheart": 11019,
      "stateoftheart baseline": 10704,
      "respectively approach": 9837,
      "trained models": 11536,
      "research paper": 9802,
      "debugging techniques": 2565,
      "critical issue": 2311,
      "existing techniques": 3712,
      "results existing": 9901,
      "propose automated": 9057,
      "test prompts": 11368,
      "prompts large": 9033,
      "models automatically": 7262,
      "efficient accurate": 3191,
      "empirical analysis": 3274,
      "desired task": 2775,
      "make choice": 6790,
      "typically trained": 11723,
      "trained large": 11535,
      "ability make": 155,
      "tasks average": 11167,
      "llms complex": 6480,
      "tasks challenging": 11170,
      "challenging involving": 1498,
      "generates responses": 4494,
      "responses following": 9851,
      "controllable generation": 2190,
      "gap humans": 4378,
      "humans llms": 5019,
      "utilization llms": 11987,
      "study prompt": 10862,
      "learning program": 6237,
      "learning dl": 6206,
      "far satisfactory": 3999,
      "models fewshot": 7329,
      "long time": 6708,
      "used pretraining": 11903,
      "pretraining process": 8794,
      "pretraining experiments": 8780,
      "light future": 6316,
      "oracle detect": 8106,
      "chatgpt stateoftheart": 1599,
      "study shows": 10869,
      "shows chatgpt": 10391,
      "possible reason": 8606,
      "evaluate approach": 3502,
      "models encounter": 7313,
      "using tools": 11976,
      "method using": 6969,
      "model automatically": 7109,
      "relatively small": 9616,
      "current best": 2348,
      "poor accuracy": 8568,
      "llms improve": 6560,
      "perform extensive": 8354,
      "directly generating": 2949,
      "llms approach": 6458,
      "parameter sizes": 8281,
      "superior accuracy": 10973,
      "evaluating improving": 3524,
      "exhibit low": 3659,
      "work shown": 12266,
      "user study": 11919,
      "study systematically": 10870,
      "systematically investigate": 11054,
      "issues including": 5594,
      "chatgpt resemble": 1593,
      "chatgpt promising": 1589,
      "demonstrates effectiveness": 2694,
      "fundamental aspect": 4317,
      "analysis provides": 625,
      "cot prompting": 2268,
      "language reasoning": 5974,
      "designed natural": 2763,
      "propose structured": 9104,
      "compared cot": 1844,
      "generation apply": 4515,
      "prompting llms": 9017,
      "substantial improvements": 10893,
      "evaluation platform": 3573,
      "llm era": 6407,
      "little work": 6384,
      "evaluating capability": 3522,
      "benchmark based": 1111,
      "provide better": 9151,
      "facilitate development": 3953,
      "daily life": 2377,
      "growing using": 4770,
      "generating humanlike": 4501,
      "need effective": 7766,
      "chatgpt natural": 1580,
      "approaches based": 767,
      "metrics chatgpt": 7024,
      "llms serve": 6642,
      "solve issue": 10551,
      "contexts introduce": 2153,
      "outperforms sota": 8159,
      "summarization techniques": 10963,
      "chatgpt popular": 1588,
      "attracted wide": 899,
      "wide attention": 12207,
      "engineering community": 3375,
      "specifically explore": 10630,
      "chatgpt generate": 1566,
      "metrics including": 7028,
      "significantly worse": 10452,
      "findings outline": 4093,
      "hardware design": 4811,
      "design large": 2750,
      "chatgpt exhibited": 1558,
      "shows great": 10392,
      "potential hardware": 8626,
      "described natural": 2735,
      "bias problem": 1188,
      "code prompts": 1728,
      "results pretrained": 9922,
      "examples potentially": 3621,
      "llms proficient": 6618,
      "data flow": 2412,
      "data processing": 2443,
      "user requests": 11917,
      "language task": 5977,
      "automated evaluation": 934,
      "wide margin": 12208,
      "novel evaluation": 7920,
      "thinking capabilities": 11452,
      "human problemsolving": 4985,
      "problemsolving abilities": 8877,
      "framework large": 4264,
      "generation pretrained": 4562,
      "data various": 2468,
      "various methods": 12077,
      "retrieved knowledge": 9955,
      "empirical experiments": 3276,
      "baselines significant": 1075,
      "promptingbased methods": 9027,
      "advanced models": 450,
      "models realworld": 7508,
      "fully evaluate": 4304,
      "able achieve": 174,
      "impressive results": 5116,
      "results complex": 9887,
      "new way": 7847,
      "billions data": 1203,
      "sources end": 10579,
      "raw data": 9360,
      "privacy data": 8845,
      "key elements": 5630,
      "ethical principles": 3495,
      "matrix multiplication": 6870,
      "applied classification": 717,
      "model models": 7184,
      "exceptional performance": 3637,
      "llms substantial": 6664,
      "emergence foundation": 3247,
      "chatbots chatgpt": 1532,
      "ai services": 521,
      "apis like": 685,
      "propose concept": 9062,
      "ai chain": 514,
      "chains prompt": 1466,
      "feature set": 4013,
      "ablation experiments": 168,
      "extensive dataset": 3887,
      "binary classification": 1207,
      "translation task": 11641,
      "given query": 4638,
      "requires large": 9767,
      "does rely": 3048,
      "modeling task": 7248,
      "new pretraining": 7833,
      "content gaps": 2135,
      "unsupervised baselines": 11856,
      "baselines significantly": 1076,
      "compared supervised": 1857,
      "transformerbased large": 11619,
      "llms applications": 6456,
      "development process": 2843,
      "llms perspectives": 6609,
      "garnered significant": 4389,
      "studies demonstrated": 10840,
      "demonstrated ability": 2671,
      "role llms": 10052,
      "signal processing": 10397,
      "researchers developers": 9812,
      "solve certain": 10548,
      "llms generalization": 6544,
      "decisionmaking processes": 2576,
      "advancements deep": 458,
      "remarkable efficacy": 9674,
      "potential vulnerabilities": 8640,
      "llms realm": 6626,
      "zeroshot approaches": 12311,
      "enabling language": 3316,
      "example prompts": 3617,
      "human annotations": 4950,
      "exact match": 3608,
      "using examples": 11944,
      "influence effectiveness": 5285,
      "language time": 5982,
      "programs contain": 8941,
      "experiments suggest": 3803,
      "current limitations": 2352,
      "complex set": 1904,
      "diverse requirements": 3025,
      "compared gpt4": 1846,
      "models parameterefficient": 7486,
      "models frequently": 7336,
      "demand extensive": 2638,
      "llama base": 6388,
      "parameters limited": 8296,
      "experiments provide": 3792,
      "components including": 1914,
      "input representation": 5354,
      "performance tasks": 8436,
      "generation reasoning": 4571,
      "chatgpt extensively": 1561,
      "research application": 9774,
      "effectively handle": 3154,
      "related literature": 9602,
      "tasks hoping": 11219,
      "help researchers": 4849,
      "researchers better": 9810,
      "reveal performance": 9967,
      "llms various": 6678,
      "received considerable": 9445,
      "considerable attention": 2083,
      "characteristics llms": 1518,
      "study performance": 10859,
      "different prompt": 2902,
      "multiround dialogue": 7669,
      "generation systems": 4580,
      "instructions code": 5431,
      "despite advancements": 2779,
      "general texttotext": 4419,
      "novel technique": 7935,
      "stack overflow": 10673,
      "chatgpt enhancing": 1554,
      "survey participants": 11027,
      "presents indepth": 8732,
      "chatgpt35 chatgpt4": 1604,
      "improve chatgpt": 5121,
      "chatgpt models": 1579,
      "effective methods": 3143,
      "complex reasoning": 1901,
      "understood llms": 11789,
      "reasoning code": 9417,
      "approach code": 734,
      "new programming": 7834,
      "primarily focused": 8828,
      "models backbones": 7264,
      "build models": 1305,
      "investigate performance": 5563,
      "fewshot scenarios": 4038,
      "indicate model": 5242,
      "different backbones": 2876,
      "demonstrate better": 2647,
      "dataset fewshot": 2497,
      "provide new": 9159,
      "datasets respectively": 2546,
      "transformerbased pretrained": 11624,
      "results code": 9885,
      "generation existing": 4530,
      "better decoding": 1176,
      "allowing llms": 595,
      "years large": 12293,
      "great challenge": 4747,
      "specifically focusing": 10632,
      "bleu scores": 1226,
      "insights potential": 5367,
      "generated models": 4483,
      "lines code": 6367,
      "generating code": 4496,
      "approach efficiently": 738,
      "efficiently effectively": 3202,
      "studies investigated": 10843,
      "error propagation": 3463,
      "approach new": 753,
      "tasks application": 11165,
      "prediction accuracy": 8690,
      "furthermore explore": 4333,
      "adaptability various": 351,
      "higher levels": 4882,
      "llms automatic": 6463,
      "models play": 7490,
      "framework conduct": 4244,
      "findings reveal": 4096,
      "low level": 6730,
      "gpt35turbo gpt4": 4687,
      "tasks experiments": 11206,
      "outperforms models": 8156,
      "change model": 1511,
      "results automatic": 9879,
      "assist llms": 854,
      "llms achieves": 6448,
      "formidable challenge": 4202,
      "challenge llms": 1471,
      "multiple outputs": 7660,
      "multiple perspectives": 7661,
      "multiple diverse": 7653,
      "evaluation code": 3547,
      "specific generation": 10612,
      "tasks stateoftheart": 11279,
      "addressed current": 414,
      "models generalize": 7340,
      "language tasks paper": 5979,
      "results proposed method": 9925,
      "improve quality generated": 5136,
      "language models largescale": 5847,
      "language model finetuning": 5786,
      "code generation tasks": 1719,
      "generation tasks demonstrate": 4583,
      "code generation models": 1716,
      "pretraining finetuning paradigm": 8782,
      "paper propose benchmark": 8252,
      "models trained scratch": 7546,
      "deep learning based": 2598,
      "prompts large language": 9034,
      "language models automatically": 5812,
      "deep learning dl": 2599,
      "language models fewshot": 5834,
      "shed light future": 10339,
      "method using chatgpt": 6970,
      "code generation process": 1717,
      "language models code": 5818,
      "llms chatgpt shown": 6477,
      "chainofthought cot prompting": 1460,
      "natural language reasoning": 7736,
      "designed natural language": 2764,
      "generation paper propose": 4559,
      "chatgpt natural language": 1581,
      "llms shown remarkable": 6654,
      "attracted wide attention": 900,
      "software engineering community": 10537,
      "hardware design large": 4812,
      "like chatgpt exhibited": 6326,
      "described natural language": 2736,
      "framework large language": 4265,
      "language models realworld": 5937,
      "demonstrated exceptional performance": 2674,
      "emergence foundation models": 3248,
      "machine translation task": 6761,
      "language modeling task": 5808,
      "paper provides comprehensive": 8263,
      "transformerbased large language": 11620,
      "llms garnered significant": 6542,
      "garnered significant attention": 4390,
      "recent advancements deep": 9454,
      "advancements deep learning": 459,
      "enabling language models": 3317,
      "factors influence effectiveness": 3970,
      "study provides valuable": 10866,
      "superior performance compared": 10976,
      "language models parameterefficient": 5930,
      "publicly available datasets": 9211,
      "llama base model": 6389,
      "tasks text generation": 11293,
      "tasks code generation": 11174,
      "help researchers better": 4850,
      "received considerable attention": 9446,
      "propose novel technique": 9094,
      "results demonstrate effectiveness": 9893,
      "gap paper presents": 4382,
      "paper presents indepth": 8249,
      "reasoning capabilities large": 9410,
      "complex reasoning tasks": 1902,
      "proposed approach code": 9113,
      "new programming language": 7835,
      "pretrained models backbones": 8764,
      "llms different sizes": 6506,
      "recent years large": 9486,
      "years large language": 12294,
      "paper conduct empirical": 8216,
      "conduct empirical study": 2025,
      "llms shown promising": 6653,
      "study propose novel": 10864,
      "enhancing large language": 3407,
      "adequately addressed current": 424,
      "experimental results proposed method": 3755,
      "prompts large language models": 9035,
      "large language models fewshot": 6038,
      "models llms chatgpt shown": 7396,
      "chatgpt natural language understanding": 1582,
      "large language models code": 6025,
      "models llms shown remarkable": 7456,
      "llms like chatgpt exhibited": 6582,
      "framework large language models": 4266,
      "large language models realworld": 6093,
      "masked language modeling task": 6840,
      "transformerbased large language models": 11621,
      "models llms garnered significant": 7421,
      "llms garnered significant attention": 6543,
      "recent advancements deep learning": 9455,
      "study provides valuable insights": 10867,
      "large language models parameterefficient": 6090,
      "reasoning capabilities large language": 9411,
      "recent years large language": 9487,
      "years large language models": 12295,
      "models llms shown promising": 7455,
      "extensive experimental results demonstrate": 3892,
      "enhancing large language models": 3408,
      "language models llms chatgpt shown": 5861,
      "based large language models llms": 1046,
      "language models llms shown remarkable": 5914,
      "models llms like chatgpt exhibited": 7430,
      "transformerbased large language models llms": 11622,
      "language models llms garnered significant": 5884,
      "models llms garnered significant attention": 7422,
      "reasoning capabilities large language models": 9412,
      "recent years large language models": 9488,
      "language models llms shown promising": 5913,
      "recorded": 9531,
      "generaldomain": 4420,
      "llamabased": 6398,
      "national": 7705,
      "simplified": 10470,
      "distilled": 2995,
      "rlaif": 10026,
      "reinforced": 9590,
      "drastic": 3086,
      "check": 1614,
      "span": 10583,
      "resourceconstrained": 9824,
      "qlora": 9231,
      "nvidia": 7965,
      "truthful": 11682,
      "probing": 8858,
      "eliminate": 3219,
      "potent": 8614,
      "ner": 7781,
      "rapid growth": 9342,
      "particular propose": 8311,
      "fuse multiple": 4340,
      "finetuning chinese": 4122,
      "explicitly trained": 3828,
      "training deploying": 11549,
      "llama model": 6390,
      "biomedical domain": 1213,
      "llamabased model": 6399,
      "knowledge paper": 5695,
      "process adapting": 8882,
      "knowledge injection": 5680,
      "dataset encompasses": 2496,
      "comprising total": 1959,
      "various public": 12089,
      "13 billion": 14,
      "knowledge enhanced": 5667,
      "model generative": 7154,
      "paper evaluate": 8223,
      "knowledge enabling": 5665,
      "learning using": 6248,
      "effective retrieval": 3145,
      "background knowledge": 1007,
      "guide inference": 4781,
      "questions answered": 9289,
      "average score": 985,
      "chatgpt serve": 1595,
      "benchmark chinese": 1113,
      "llms ability": 6441,
      "answer given": 658,
      "generating rationales": 4508,
      "qa datasets": 9227,
      "limitations current": 6342,
      "current llms": 2353,
      "reasoning experiment": 9420,
      "different preferences": 2898,
      "presents significant": 8734,
      "safety trustworthiness": 10083,
      "attention work": 895,
      "bring following": 1282,
      "learning ai": 6187,
      "ai feedback": 515,
      "evaluation scheme": 3577,
      "manual metrics": 6822,
      "teacher model": 11318,
      "modern llms": 7570,
      "gpt4 struggle": 4700,
      "struggle issues": 10826,
      "issues regarding": 5599,
      "framework using": 4278,
      "extraction tasks": 3932,
      "short text": 10351,
      "resourceconstrained scenarios": 9825,
      "models exhibited": 7321,
      "exhibited exceptional": 3662,
      "tasks leveraging": 11241,
      "introduce comprehensive": 5537,
      "datasets employ": 2527,
      "scenarios extensive": 10127,
      "traditional chinese": 11517,
      "research domain": 9788,
      "require llms": 9756,
      "tasks benchmark": 11169,
      "finetuning training": 4152,
      "proposed benchmark": 9115,
      "leverages structured": 6292,
      "bases llms": 1085,
      "compared vanilla": 1859,
      "offer new": 8008,
      "adaptation llms": 357,
      "studies focused": 10842,
      "work introduces": 12255,
      "llms tend": 6668,
      "pretraining phase": 8793,
      "interactive scenarios": 5499,
      "performance nlp": 8416,
      "recognition ner": 9512,
      "span extraction": 10584,
      "llms including chatgpt": 6563,
      "language model specifically": 5803,
      "foundation language model": 4220,
      "generative pretraining model": 4619,
      "exceptional performance various": 3638,
      "simple effective retrieval": 10461,
      "different llms different": 2889,
      "learning ai feedback": 6188,
      "automatic manual metrics": 942,
      "evaluation human evaluation": 3558,
      "struggle issues regarding": 10827,
      "experimental results method": 3748,
      "information extraction tasks": 5298,
      "models exhibited exceptional": 7322,
      "exhibited exceptional performance": 3663,
      "comprehensive evaluation framework": 1934,
      "chatgpt shown remarkable": 1597,
      "llms automatic evaluation": 6464,
      "improve llms performance": 5129,
      "pose potential risks": 8586,
      "knowledge bases llms": 5659,
      "experimental results llms": 3747,
      "performance nlp tasks": 8417,
      "entity recognition ner": 3428,
      "capabilities natural language understanding": 1353,
      "models like chatgpt demonstrated": 7382,
      "demonstrated exceptional performance various": 2675,
      "exceptional performance various natural": 3639,
      "models exhibited exceptional performance": 7323,
      "experimental results demonstrate effectiveness": 3741,
      "named entity recognition ner": 7695,
      "remarkable capabilities natural language understanding": 9672,
      "demonstrated exceptional performance various natural": 2676,
      "exceptional performance various natural language": 3640,
      "research large language models llms": 9799,
      "generative large language models llms": 4600,
      "application large language models llms": 699,
      "inspiring": 5384,
      "entityrelation": 3432,
      "triple": 11671,
      "invariance": 5559,
      "provably": 9140,
      "schemas": 10141,
      "validity": 12017,
      "unlocked": 11838,
      "instructive": 5447,
      "revisiting": 9978,
      "spans": 10587,
      "push": 9221,
      "flant5": 4162,
      "recast": 9443,
      "wellaligned": 12199,
      "codestyle": 1748,
      "occurrence": 8005,
      "toolkit": 11495,
      "entitycentric": 3431,
      "toolkits": 11496,
      "823": 103,
      "secondary": 10186,
      "bottlenecks": 1252,
      "university": 11823,
      "text challenging": 11382,
      "data labeling": 2428,
      "explore promptbased": 3846,
      "methods work": 7020,
      "directly prompting": 2950,
      "learning algorithm": 6189,
      "fundamental task": 4319,
      "involves identifying": 5577,
      "extracting information": 3926,
      "tasks simple": 11277,
      "used complex": 11898,
      "conducted series": 2045,
      "text paper": 11406,
      "relations directly": 9606,
      "directly extracted": 2947,
      "unified text": 11804,
      "fields natural": 4056,
      "require specialized": 9758,
      "professional knowledge": 8928,
      "languages knowledge": 5997,
      "firstly propose": 4156,
      "propose generative": 9069,
      "framework generative": 4258,
      "models unlocked": 7552,
      "unlocked strong": 11839,
      "f1 score": 3943,
      "uniformly model": 11811,
      "enhance fewshot": 3388,
      "fewshot performance": 4035,
      "achieve performance": 258,
      "nlp task": 7871,
      "standard supervised": 10684,
      "sota results": 10569,
      "pretrained massive": 8761,
      "learning ability": 6185,
      "tasks particular": 11255,
      "tasks experiment": 11202,
      "seven benchmarks": 10324,
      "outperforms finetuning": 8155,
      "models specially": 7530,
      "capabilities paper": 1354,
      "existing toolkits": 3713,
      "efficiency stability": 3188,
      "semantic parsing": 10238,
      "subtasks approach": 10906,
      "architecture different": 791,
      "downstream nlp": 3076,
      "tasks parameter": 11254,
      "aim explore": 538,
      "popular large": 8573,
      "generate prompts": 4463,
      "directly prompting llms": 2951,
      "models limited resources": 7386,
      "foundation models like": 4228,
      "fundamental task natural": 4320,
      "text challenging task": 11383,
      "relations directly extracted": 9607,
      "fields natural language": 4057,
      "information extraction large": 5295,
      "extraction large language": 3930,
      "language models unlocked": 5949,
      "models unlocked strong": 7553,
      "performance paper propose": 8421,
      "demonstrate method achieves": 2660,
      "comparable performance bert": 1824,
      "text paper propose": 11407,
      "various downstream nlp": 12062,
      "downstream nlp tasks": 3077,
      "language models zeroshot": 5956,
      "popular large language": 8574,
      "foundation models like chatgpt": 4229,
      "demonstrated remarkable performance various": 2690,
      "remarkable performance various tasks": 9681,
      "fundamental task natural language": 4321,
      "fields natural language processing": 4058,
      "information extraction large language": 5296,
      "extraction large language models": 3931,
      "large language models unlocked": 6099,
      "language models unlocked strong": 5950,
      "experimental results demonstrate method": 3742,
      "various downstream nlp tasks": 12063,
      "large language models zeroshot": 6103,
      "popular large language model": 8575,
      "fundamental task natural language processing": 4322,
      "information extraction large language models": 5297,
      "large language models unlocked strong": 6100,
      "malicious": 6804,
      "dissemination": 2989,
      "expose": 3866,
      "did": 2870,
      "say": 10100,
      "violation": 12133,
      "uncovers": 11745,
      "llama13b": 6394,
      "ecosystem": 3119,
      "primitive": 8831,
      "inevitable": 5267,
      "chance": 1509,
      "extreme": 3938,
      "lifecycle": 6310,
      "regulations": 9589,
      "theft": 11442,
      "topk": 11505,
      "compromise": 1960,
      "impacting": 5082,
      "replacements": 9708,
      "semanticlevel": 10251,
      "bypass": 1320,
      "payloads": 8334,
      "arabic": 785,
      "hate": 4828,
      "formal": 4196,
      "analyzer": 635,
      "desktop": 2776,
      "missed": 7064,
      "discussing": 2980,
      "ahead": 507,
      "explaining": 3820,
      "experimentally": 3760,
      "conflicting": 2058,
      "right": 10011,
      "upper": 11871,
      "bound": 1253,
      "adaptivity": 367,
      "assessment chinese": 846,
      "assessment benchmark": 845,
      "generated responses": 4485,
      "llms strong": 6662,
      "openai gpt": 8037,
      "test llms": 11367,
      "task automatically": 11115,
      "popular llms": 8576,
      "llms empirical": 6514,
      "llms brought": 6470,
      "brought significant": 1296,
      "widespread deployment": 12228,
      "conduct preliminary": 2034,
      "mainstream llms": 6776,
      "chatgpt capable": 1541,
      "llms raises": 6625,
      "raises concerns": 9309,
      "knowledge domains": 5663,
      "evaluate capabilities": 3503,
      "challenging benchmark": 1496,
      "encourage llms": 3343,
      "like previous": 6334,
      "llms accurately": 6443,
      "study investigate": 10856,
      "require model": 9757,
      "methods benchmarking": 6977,
      "types datasets": 11718,
      "best performance": 1167,
      "tasks requiring": 11274,
      "evaluating text": 3536,
      "models considerable": 7284,
      "compromise models": 1961,
      "tasks previous": 11258,
      "previous benchmarks": 8807,
      "robustness paper": 10049,
      "introduce latent": 5541,
      "instruction embedding": 5402,
      "harmful content": 4815,
      "content consequently": 2132,
      "provide technical": 9167,
      "languages english": 5995,
      "multiple choice": 7651,
      "llms increasing": 6566,
      "essential task": 3479,
      "performance advantage": 8362,
      "significant room": 10419,
      "foster development": 4215,
      "evaluated language": 3519,
      "cases addition": 1408,
      "years witnessed": 12297,
      "wide variety": 12214,
      "benchmarks evaluation": 1138,
      "lack interpretability": 5744,
      "propose possible": 9096,
      "systems compared": 11058,
      "information realworld": 5312,
      "end establish": 3348,
      "experiments seven": 3800,
      "detailed instructions": 2795,
      "invalid responses": 5558,
      "llms specific": 6659,
      "upper bound": 11872,
      "popular llms chatgpt": 8577,
      "models llms brought": 7392,
      "llms brought significant": 6471,
      "deep learning models": 2600,
      "recent years witnessed": 9489,
      "baseline methods including": 1068,
      "methods including large": 6994,
      "language models llms brought": 5857,
      "models llms brought significant": 7393,
      "large language models multiple": 6087,
      "methods including large language": 6995,
      "large language models llms brought": 6049,
      "language models llms brought significant": 5858,
      "methods including large language models": 6996,
      "sum": 10955,
      "222": 45,
      "simulates": 10474,
      "exemplars": 3653,
      "pruning": 9194,
      "textdavinci003": 11420,
      "92": 111,
      "rectify": 9538,
      "federated": 4019,
      "asked": 822,
      "crowdsourced": 2324,
      "factuality": 3977,
      "postediting": 8609,
      "varies": 12038,
      "langauge": 5757,
      "initiative": 5337,
      "condensed": 2013,
      "clearly": 1671,
      "activations": 334,
      "clarification": 1644,
      "initialize": 5335,
      "hotpotqa": 4936,
      "choosing": 1639,
      "parallelly": 8278,
      "composing": 1918,
      "approximating": 782,
      "outlines": 8127,
      "triggers": 11670,
      "debate": 2562,
      "stance": 10679,
      "chainofknowledge": 1456,
      "cok": 1764,
      "controlling": 2194,
      "reallife": 9382,
      "acceptable": 199,
      "constant": 2105,
      "setup": 10321,
      "promoting": 8983,
      "kbqa": 5622,
      "webqsp": 12189,
      "categorizing": 1419,
      "cumbersome": 2340,
      "mathematics": 6867,
      "socratic": 10531,
      "structuring": 10823,
      "peer": 8335,
      "triplet": 11673,
      "llama2": 6395,
      "115": 9,
      "protoqa": 9136,
      "512": 78,
      "adjustment": 431,
      "data existing": 2408,
      "t5 bart": 11070,
      "demonstrated stateoftheart": 2692,
      "multiple benchmarks": 7650,
      "prompting cot": 9010,
      "tasks gpt3": 11218,
      "requires manual": 9768,
      "systems propose": 11065,
      "different existing": 2881,
      "effectively utilize": 3165,
      "gap compared": 4377,
      "prompting chainofthought": 9008,
      "models increasing": 7362,
      "scale large": 10109,
      "cot reasoning": 2270,
      "purpose propose": 9218,
      "propose solution": 9102,
      "challenges realworld": 1491,
      "labeled training": 5731,
      "creates barriers": 2301,
      "general tasks": 4418,
      "selects optimal": 10213,
      "optimal combination": 8088,
      "models knowledge": 7369,
      "zeroshot commonsense": 12313,
      "models experiments": 7324,
      "experiments commonsense": 3768,
      "ability methods": 156,
      "new prompting": 7836,
      "correct answers": 2235,
      "used guide": 11900,
      "encouraging results": 3345,
      "llms experiments": 6531,
      "difficulty introduce": 2925,
      "questions accompanied": 9288,
      "chainofthought reasoning": 1464,
      "focuses typical": 4180,
      "propose improve": 9071,
      "methods significantly": 7011,
      "answering task": 675,
      "task finetuning": 11129,
      "smaller models": 10515,
      "additionally introduce": 383,
      "question answer": 9266,
      "tasks tackle": 11287,
      "llms despite": 6502,
      "proposed prompting": 9123,
      "margin comparable": 6830,
      "performance varies": 8443,
      "varies substantially": 12039,
      "significantly reduces": 10449,
      "approach solving": 764,
      "approach construct": 736,
      "based collected": 1027,
      "conducted types": 2046,
      "current popular": 2358,
      "additional training": 380,
      "reduces number": 9547,
      "achieves remarkable": 296,
      "zeroshot methods": 12318,
      "comparable gpt35": 1820,
      "conversational systems": 2205,
      "impressive capabilities": 5109,
      "work conduct": 12249,
      "challenges extensive": 1480,
      "practical application": 8663,
      "capability tackle": 1373,
      "llms obtain": 6601,
      "small models": 10510,
      "higher training": 4885,
      "multiturn conversations": 7685,
      "shown effectiveness": 10375,
      "tasks achieving": 11161,
      "model selection": 7214,
      "best worlds": 1171,
      "model reasoning": 7208,
      "approach shows": 760,
      "models problem": 7497,
      "shed new": 10340,
      "new light": 7825,
      "rationales answers": 9356,
      "process prompting": 8895,
      "making convenient": 6798,
      "showcases impressive": 10364,
      "robustness evaluation": 10046,
      "perform significantly": 8357,
      "leveraging incontext": 6297,
      "new knowledge": 7823,
      "approximating different": 783,
      "paper outlines": 8243,
      "common effective": 1793,
      "model accuracy": 7100,
      "experimental outcomes": 3738,
      "available github": 974,
      "llms nlp": 6599,
      "framework generating": 4257,
      "experiments widelyused": 3813,
      "divergent thinking": 3010,
      "performance general": 8390,
      "framework multiple": 4269,
      "framework extensive": 4250,
      "extensive analyses": 3884,
      "improving large": 5158,
      "answers based": 678,
      "new approach": 7807,
      "augmenting llms": 925,
      "memory large": 6914,
      "conventional neural": 2198,
      "paper seek": 8266,
      "synthetic dataset": 11045,
      "chainofknowledge cok": 1457,
      "answering complex": 665,
      "analysis model": 624,
      "datasets tend": 2552,
      "development language": 2837,
      "ability humans": 145,
      "ability language": 147,
      "order explore": 8108,
      "humans language": 5018,
      "ability paper": 159,
      "human performance": 4981,
      "proven effective": 9146,
      "aims provide": 552,
      "health counseling": 4836,
      "strategies tailored": 10782,
      "humanlike responses": 5009,
      "manual evaluations": 6820,
      "tasks exploring": 11208,
      "generate answers": 4440,
      "sota methods": 10566,
      "pretraining data": 8774,
      "improve accuracy": 5119,
      "llms evaluation": 6522,
      "integrate information": 5456,
      "llms knowledgeintensive": 6575,
      "knowledgeintensive question": 5716,
      "tasks kbqa": 11228,
      "outperforms vanilla": 8164,
      "advantages proposed": 476,
      "potential limitations": 8631,
      "data generating": 2417,
      "positive negative": 8598,
      "negative responses": 7775,
      "involving gpt4": 5580,
      "journey ahead": 5614,
      "augmentation large": 914,
      "multiple sources": 7662,
      "improving model": 5161,
      "augmentation method": 917,
      "nlu nlg": 7880,
      "reasoning language": 9422,
      "challenging issue": 1499,
      "llms approaches": 6459,
      "causal language": 1424,
      "underscore effectiveness": 11751,
      "effectiveness generality": 3171,
      "applicable different": 692,
      "task gap": 11130,
      "training explore": 11554,
      "explore possibility": 3843,
      "statistical information": 10739,
      "potential unified": 8637,
      "finetuned language": 4112,
      "extra knowledge": 3919,
      "results popular": 9920,
      "llms significant": 6657,
      "slightly better": 10500,
      "training code": 11541,
      "models good": 7344,
      "traditional finetuning": 11518,
      "models tailored": 7541,
      "previous sota": 8814,
      "nlp community": 7863,
      "llms present": 6615,
      "overall accuracy": 8172,
      "achieved stateoftheart": 275,
      "focus llms": 4177,
      "heavily rely": 4843,
      "applied different": 718,
      "largescale models": 6139,
      "inference training": 5277,
      "empirical evaluations": 3275,
      "alignment tasks": 581,
      "prompting cot prompting": 9011,
      "scale large language": 10110,
      "prompting chainofthought cot": 9009,
      "chainofthought cot reasoning": 1461,
      "significantly improves performance": 10437,
      "improves performance llms": 5151,
      "different tasks paper": 2910,
      "labeled training data": 5732,
      "language models knowledge": 5843,
      "using chatgpt gpt4": 11940,
      "language models performance": 5932,
      "shown remarkable performance": 10387,
      "question answering task": 9274,
      "new stateoftheart performance": 7839,
      "performance varies substantially": 8444,
      "novel method called": 7927,
      "achieves remarkable performance": 297,
      "performance comparable gpt35": 8373,
      "tackle issues propose": 11086,
      "approach shows significant": 761,
      "shed new light": 10341,
      "models llms nlp": 7436,
      "llms nlp tasks": 6600,
      "performance general language": 8391,
      "general language tasks": 4413,
      "improving large language": 5159,
      "memory large language": 6915,
      "method improve performance": 6954,
      "development language models": 2838,
      "ability language models": 148,
      "mental health counseling": 6921,
      "tackle challenge propose": 11082,
      "previous stateoftheart methods": 8817,
      "propose novel evaluation": 9086,
      "augmentation large language": 915,
      "neural language models": 7800,
      "nlu nlg tasks": 7881,
      "reasoning language models": 9423,
      "causal language models": 1425,
      "finetuned language model": 4113,
      "experimental results popular": 3752,
      "results popular benchmarks": 9921,
      "previous sota models": 8815,
      "models llms present": 7438,
      "stateoftheart sota performance": 10733,
      "achieves new stateoftheart results": 293,
      "llms shown remarkable performance": 6655,
      "shown remarkable performance various": 10388,
      "achieves new stateoftheart performance": 292,
      "propose novel method called": 9092,
      "achieve new stateoftheart results": 256,
      "large language models incontext": 6041,
      "language models llms nlp": 5895,
      "performance general language tasks": 8392,
      "memory large language models": 6916,
      "natural language processing models": 7725,
      "augmentation large language models": 916,
      "experimental results popular benchmarks": 3753,
      "language models llms present": 5897,
      "performance large language models llms": 8406,
      "models llms shown remarkable performance": 7457,
      "llms shown remarkable performance various": 6656,
      "prompting large language models llms": 9016,
      "large language models llms nlp": 6071,
      "large language models llms present": 6073,
      "semisupervised": 10255,
      "inherits": 5331,
      "cope": 2225,
      "kb": 5621,
      "analogous": 613,
      "longform": 6710,
      "facto": 3966,
      "searches": 10182,
      "sparql": 10590,
      "judged": 5616,
      "published": 9214,
      "alleviated": 589,
      "passage": 8315,
      "oriented": 8117,
      "triples": 11672,
      "knowledgegrounded": 5714,
      "untrained": 11862,
      "kbs": 5623,
      "store": 10762,
      "literal": 6378,
      "perfect": 8348,
      "knowledge recent": 5700,
      "improve downstream": 5123,
      "models finetuning": 7333,
      "demo video": 2643,
      "text corpora": 11386,
      "highresource languages": 4915,
      "languages experiments": 5996,
      "comparable improved": 1822,
      "performance knowledge": 8401,
      "especially lowresource": 3473,
      "maintaining performance": 6780,
      "shared task": 10333,
      "error analysis": 3460,
      "explore various": 3849,
      "code scripts": 1735,
      "base kb": 1022,
      "large lms": 6105,
      "successfully enables": 10933,
      "aims answering": 543,
      "supporting facts": 11003,
      "search engine": 10176,
      "finetune pretrained": 4108,
      "models imitate": 7353,
      "humanwritten ones": 5023,
      "using search": 11970,
      "models dynamic": 7306,
      "relevant knowledge": 9629,
      "knowledge sources": 5703,
      "natural sentences": 7749,
      "models real": 7507,
      "llama7b model": 6397,
      "supervised data": 10985,
      "finetune model": 4107,
      "llms applying": 6457,
      "stored parameters": 10764,
      "document retrieval": 3042,
      "nonenglish languages": 7892,
      "largest chinese": 6148,
      "smaller pretrained": 10518,
      "limitations researchers": 6347,
      "inspired existing": 5374,
      "models specific": 7531,
      "bases kbs": 1080,
      "various knowledge": 12071,
      "user demands": 11909,
      "vanilla llms": 12027,
      "llms framework": 6539,
      "llms limitations": 6587,
      "benchmarks proposed": 1142,
      "knowledge statements": 5704,
      "neural knowledge": 7798,
      "questions options": 9297,
      "compared baselines": 1840,
      "improve downstream nlp": 5124,
      "language models finetuning": 5836,
      "knowledge base kb": 5653,
      "using search engine": 11971,
      "significantly outperforms previous": 10446,
      "language models dynamic": 5826,
      "knowledge stored parameters": 5706,
      "model paper propose": 7194,
      "knowledge bases kbs": 5655,
      "large language models dynamic": 6032,
      "large language models knowledge": 6042,
      "attacked": 879,
      "keyphrases": 5637,
      "penalize": 8338,
      "releasing": 9624,
      "positional": 8595,
      "v1": 12008,
      "normalized": 7903,
      "plmbased": 8542,
      "frustratingly": 4295,
      "lengths": 6264,
      "concatenate": 1987,
      "keyphrase": 5636,
      "exposure": 3868,
      "familiar": 3996,
      "solution use": 10544,
      "sentence structures": 10264,
      "generating high": 4498,
      "important information": 5101,
      "accordingly propose": 221,
      "communication model": 1808,
      "build model": 1304,
      "existing metrics": 3701,
      "input context": 5347,
      "deep understanding": 2608,
      "generation multiple": 4555,
      "respectively compared": 9839,
      "llms new": 6598,
      "propose model": 9079,
      "promptbased fewshot": 9003,
      "nlp systems": 7870,
      "pairs generated": 8204,
      "gpt3 shown": 4683,
      "methods far": 6988,
      "massive knowledge": 6847,
      "learning experimental": 6207,
      "method surpasses": 6967,
      "datasets achieves": 2513,
      "gained increasing": 4362,
      "datasets tasks": 2551,
      "including classification": 5177,
      "assess ability": 834,
      "learning applying": 6191,
      "previous pretrained": 8810,
      "methods finetuned": 6989,
      "task nlp": 11137,
      "random sampling": 9313,
      "challenging large": 1500,
      "knowledge keywords": 5681,
      "networks used": 7796,
      "models current": 7292,
      "poses challenge": 8589,
      "shown strong": 10389,
      "explore new": 3841,
      "models capacity": 7273,
      "general scenarios": 4416,
      "specific datasets": 10608,
      "sampled negative": 10088,
      "frustratingly simple": 4296,
      "leveraging knowledge": 6298,
      "build new": 1306,
      "llms construct": 6486,
      "llms provides": 6623,
      "mainstream datasets": 6775,
      "strategy called": 10785,
      "generating high quality": 4499,
      "models bert gpt2": 7267,
      "correlation human judgments": 2249,
      "promptbased fewshot learning": 9004,
      "learning experimental results": 6208,
      "gained increasing attention": 4363,
      "models datasets tasks": 7295,
      "nlp tasks including": 7876,
      "tasks including classification": 11222,
      "results various natural": 9935,
      "assess ability llms": 835,
      "challenging large language": 1501,
      "substantial improvements compared": 10894,
      "existing knowledge bases": 3690,
      "models llms construct": 7397,
      "results various natural language": 9936,
      "challenging large language models": 1502,
      "language models llms construct": 5862,
      "results various natural language processing": 9937,
      "slow": 10504,
      "resourcerich": 9827,
      "nmt": 7883,
      "scheduled": 10138,
      "pretrains": 8800,
      "unchanged": 11736,
      "heuristics": 4862,
      "inheritance": 5330,
      "figure": 4059,
      "periods": 8473,
      "convey": 2216,
      "tokenlevel": 11488,
      "titan": 11483,
      "continues": 2161,
      "kl": 5645,
      "recurrent": 9539,
      "dozens": 3084,
      "slm": 10502,
      "generations": 4590,
      "houlsby": 4938,
      "xsum": 12291,
      "weather": 12187,
      "describes": 2737,
      "exceed": 3623,
      "posttraining": 8613,
      "fundamentally": 4323,
      "usual": 11977,
      "retaining": 9939,
      "observing": 7990,
      "causing": 1433,
      "smoothly": 10522,
      "incorrectly": 5222,
      "predicts": 8695,
      "fullparameter": 4299,
      "lowcost": 6737,
      "sacrificing": 10078,
      "usable": 11881,
      "130b": 16,
      "converting": 2212,
      "elusive": 3226,
      "translators": 11648,
      "inferences": 5278,
      "loads": 6686,
      "revolution": 9979,
      "multitude": 7681,
      "asymmetric": 873,
      "wider": 12224,
      "variance": 12031,
      "mmlu": 7090,
      "freedom": 4282,
      "int4": 5454,
      "selector": 10210,
      "averages": 986,
      "inference speed": 5275,
      "largescale unlabeled": 6146,
      "comparative experiments": 1831,
      "analysis shows": 627,
      "new pretrained": 7832,
      "best practice": 1168,
      "parameters available": 8292,
      "classical text": 1650,
      "tasks story": 11280,
      "dialogue generation": 2862,
      "performance terms": 8437,
      "model effective": 7136,
      "recent pretrained": 9470,
      "training corpus": 11542,
      "effectively transfer": 3162,
      "advanced knowledge": 445,
      "pretraining largescale": 8788,
      "model scratch": 7213,
      "information different": 5292,
      "gpt2 improved": 4674,
      "models proposed": 7500,
      "gpt2 paper": 4676,
      "models improving": 7357,
      "different modules": 2895,
      "modeling representation": 7247,
      "desired attributes": 2773,
      "computational overhead": 1974,
      "collect largescale": 1771,
      "experiments demonstrated": 3777,
      "thoroughly analyze": 11459,
      "potential solution": 8635,
      "classification accuracy": 1652,
      "variety tasks": 12048,
      "roberta models": 10034,
      "houlsby et": 4939,
      "al 2019": 560,
      "task dataset": 11123,
      "directions improving": 2943,
      "size training": 10493,
      "negative impact": 7773,
      "topic coverage": 11503,
      "improvement especially": 5143,
      "work leverage": 12256,
      "tasks addition": 11162,
      "method investigate": 6955,
      "finetuning strategies": 4147,
      "accuracy drop": 231,
      "tasks taskoriented": 11290,
      "inference efficiency": 5271,
      "recently garnered": 9497,
      "attention academia": 889,
      "model including": 7162,
      "including limited": 5184,
      "make challenging": 6789,
      "industrial communities": 5261,
      "comprehensive understanding": 1949,
      "models furthermore": 7337,
      "llms necessitates": 6597,
      "conduct comparative": 2020,
      "different training": 2911,
      "predict response": 8686,
      "instruction datasets": 5401,
      "stateoftheart sentence": 10730,
      "tens thousands": 11353,
      "specific challenges": 10607,
      "dialogue models": 2864,
      "data availability": 2389,
      "promising technique": 8978,
      "classification models": 1653,
      "exposure bias": 3869,
      "learning bias": 6197,
      "performance training": 8440,
      "promising solution": 8977,
      "solution achieve": 10542,
      "generation present": 4561,
      "new models": 7827,
      "fullparameter finetuning": 4300,
      "investigate impact": 5561,
      "differences observed": 2873,
      "data propose": 2445,
      "identify potential": 5047,
      "multilevel large": 7614,
      "past years": 8319,
      "specific models": 10615,
      "models remarkably": 7514,
      "efficient finetuning": 3195,
      "low rank": 6733,
      "rank adaptation": 9325,
      "models scaling": 7520,
      "130b parameters": 17,
      "using single": 11973,
      "stateoftheart deep": 10706,
      "detection language": 2805,
      "remains elusive": 9654,
      "language learners": 5777,
      "models scale": 7519,
      "finetuning instruction": 4127,
      "survey paper": 11026,
      "100 languages": 5,
      "systems paper": 11064,
      "highlight current": 4891,
      "low performance": 6731,
      "capabilities wide": 1365,
      "models aimed": 7258,
      "foundational model": 4232,
      "models era": 7316,
      "opensourced llms": 8066,
      "bloom llama": 1232,
      "learning llms": 6224,
      "tasks data": 11185,
      "build endtoend": 1302,
      "advanced llms": 449,
      "llms research": 6635,
      "performance analysis": 8363,
      "results comprehensive": 9888,
      "heavily depends": 4841,
      "wider range": 12225,
      "unsupervised methods": 11859,
      "boosts model": 1244,
      "applying approach": 723,
      "parameters demonstrated": 8293,
      "average accuracy": 981,
      "method requires": 6964,
      "scenarios code": 10124,
      "llms foundation": 6538,
      "models foundational": 7335,
      "text generation paper": 11396,
      "effectively transfer knowledge": 3163,
      "plms bert gpt": 8546,
      "generation pretrained language": 4563,
      "train model scratch": 11529,
      "codes publicly available": 1747,
      "language models improving": 5840,
      "houlsby et al": 4940,
      "et al 2019": 3491,
      "room improvement especially": 10057,
      "automatic human evaluations": 940,
      "large pretrained language": 6114,
      "understanding tasks including": 11786,
      "training inference efficiency": 11559,
      "model pretrained language": 7202,
      "quality evaluation shows": 9241,
      "paper aims provide": 8210,
      "stateoftheart performance various": 10722,
      "performance various downstream": 8447,
      "models llms necessitates": 7435,
      "finetuning large pretrained": 4131,
      "realworld datasets demonstrate": 9389,
      "performance differences observed": 8379,
      "multilevel large language": 7615,
      "language models remarkably": 5941,
      "low rank adaptation": 6734,
      "rank adaptation lora": 9326,
      "language models scaling": 5943,
      "models perform tasks": 7488,
      "highlight current limitations": 4892,
      "capabilities wide range": 1366,
      "language models era": 5830,
      "billions parameters demonstrated": 1205,
      "demonstrated impressive capabilities": 2680,
      "wide range applications": 12210,
      "generation pretrained language models": 4564,
      "pretrained language models achieved": 8750,
      "houlsby et al 2019": 4941,
      "range natural language processing": 9320,
      "language model pretrained language": 5799,
      "model pretrained language models": 7203,
      "large pretrained language models": 6115,
      "pretrained language models llms": 8752,
      "achieving stateoftheart performance various": 319,
      "language models llms necessitates": 5894,
      "low rank adaptation lora": 6735,
      "large language models despite": 6029,
      "language model pretrained language models": 5800,
      "model pretrained language models plms": 7204,
      "large language models llms necessitates": 6070,
      "multireference": 7667,
      "posts": 8612,
      "monolingual": 7587,
      "educated": 3131,
      "breadth": 1265,
      "customer": 2369,
      "resultant": 9872,
      "resolving": 9821,
      "multigranularity": 7609,
      "model examples": 7141,
      "existing stateoftheart": 3709,
      "comprehensive empirical": 1931,
      "large conversational": 6004,
      "real life": 9371,
      "summarization systems": 10961,
      "keywords topics": 5641,
      "generation developed": 4528,
      "model introduce": 7168,
      "models public": 7503,
      "chinese pretrained": 1632,
      "context pretrained": 2146,
      "brings significant": 1286,
      "capacity fewshot": 1381,
      "conversational ai": 2203,
      "different knowledge": 2885,
      "various lowresource": 12075,
      "various topics": 12096,
      "comprehensive human": 1942,
      "source language": 10576,
      "scenarios number": 10132,
      "tasks present": 11257,
      "use external": 11886,
      "opensource model": 8063,
      "discussed impact": 2978,
      "humanlike characteristics": 5006,
      "data alleviate": 2383,
      "context code": 2140,
      "models responses": 7516,
      "bert model": 1155,
      "content detection": 2134,
      "design training objectives": 2758,
      "largest chinese pretrained": 6149,
      "language models shown": 5944,
      "alignment different languages": 575,
      "achieve competitive performance": 245,
      "models llms explore": 7418,
      "largescale pretrained language models": 6143,
      "language models llms explore": 5881,
      "large language models llms explore": 6062,
      "polish": 8564,
      "robertabased": 10035,
      "served": 10298,
      "scientists": 10154,
      "hc3": 4831,
      "substitute": 10901,
      "fake news": 3987,
      "great importance": 4749,
      "benchmark future": 1120,
      "certain language": 1445,
      "information social": 5315,
      "piece text": 8495,
      "tasks known": 11229,
      "known llms": 5722,
      "llms served": 6643,
      "served highquality": 10299,
      "chinese benchmark": 1621,
      "results compared": 9886,
      "findings offer": 4091,
      "work step": 12267,
      "human chatgpt": 4955,
      "comparison corpus": 1864,
      "chatgpt gained": 1564,
      "robertabased detector": 10036,
      "llms substitute": 6665,
      "variety tasks including": 12049,
      "information social media": 5316,
      "tasks known llms": 11230,
      "known llms served": 5723,
      "llms served highquality": 6644,
      "findings offer new": 4092,
      "human chatgpt comparison": 4956,
      "chatgpt comparison corpus": 1544,
      "chatgpt gained significant": 1565,
      "tasks known llms served": 11231,
      "known llms served highquality": 5724,
      "human chatgpt comparison corpus": 4957,
      "tasks known llms served highquality": 11232,
      "generalpurposed": 4438,
      "meanings": 6880,
      "drew": 3096,
      "cultural": 2337,
      "journals": 5612,
      "models traditional": 7544,
      "traditional machine": 11519,
      "characteristics language": 1517,
      "english prompts": 3383,
      "using human": 11947,
      "specifically evaluate": 10629,
      "elicit llms": 3216,
      "stateoftheart finetuned": 10707,
      "gpt model": 4668,
      "strategies pretrained": 10778,
      "tasks evaluated": 11197,
      "using existing": 11945,
      "scientific research": 10153,
      "provide valuable insights": 9169,
      "comprehensive empirical study": 1932,
      "finetuning strategies pretrained": 4148,
      "strategies pretrained language": 10779,
      "finetuning strategies pretrained language": 4149,
      "strategies pretrained language models": 10780,
      "finetuning strategies pretrained language models": 4150,
      "strategies pretrained language models plms": 10781,
      "intralingual": 5529,
      "quantity": 9256,
      "ceval": 1446,
      "middle": 7034,
      "chineseoriented": 1635,
      "gaokao": 4375,
      "lessons": 6266,
      "knowledge employ": 5664,
      "chat models": 1527,
      "ceval hard": 1447,
      "mt systems": 7603,
      "chinese gaokao": 1626,
      "evaluation data": 3548,
      "task largescale": 11133,
      "llms particular": 6605,
      "chinese pretrained language": 1633,
      "evaluation data specifically": 3549,
      "chinese pretrained language model": 1634
    }
  }
}