"""Print the mean number of words per caption in a tab-separated file.

Usage: pass the path of the TSV file as the first command-line argument.
The caption is read from the third tab-separated column of every row.
"""
import os
import sys

import numpy as np

# Zero-based index of the tab-separated column that holds the caption.
CAPTION_COLUMN = 2


def caption_word_counts(lines, column=CAPTION_COLUMN):
    """Return the number of whitespace-separated words in each row's caption.

    Args:
        lines: Iterable of text lines, each a tab-separated row (an open
            file object works).
        column: Zero-based index of the column containing the caption.

    Returns:
        A list with one word count per non-blank input line, in input order.

    Raises:
        ValueError: If a non-blank line has too few columns; the message
            includes the 1-based line number.
    """
    counts = []
    for line_number, line in enumerate(lines, start=1):
        # Strip only the line terminator: a bare rstrip() would also eat
        # trailing tabs and silently drop empty trailing columns.
        row = line.rstrip("\r\n")
        if not row:
            # Tolerate blank lines (e.g. a trailing one at end of file).
            continue
        fields = row.split("\t")
        if len(fields) <= column:
            raise ValueError(
                f"line {line_number}: expected at least {column + 1} "
                f"tab-separated columns, found {len(fields)}"
            )
        # split() without arguments ignores repeated/leading/trailing
        # whitespace, so an empty caption counts as 0 words rather than 1.
        counts.append(len(fields[column].split()))
    return counts


def main(argv=None):
    """Read the TSV file named on the command line and print the mean length.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Arguments after the first are ignored.
    """
    # Imported here so that caption_word_counts() can be imported and reused
    # without the progress-bar dependency being installed.
    from tqdm import tqdm

    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit(f"usage: {os.path.basename(sys.argv[0])} CAPTIONS_TSV")

    path = args[0]
    with open(path) as f:
        counts = caption_word_counts(tqdm(f))

    if not counts:
        # np.mean([]) would emit a RuntimeWarning and print "nan".
        sys.exit(f"no captions found in {path}")

    print(np.mean(counts))


if __name__ == "__main__":
    main()