#!/usr/bin/env python
"""Tokenize the text in ``enronsent_all.txt`` and build 1- to 5-gram streams.

Running this module reads the whole input file into memory, splits it into
word tokens with NLTK, and exposes the following module-level names:

    text       -- full contents of the input file as one string
    token      -- list of word tokens produced by ``nltk.word_tokenize``
    unigrams   -- n-grams of size 1 over ``token``
    bigrams    -- n-grams of size 2 over ``token``
    trigrams   -- n-grams of size 3 over ``token``
    fourgrams  -- n-grams of size 4 over ``token``
    fivegrams  -- n-grams of size 5 over ``token``
"""

import nltk
from nltk import word_tokenize
from nltk.util import ngrams

# Use a context manager so the file handle is released as soon as the text
# has been read, even if the read raises. The platform default encoding is
# kept on purpose so that decoding behaves as it did before.
with open('enronsent_all.txt', 'r') as es:
    text = es.read()

# NOTE(review): word_tokenize needs NLTK's tokenizer models to be installed
# (nltk.download) -- confirm the deployment environment provides them.
token = nltk.word_tokenize(text)

# NOTE: nltk.util.ngrams returns a lazy, single-pass iterator of tuples.
# Wrap a result in list(...) at the point of use if it must be traversed
# more than once or indexed.
unigrams = ngrams(token, 1)
bigrams = ngrams(token, 2)
trigrams = ngrams(token, 3)
fourgrams = ngrams(token, 4)
fivegrams = ngrams(token, 5)