from pymongo import MongoClient
import pandas as pd
# Connect to the default MongoDB instance and select the cleaned-tweet
# collection.
client = MongoClient()
db = client['twitterCleanDb']
tweetCollection = db['tenKtweetsCleaned']

# One list per extracted field; entries at the same index come from the
# same tweet.
hashtags, retweets, followers = [], [], []
name, statusesCount, text, polarity = [], [], [], []
lat, long = [], []
uid = []

# Only documents whose hashtag text is not null are requested.
hashTagCollection = tweetCollection.find(
    {'entities.hashtags.text': {'$ne': None}}
)

for tweet in hashTagCollection:
    tags = tweet['entities']['hashtags']
    if len(tags) == 0:
        # No hashtag to record for this tweet; skip it entirely so the
        # lists stay the same length.
        continue
    # Only the first hashtag of each tweet is kept.
    hashtags.append(tags[0]['text'])
    retweets.append(tweet['retweet_count'])
    followers.append(tweet['user']['followers_count'])
    # lat/long are not collected in this pass; those two lists stay empty.
    statusesCount.append(tweet['user']['statuses_count'])
    text.append(tweet['text'])
    polarity.append(tweet['polarity'])
    name.append(tweet['user']['name'])
    uid.append(tweet['id_str'])

# Report the length of every populated list on one line.
print(*[len(column) for column in (
    uid, name, polarity, text, statusesCount, followers, retweets, hashtags,
)])
# Second pass over the same query: every list is rebuilt from scratch, and
# this time the coordinates are collected as well.
hashtags = []
retweets = []
followers = []
name = []
statusesCount = []
text = []
polarity = []
lat = []
long = []
uid = []

# A pymongo cursor can be consumed only once. The earlier loop over
# hashTagCollection has already drained it, so without a rewind this loop
# would never execute and every list above would stay empty.
hashTagCollection.rewind()
for tweet in hashTagCollection:
    tags = tweet['entities']['hashtags']
    if not tags:
        # Same guard as the first pass: a tweet with no hashtag is skipped
        # as a whole so all lists keep the same length.
        continue
    # Only the first hashtag of each tweet is kept.
    hashtags.append(tags[0]['text'])
    retweets.append(tweet['retweet_count'])
    followers.append(tweet['user']['followers_count'])
    # NOTE(review): the first pass has the lat/long appends disabled,
    # presumably because not every document carries coordinates. .get()
    # records None for a missing value instead of aborting - TODO confirm
    # against the collection schema.
    lat.append(tweet.get('lat'))
    long.append(tweet.get('long'))
    # statuses_count is read from the user sub-document, as in the first pass.
    statusesCount.append(tweet['user']['statuses_count'])
    text.append(tweet['text'])
    polarity.append(tweet['polarity'])
    name.append(tweet['user']['name'])
    uid.append(tweet['id_str'])

# Bare expression: shows the row count in an interactive session and has no
# effect when the file is run as a plain script.
len(uid)
import pandas as pd
# Assemble the collected lists into a table, one column per field.
# lat/long are not included in the frame.
df = pd.DataFrame({
    'id': uid,
    'name': name,
    'retweets': retweets,
    'hashtag': hashtags,
    'followers': followers,
    'statusCount': statusesCount,
    'text': text,
    'polarity': polarity,
})
# Serialise the frame to JSON. With no path argument, to_json() returns the
# JSON text instead of writing a file.
df.to_json()