In [2]:
from pymongo import MongoClient
import pandas as pd

client = MongoClient()
db = client.twitterCleanDb
tweetCollection = db.tenKtweetsCleaned

hashtags = []
retweets = []
followers = []
name = []
statusesCount = []
text = []
polarity = []
lat = []
long = []
uid = []

hashTagCollection = tweetCollection.find({'entities.hashtags.text': {'$ne': None}})

for tweet in hashTagCollection:
    if len(tweet['entities']['hashtags']) > 0 :
        hashtags.append(tweet['entities']['hashtags'][0]['text'])
        retweets.append(tweet['retweet_count'])
        followers.append(tweet['user']['followers_count'])
#         lat.append(tweet['lat'])
#         long.append(tweet['long'])
        statusesCount.append(tweet['user']['statuses_count'])
        text.append(tweet['text'])
        polarity.append(tweet['polarity'])
        name.append(tweet['user']['name'])
        uid.append(tweet['id_str'])

print (len(uid), len(name), len(polarity), len(text), len(statusesCount), len(followers), len(retweets), len(hashtags))
857 857 857 857 857 857 857 857
In [14]:
hashtags = []
retweets = []
followers = []
name = []
statusesCount = []
text = []
polarity = []
lat = []
long = []
uid = []

for tweet in hashTagCollection:
    hashtags.append(tweeet['entities']['tweet'][0]['text'])
    retweets.append(tweet['retweet_count'])
    followers.append(tweet['user']['followers_count'])
    lat.append(tweet['lat'])
    long.append(tweet['long'])
    statusesCount.append(tweet['statuses_count'])
    text.append(tweet['text'])
    polarity.append(tweet['polarity'])
    name.append(tweet['user']['name'])
    uid.append(tweet['id_str'])

len(uid)
Out[14]:
0
In [24]:
import pandas as pd

df = pd.DataFrame({'id': uid, 'name': name, 'retweets': retweets, 'hashtag': hashtags, 'followers': followers, 'statusCount': statusesCount, 'text': text, 'polarity': polarity})
df.to_j()
Out[24]:
followers hashtag id name polarity retweets statusCount text
0 321 OneDirectionReunion 985576840533331968 Emily loves zayn 0.285714 31 636 RT @softforavery: me right now #OneDirectionRe...
1 1687 CLFC 985576840445251584 Oyeniyi Bamz 0.000000 550 4518 RT @ChelseaLFC: What a result, we are in to th...
2 3833 PrisonComplaints 985576840441057280 PrisonStorm 0.200000 1 12150 Please get ready to move from Topic 1 #PrisonC...
3 89 SVWRBL 985576840256479234 Fredrik Kennefors 0.416667 6 32573 RT @Bundesliga_EN: A fine evening of football ...
4 283 NickiDay 985577236521738241 Jeremy Harrell 0.250000 3135 1200 RT @YoungboyUpdates: Youngboy on when he first...