|
41 | 41 | You need to install python-twitter: |
42 | 42 | pip install python-twitter |
43 | 43 | If pip is not found you might have to install it using easy_install. |
44 | | -If it does not work on your system, you might want to follow instructions |
| 44 | +If it does not work on your system, you might want to follow instructions |
45 | 45 | at http://code.google.com/p/python-twitter/ """) |
46 | 46 |
|
47 | 47 | sys.exit(1) |
@@ -97,22 +97,25 @@ def purge_already_fetched(fetch_list, raw_dir): |
97 | 97 |
|
98 | 98 | # list of tweet ids that still need downloading |
99 | 99 | rem_list= [] |
| 100 | +count_done=0 |
100 | 101 |
|
101 | 102 | # check each tweet to see if we have it |
102 | 103 | foriteminfetch_list: |
103 | 104 |
|
104 | 105 | # check if json file exists |
105 | | -tweet_file=raw_dir+item[2] +'.json' |
| 106 | +tweet_file=os.path.join(raw_dir, item[2] +'.json') |
106 | 107 | ifos.path.exists(tweet_file): |
107 | 108 |
|
108 | 109 | # attempt to parse json file |
109 | 110 | try: |
110 | 111 | parse_tweet_json(tweet_file) |
111 | 112 | print'--> already downloaded #'+item[2] |
| 113 | +count_done+=1 |
112 | 114 | exceptRuntimeError: |
113 | 115 | rem_list.append(item) |
114 | 116 | else: |
115 | 117 | rem_list.append(item) |
| 118 | +print"done=",count_done |
116 | 119 |
|
117 | 120 | returnrem_list |
118 | 121 |
|
@@ -158,14 +161,19 @@ def download_tweets(fetch_list, raw_dir): |
158 | 161 | # New Twitter API 1.1 |
159 | 162 | try: |
160 | 163 | json_data=api.GetStatus(item[2]).AsJsonString() |
| 164 | + |
161 | 165 | excepttwitter.TwitterError, e: |
162 | | -fatal=False |
| 166 | +fatal=True |
163 | 167 | formine.message: |
164 | 168 | ifm['code'] ==34: |
165 | 169 | print"Tweet missing: ",item |
166 | 170 | # [{u'message': u'Sorry, that page does not exist', u'code': 34}] |
167 | 171 | fatal=False |
168 | 172 | break |
| 173 | +elifm['code'] ==88: |
| 174 | +print"Rate limit exceeded. Please lower max_tweets_per_hr." |
| 175 | +fatal=True |
| 176 | +break |
169 | 177 |
|
170 | 178 | iffatal: |
171 | 179 | raise |
@@ -256,11 +264,13 @@ def main(data_path): |
256 | 264 |
|
257 | 265 | # get user parameters |
258 | 266 | user_params=get_user_params(data_path) |
| 267 | +printuser_params |
259 | 268 | dump_user_params(user_params) |
260 | 269 |
|
261 | 270 | # get fetch list |
262 | 271 | total_list=read_total_list(user_params['inList']) |
263 | 272 | fetch_list=purge_already_fetched(total_list, user_params['rawDir']) |
| 273 | +print"Fetching %i tweets"%len(fetch_list) |
264 | 274 |
|
265 | 275 | # start fetching data from twitter |
266 | 276 | download_tweets(fetch_list, user_params['rawDir']) |
|
0 commit comments