正在显示
3 个修改的文件
包含
169 行增加
和
44 行删除
| @@ -6,7 +6,8 @@ | @@ -6,7 +6,8 @@ | ||
| 6 | # @Software: PyCharm | 6 | # @Software: PyCharm |
| 7 | 7 | ||
| 8 | import datetime | 8 | import datetime |
| 9 | - | 9 | +import demjson |
| 10 | +import base64 | ||
| 10 | from enum import Enum | 11 | from enum import Enum |
| 11 | from attr import attrs, attrib | 12 | from attr import attrs, attrib |
| 12 | 13 | ||
| @@ -106,7 +107,7 @@ class Exchange(Enum): | @@ -106,7 +107,7 @@ class Exchange(Enum): | ||
| 106 | return None | 107 | return None |
| 107 | 108 | ||
| 108 | 109 | ||
| 109 | -def todict(obj,include:list=None): | 110 | +def todict(obj, include: list = None): |
| 110 | keys = dir(obj) | 111 | keys = dir(obj) |
| 111 | res = {} | 112 | res = {} |
| 112 | if include: | 113 | if include: |
| @@ -123,3 +124,19 @@ def todict(obj,include:list=None): | @@ -123,3 +124,19 @@ def todict(obj,include:list=None): | ||
| 123 | value = getattr(obj, key) | 124 | value = getattr(obj, key) |
| 124 | res[key] = value | 125 | res[key] = value |
| 125 | return res | 126 | return res |
| 127 | + | ||
| 128 | + | ||
| 129 | +def tobase64(obj): | ||
| 130 | + if isinstance(obj, (dict, list)): | ||
| 131 | + obj = demjson.encode(obj) | ||
| 132 | + if isinstance(obj, str): | ||
| 133 | + obj = obj.encode('utf-8') | ||
| 134 | + if isinstance(obj, bytes): | ||
| 135 | + bin = base64.b64encode(obj) | ||
| 136 | + return str(bin, 'utf-8') | ||
| 137 | + raise BaseException("must str,list,dict,bytes") | ||
| 138 | + | ||
| 139 | + | ||
| 140 | +def frombase64(base): | ||
| 141 | + string = base64.b64decode(base).decode() | ||
| 142 | + return demjson.decode(string) |
| @@ -17,7 +17,7 @@ import furl | @@ -17,7 +17,7 @@ import furl | ||
| 17 | from fbchat import Client, ThreadType, Message, Sticker, FBchatUserError, _exception, log, _util | 17 | from fbchat import Client, ThreadType, Message, Sticker, FBchatUserError, _exception, log, _util |
| 18 | from fbchat._state import State, session_factory, is_home | 18 | from fbchat._state import State, session_factory, is_home |
| 19 | 19 | ||
| 20 | -from lib import google_map | 20 | +from lib import google_map, common |
| 21 | from lib.common import WorkPlace, College | 21 | from lib.common import WorkPlace, College |
| 22 | from utils import parse_html, _attachment | 22 | from utils import parse_html, _attachment |
| 23 | 23 | ||
| @@ -782,21 +782,39 @@ class FacebookClient(Client): | @@ -782,21 +782,39 @@ class FacebookClient(Client): | ||
| 782 | 782 | ||
| 783 | def friendsList(self, ext_data=None): | 783 | def friendsList(self, ext_data=None): |
| 784 | if ext_data: | 784 | if ext_data: |
| 785 | - data = {'av': self.uid, | ||
| 786 | - 'fb_api_caller_class': 'RelayModern', | ||
| 787 | - 'fb_api_req_friendly_name': 'ProfileCometAppCollectionListRendererPaginationQuery', | ||
| 788 | - 'variables': json.dumps({"count": 20, | ||
| 789 | - "cursor": ext_data['cursor'], | ||
| 790 | - "search": None, "scale": 2, | ||
| 791 | - "privacySelectorRenderLocation": "COMET_PROFILE_COLLECTIONS", | ||
| 792 | - "id": ext_data['id']}), | ||
| 793 | - 'doc_id': '2773917206008873'} | ||
| 794 | - res = self._post('/api/graphql/', data) | ||
| 795 | - res = res['data']['node'] | 785 | + ext_data = common.frombase64(ext_data) |
| 786 | + | ||
| 787 | + if 'collection_token' in ext_data: | ||
| 788 | + data = { | ||
| 789 | + 'fb_dtsg_ag': self._state.fb_dtsg_ag, | ||
| 790 | + 'data': demjson.encode(ext_data), | ||
| 791 | + } | ||
| 792 | + res = self._get('/ajax/pagelet/generic.php/AllFriendsAppCollectionPagelet', data) | ||
| 793 | + | ||
| 794 | + cursor = parse_html.get_pagelet_info(res) | ||
| 795 | + ext_data.update({'cursor': cursor}) | ||
| 796 | + | ||
| 797 | + res = parse_html.get_friend_div(res.get('payload')) | ||
| 798 | + res['items']['ext_data'] = ext_data | ||
| 799 | + res['items']['count'] = ext_data['count'] | ||
| 800 | + res['items']['has_next_page'] = not cursor is None | ||
| 801 | + else: | ||
| 802 | + data = {'av': self.uid, | ||
| 803 | + 'fb_api_caller_class': 'RelayModern', | ||
| 804 | + 'fb_api_req_friendly_name': 'ProfileCometAppCollectionListRendererPaginationQuery', | ||
| 805 | + 'variables': json.dumps({"count": 20, | ||
| 806 | + "cursor": ext_data['cursor'], | ||
| 807 | + "search": None, "scale": 2, | ||
| 808 | + "privacySelectorRenderLocation": "COMET_PROFILE_COLLECTIONS", | ||
| 809 | + "id": ext_data['id']}), | ||
| 810 | + 'doc_id': '2773917206008873'} | ||
| 811 | + res = self._post('/api/graphql/', data) | ||
| 812 | + res = res['data']['node'] | ||
| 813 | + | ||
| 796 | else: | 814 | else: |
| 797 | - res = self._state._session.get(self._state.page_url + '/friends') | 815 | + res = self._state._session.get(self._state.page_url.replace('profile.php?id=', '') + '/friends') |
| 798 | if res.status_code == 200 and '/friends' in res.url: | 816 | if res.status_code == 200 and '/friends' in res.url: |
| 799 | - res = parse_html.get_frient_div(res.text) | 817 | + res = parse_html.get_friend_div(res.text) |
| 800 | if not res: | 818 | if not res: |
| 801 | respone = { | 819 | respone = { |
| 802 | 'items': [], | 820 | 'items': [], |
| @@ -820,15 +838,31 @@ class FacebookClient(Client): | @@ -820,15 +838,31 @@ class FacebookClient(Client): | ||
| 820 | } | 838 | } |
| 821 | friends.append(data) | 839 | friends.append(data) |
| 822 | 840 | ||
| 823 | - respone = { | ||
| 824 | - 'items': friends, | ||
| 825 | - 'count': count, | ||
| 826 | - 'ext_data': { | ||
| 827 | - 'id': res['id'], | ||
| 828 | - 'cursor': res['items']['page_info']['end_cursor'] | ||
| 829 | - }, | ||
| 830 | - 'has_next_page': res['items']['page_info']['has_next_page'] | ||
| 831 | - } | 841 | + if 'page_info' in res['items']: |
| 842 | + respone = { | ||
| 843 | + 'items': friends, | ||
| 844 | + 'count': count, | ||
| 845 | + 'ext_data': { | ||
| 846 | + 'id': res['id'], | ||
| 847 | + 'cursor': res['items']['page_info']['end_cursor'] | ||
| 848 | + }, | ||
| 849 | + 'has_next_page': res['items']['page_info']['has_next_page'] | ||
| 850 | + } | ||
| 851 | + elif 'ext_data' in res['items']: | ||
| 852 | + respone = { | ||
| 853 | + 'items': friends, | ||
| 854 | + 'count': count, | ||
| 855 | + 'ext_data': res['items']['ext_data'], | ||
| 856 | + 'has_next_page': res['items']['has_next_page'], | ||
| 857 | + } | ||
| 858 | + else: | ||
| 859 | + respone = { | ||
| 860 | + 'items': [], | ||
| 861 | + 'count': 0, | ||
| 862 | + 'ext_data': {}, | ||
| 863 | + 'has_next_page': False | ||
| 864 | + } | ||
| 865 | + respone['ext_data'] = common.tobase64(respone['ext_data']) | ||
| 832 | return respone | 866 | return respone |
| 833 | 867 | ||
| 834 | def changePwd(self, old, new): | 868 | def changePwd(self, old, new): |
| @@ -130,22 +130,28 @@ def get_current_city(res): | @@ -130,22 +130,28 @@ def get_current_city(res): | ||
| 130 | def get_user_info(b): | 130 | def get_user_info(b): |
| 131 | pattern = re.compile(r"viewer_actor:(.*?)comment_count", re.MULTILINE | re.DOTALL) | 131 | pattern = re.compile(r"viewer_actor:(.*?)comment_count", re.MULTILINE | re.DOTALL) |
| 132 | script = b.find("script", text=pattern) | 132 | script = b.find("script", text=pattern) |
| 133 | - if script: | 133 | + if not script: |
| 134 | + a_lable = b.find('a', attrs={'data-gt': '{"chrome_nav_item":"timeline_chrome"}'}) | ||
| 135 | + name = a_lable.text | ||
| 136 | + url = a_lable.attrs.get('href') | ||
| 137 | + img_data = a_lable.find('img').attrs | ||
| 138 | + image = img_data.get('src') | ||
| 139 | + id = re.search("_header_(\d+)", a_lable.find('img').attrs.get('id')).group(1) | ||
| 140 | + else: | ||
| 134 | info = pattern.search(script.text).group() | 141 | info = pattern.search(script.text).group() |
| 135 | id = re.findall(r'id:"(.*?)"', info)[0] | 142 | id = re.findall(r'id:"(.*?)"', info)[0] |
| 136 | name = re.findall(r',name:"(.*?)"', info)[0] | 143 | name = re.findall(r',name:"(.*?)"', info)[0] |
| 137 | url = re.findall(r'url:"(.*?)"', info)[0] | 144 | url = re.findall(r'url:"(.*?)"', info)[0] |
| 138 | image = re.findall(r'profile_picture_depth_0.*?uri:"(.*?)"', info)[0] | 145 | image = re.findall(r'profile_picture_depth_0.*?uri:"(.*?)"', info)[0] |
| 139 | - try: | ||
| 140 | - r = re.compile('/p\d+x\d+/(.*?)\?') | ||
| 141 | - iname = r.findall(image)[0] | ||
| 142 | - pattern = re.compile('src="(https.*?/p\d{3,}x\d{3,}/%s.*?)"' % (iname), re.MULTILINE | re.DOTALL) | ||
| 143 | - elem = b.find('div', class_='hidden_elem', string=pattern) | ||
| 144 | - image = re.sub('&', lambda x: "&", pattern.findall(elem.string)[0]) | ||
| 145 | - except: | ||
| 146 | - pass | ||
| 147 | - return id, name, url, image | ||
| 148 | - return None, None, None, None | 146 | + try: |
| 147 | + r = re.compile('/p\d+x\d+/(.*?)\?') | ||
| 148 | + iname = r.findall(image)[0] | ||
| 149 | + pattern = re.compile('src="(https.*?/p\d{3,}x\d{3,}/%s.*?)"' % (iname), re.MULTILINE | re.DOTALL) | ||
| 150 | + elem = b.find('div', class_='hidden_elem', string=pattern) | ||
| 151 | + image = re.sub('&', lambda x: "&", pattern.findall(elem.string)[0]) | ||
| 152 | + except: | ||
| 153 | + pass | ||
| 154 | + return id, name, url, image | ||
| 149 | 155 | ||
| 150 | 156 | ||
| 151 | def get_all_raw_id(text): | 157 | def get_all_raw_id(text): |
| @@ -182,11 +188,79 @@ def get_div_text(html): | @@ -182,11 +188,79 @@ def get_div_text(html): | ||
| 182 | return b.text | 188 | return b.text |
| 183 | 189 | ||
| 184 | 190 | ||
| 185 | -def get_frient_div(text): | ||
| 186 | - if not 'ProfileCometAppSectionFriendsList' in text: | ||
| 187 | - return None | ||
| 188 | - b = bs4.BeautifulSoup(text, 'html.parser') | ||
| 189 | - script = b.find('script', string=re.compile(r'ProfileCometAppSectionFriendsList')) | ||
| 190 | - a = re.findall(r'TimelineAppCollectionListRenderer",collection:(\{.*?\}\}\})', script.string)[0] | ||
| 191 | - res = demjson.decode(a) | ||
| 192 | - return res | 191 | +def get_friend_div(text): |
| 192 | + if 'ProfileCometAppSectionFriendsList' in text: | ||
| 193 | + b = bs4.BeautifulSoup(text, 'html.parser') | ||
| 194 | + script = b.find('script', string=re.compile(r'ProfileCometAppSectionFriendsList')) | ||
| 195 | + a = re.findall(r'TimelineAppCollectionListRenderer",collection:(\{.*?\}\}\})', script.string)[0] | ||
| 196 | + res = demjson.decode(a) | ||
| 197 | + return res | ||
| 198 | + elif 'friend_list_item' in text: | ||
| 199 | + data = {'items': {'edges': [], 'count': 0}} | ||
| 200 | + b = bs4.BeautifulSoup(text, 'html.parser') | ||
| 201 | + elem = b.find('div', class_='hidden_elem', string=re.compile(r'data-testid="friend_list_item"')) | ||
| 202 | + if elem: | ||
| 203 | + elem = bs4.BeautifulSoup(elem.string, 'html.parser') | ||
| 204 | + divs = elem.find_all('div', {"data-testid": "friend_list_item"}) | ||
| 205 | + first_page = True | ||
| 206 | + else: | ||
| 207 | + first_page = False | ||
| 208 | + divs = b.find_all('li') | ||
| 209 | + | ||
| 210 | + for dd in divs: | ||
| 211 | + node = dict() | ||
| 212 | + item = dict() | ||
| 213 | + try: | ||
| 214 | + a_data = dd.find('a').attrs | ||
| 215 | + img_data = dd.find('img').attrs | ||
| 216 | + | ||
| 217 | + url = a_data.get('href') | ||
| 218 | + fbid = re.findall(r'user.php\?id=(\d+)&', a_data.get('data-hovercard', ""))[0] | ||
| 219 | + image = img_data.get('src') | ||
| 220 | + name = img_data.get('aria-label') | ||
| 221 | + item['image'] = {'uri': image} | ||
| 222 | + item['title'] = {'text': name} | ||
| 223 | + item['node'] = {'id': fbid, 'url': url} | ||
| 224 | + node['node'] = item | ||
| 225 | + data['items']['edges'].append(node) | ||
| 226 | + except: | ||
| 227 | + pass | ||
| 228 | + if first_page: | ||
| 229 | + script = b.find('script', string=re.compile('"MedleyPageletRequestData"')) | ||
| 230 | + page = re.compile(r'"MedleyPageletRequestData","set",\[\],\[(.*?)\]') | ||
| 231 | + ext_data = demjson.decode(page.search(script.string).group(1)) | ||
| 232 | + | ||
| 233 | + load = re.compile('\["TimelineAppCollection","enableContentLoader".*?\]\]') | ||
| 234 | + script_126 = b.find('script', string=load) | ||
| 235 | + if script_126: | ||
| 236 | + tttt = load.search(script_126.string).group() | ||
| 237 | + ext_data.update({ | ||
| 238 | + 'cursor': re.search(r'"([A-Za-z0-9-_]{50,200})"', tttt).group(1), | ||
| 239 | + 'collection_token': re.search(r'"pagelet_timeline_app_collection_(.*?)"', tttt).group(1), | ||
| 240 | + }) | ||
| 241 | + data['items']['has_next_page'] = True | ||
| 242 | + else: | ||
| 243 | + data['items']['has_next_page'] = False | ||
| 244 | + | ||
| 245 | + data['items']['count'] = int( | ||
| 246 | + re.search(r'AllFriendsAppCollectionPagelet".*?,tab_count:(\d+),', text).group(1)) | ||
| 247 | + | ||
| 248 | + ext_data['count'] = data['items']['count'] | ||
| 249 | + data['items']['ext_data'] = ext_data | ||
| 250 | + else: | ||
| 251 | + data['items']['ext_data'] = "update" | ||
| 252 | + | ||
| 253 | + return data | ||
| 254 | + | ||
| 255 | + | ||
| 256 | +def get_pagelet_info(res): | ||
| 257 | + y = {} | ||
| 258 | + for x in res['jsmods']['require']: | ||
| 259 | + if isinstance(x, list) and x[0] == 'TimelineAppCollection': | ||
| 260 | + y = x | ||
| 261 | + break | ||
| 262 | + try: | ||
| 263 | + cursor = y[3][2] | ||
| 264 | + except: | ||
| 265 | + cursor = None | ||
| 266 | + return cursor |
-
请 注册 或 登录 后发表评论