|
...
|
...
|
@@ -130,22 +130,28 @@ def get_current_city(res): |
|
|
|
def _larger_profile_image(b, image):
    """Return a higher-resolution variant of ``image`` if the page has one.

    The file name is taken from ``image`` (the path segment after
    ``/p<W>x<H>/`` and before ``?``), then a ``hidden_elem`` div whose text
    contains a ``src="..."`` URL for the same file with at least three-digit
    dimensions is looked up. This is best-effort: whenever a step finds
    nothing, ``image`` is returned unchanged.
    """
    if not image:
        return image
    file_names = re.findall(r'/p\d+x\d+/(.*?)\?', image)
    if not file_names:
        return image
    # Escape the file name so characters such as '.' or '+' are matched
    # literally instead of being interpreted as regex syntax.
    big_re = re.compile(
        r'src="(https.*?/p\d{3,}x\d{3,}/%s.*?)"' % re.escape(file_names[0]),
        re.MULTILINE | re.DOTALL,
    )
    elem = b.find('div', class_='hidden_elem', string=big_re)
    if elem is None or not elem.string:
        return image
    candidates = big_re.findall(elem.string)
    if not candidates:
        return image
    # NOTE(review): the original call replaced '&' with '&', which is a
    # no-op. The URL is cut out of markup text, so decoding '&amp;' is
    # presumably what was intended -- confirm.
    return candidates[0].replace('&amp;', '&')


def get_user_info(b):
    """Extract the viewer's id, name, profile URL and profile image URL.

    Two page layouts are tried in order:

    1. A ``<script>`` whose text contains ``viewer_actor:...comment_count``;
       the fields are pulled out of that fragment with regexes.
    2. Otherwise, the ``<a>`` tagged with the ``timeline_chrome`` nav item;
       the fields come from the anchor and its nested ``<img>``.

    Afterwards the image URL is swapped for a larger variant when one can be
    found on the page.

    Args:
        b: Parsed document exposing a BeautifulSoup-style ``find`` method.

    Returns:
        A tuple ``(id, name, url, image)``. All four are ``None`` when
        neither layout is present; in the anchor layout ``id`` and ``image``
        are ``None`` if the anchor has no usable ``<img>``.

    Raises:
        IndexError: The ``viewer_actor`` script was found but one of the
            expected fields is missing from it.
    """
    viewer_re = re.compile(r"viewer_actor:(.*?)comment_count", re.MULTILINE | re.DOTALL)
    script = b.find("script", string=viewer_re)

    if script is not None:
        info = viewer_re.search(script.text).group()
        user_id = re.findall(r'id:"(.*?)"', info)[0]
        name = re.findall(r',name:"(.*?)"', info)[0]
        url = re.findall(r'url:"(.*?)"', info)[0]
        image = re.findall(r'profile_picture_depth_0.*?uri:"(.*?)"', info)[0]
    else:
        anchor = b.find('a', attrs={'data-gt': '{"chrome_nav_item":"timeline_chrome"}'})
        if anchor is None:
            # Neither layout matched: nothing to report.
            return None, None, None, None
        name = anchor.text
        url = anchor.attrs.get('href')
        img = anchor.find('img')
        img_attrs = img.attrs if img is not None else {}
        image = img_attrs.get('src')
        # The numeric id is embedded in the <img> element id as "..._header_<digits>".
        id_match = re.search(r"_header_(\d+)", img_attrs.get('id') or "")
        user_id = id_match.group(1) if id_match else None

    image = _larger_profile_image(b, image)
    return user_id, name, url, image
|
|
|
|
|
|
|
|
|
|
|
|
def get_all_raw_id(text):
|
|
...
|
...
|
@@ -182,11 +188,79 @@ def get_div_text(html): |
|
|
|
return b.text
|
|
|
|
|
|
|
|
|
|
|
|
def get_frient_div(text):
    """Decode the friends-list collection embedded in a Comet profile page.

    The function name is misspelled ("frient"); it is kept unchanged so
    existing callers keep working.

    Args:
        text: Page HTML as a string.

    Returns:
        The decoded ``collection`` object found after
        ``TimelineAppCollectionListRenderer`` in the script mentioning
        ``ProfileCometAppSectionFriendsList``, or ``None`` when the marker,
        the script, or the collection literal is absent.
    """
    if 'ProfileCometAppSectionFriendsList' not in text:
        return None

    b = bs4.BeautifulSoup(text, 'html.parser')
    script = b.find('script', string=re.compile(r'ProfileCometAppSectionFriendsList'))
    # The marker may appear outside a <script>; treat that like "not found"
    # instead of failing on ``None.string``.
    if script is None or not script.string:
        return None

    collections = re.findall(
        r'TimelineAppCollectionListRenderer",collection:(\{.*?\}\}\})',
        script.string,
    )
    if not collections:
        return None
    # The literal is a JavaScript object (unquoted keys), hence demjson.
    return demjson.decode(collections[0])
|
|
|
def _friend_edge(entry):
    """Build one ``{'node': ...}`` edge from a friend element, or ``None``.

    ``None`` is returned when the element has no ``<a>``/``<img>`` or its
    ``data-hovercard`` attribute carries no ``user.php?id=<digits>&`` part,
    so unrelated elements are skipped rather than aborting the whole parse.
    """
    try:
        link_attrs = entry.find('a').attrs
        img_attrs = entry.find('img').attrs
        fbid = re.findall(r'user.php\?id=(\d+)&', link_attrs.get('data-hovercard', ""))[0]
    except (AttributeError, IndexError, TypeError):
        return None
    return {
        'node': {
            'image': {'uri': img_attrs.get('src')},
            'title': {'text': img_attrs.get('aria-label')},
            'node': {'id': fbid, 'url': link_attrs.get('href')},
        }
    }


def _fill_first_page_info(b, text, items):
    """Add ``has_next_page``, ``count`` and ``ext_data`` to ``items`` in place.

    Reads the ``MedleyPageletRequestData`` payload, the optional
    ``TimelineAppCollection.enableContentLoader`` call (cursor and
    collection token) and the ``tab_count`` of ``AllFriendsAppCollectionPagelet``.
    """
    page_re = re.compile(r'"MedleyPageletRequestData","set",\[\],\[(.*?)\]')
    request_script = b.find('script', string=re.compile('"MedleyPageletRequestData"'))
    ext_data = demjson.decode(page_re.search(request_script.string).group(1))

    load_re = re.compile(r'\["TimelineAppCollection","enableContentLoader".*?\]\]')
    loader_script = b.find('script', string=load_re)
    if loader_script is not None:
        loader_call = load_re.search(loader_script.string).group()
        ext_data.update({
            'cursor': re.search(r'"([A-Za-z0-9-_]{50,200})"', loader_call).group(1),
            'collection_token': re.search(
                r'"pagelet_timeline_app_collection_(.*?)"', loader_call).group(1),
        })
    # A content-loader call is only present when more pages can be fetched.
    items['has_next_page'] = loader_script is not None

    items['count'] = int(
        re.search(r'AllFriendsAppCollectionPagelet".*?,tab_count:(\d+),', text).group(1))
    ext_data['count'] = items['count']
    items['ext_data'] = ext_data


def get_friend_div(text):
    """Extract friend-list data from a profile friends page or pagelet.

    Two formats are recognised:

    * Comet pages (``ProfileCometAppSectionFriendsList`` present): the
      ``collection`` object following ``TimelineAppCollectionListRenderer``
      is decoded and returned as-is.
    * Pages containing ``friend_list_item``: a dict of the form
      ``{'items': {'edges': [...], 'count': ..., 'ext_data': ...}}`` is
      built. If a ``hidden_elem`` div holds the friend markup, the input is
      treated as the first page and ``has_next_page``, ``count`` and a
      pagination ``ext_data`` dict are filled in. Otherwise every ``<li>``
      is scanned, ``count`` stays ``0`` and ``ext_data`` is the string
      ``"update"``.

    Args:
        text: Page or pagelet HTML as a string.

    Returns:
        The decoded collection, the ``{'items': ...}`` dict, or ``None``
        when neither format is recognised or the Comet collection cannot be
        located.

    Raises:
        AttributeError: On a first page where the
            ``MedleyPageletRequestData`` script, the loader arguments or
            the ``tab_count`` value are missing.
    """
    if 'ProfileCometAppSectionFriendsList' in text:
        b = bs4.BeautifulSoup(text, 'html.parser')
        script = b.find('script', string=re.compile(r'ProfileCometAppSectionFriendsList'))
        if script is None or not script.string:
            return None
        collections = re.findall(
            r'TimelineAppCollectionListRenderer",collection:(\{.*?\}\}\})',
            script.string,
        )
        if not collections:
            return None
        return demjson.decode(collections[0])

    if 'friend_list_item' not in text:
        return None

    data = {'items': {'edges': [], 'count': 0}}
    b = bs4.BeautifulSoup(text, 'html.parser')

    # On the first page the friend markup is stored as the text of a
    # hidden_elem div and has to be parsed a second time.
    hidden = b.find('div', class_='hidden_elem',
                    string=re.compile(r'data-testid="friend_list_item"'))
    first_page = bool(hidden)
    if first_page:
        inner = bs4.BeautifulSoup(hidden.string, 'html.parser')
        entries = inner.find_all('div', {"data-testid": "friend_list_item"})
    else:
        entries = b.find_all('li')

    for entry in entries:
        edge = _friend_edge(entry)
        if edge is not None:
            data['items']['edges'].append(edge)

    if first_page:
        _fill_first_page_info(b, text, data['items'])
    else:
        data['items']['ext_data'] = "update"

    return data
|
|
|
|
|
|
|
|
|
|
|
|
def get_pagelet_info(res):
    """Return the cursor carried by the first ``TimelineAppCollection`` call.

    Args:
        res: Decoded pagelet response; ``res['jsmods']['require']`` must be
            an iterable of entries.

    Returns:
        ``entry[3][2]`` of the first list entry whose first element is
        ``'TimelineAppCollection'``, or ``None`` when there is no such entry
        or it does not have that shape.

    Raises:
        KeyError: ``res`` has no ``'jsmods'`` / ``'require'`` key.
    """
    for entry in res['jsmods']['require']:
        # ``entry and`` guards against empty lists, which would otherwise
        # raise IndexError on ``entry[0]``.
        if isinstance(entry, list) and entry and entry[0] == 'TimelineAppCollection':
            try:
                return entry[3][2]
            except (IndexError, KeyError, TypeError):
                # Only the first matching entry is considered.
                return None
    return None
...
|
...
|
|