通过空间历史浏览,爬出查看你空间的人(一般限制20人,除非开通黄钻),然后再爬出这20人的浏览记录,依次向下爬,你可以控制爬行深度。
这里仅仅给出爬虫代码片段,你可以进一步优化,将Q群分类存储。通过Q群相互浏览关系,可以通过绘图工具绘制好友网络,等等。
欢迎跟我讨论,请加Q群并注明“读者”。
代码涉及 pthreads,如果不清楚请阅读:《PHP 高级编程之多线程》
http://netkiller.github.io/journal/thread.php.html
?
标签: pthreads PHP
?
代码片段(1)[全屏查看所有代码]
1. [代码][PHP]代码
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
<?php
// Stop immediately when the pthreads extension is not loaded: the Worker,
// Stackable and Pool classes used below come from it.
if (!extension_loaded('pthreads')) {
    die('Please install pthreads');
}

// Expected to define the Snoopy HTTP client class used by Crawler::qzone().
// require_once (rather than include_once) makes a missing file fail here, at
// startup, instead of later as a "class not found" fatal inside a worker.
require_once('Snoopy.class.php');

/**
 * Worker thread type handed to the Pool; Crawler tasks reach it through
 * $this->worker.
 *
 * As written the worker does no setup of its own: the constructor and run()
 * are both empty.
 */
class CrawlerWorker extends Worker
{
    /**
     * Shared handle returned by getInstance().
     *
     * Nothing in this class assigns it, so it keeps its initial null value.
     * Presumably intended to hold a database connection (the caller catches
     * PDOException) -- TODO confirm and initialise it in run().
     */
    protected static $dbh;

    /**
     * Intentionally empty constructor.
     */
    public function __construct()
    {
    }

    /**
     * Worker start-up hook; currently performs no work.
     */
    public function run()
    {
    }

    /**
     * Returns the class-level handle stored in self::$dbh.
     *
     * NOTE(review): this method is protected but is invoked from
     * Crawler::run() via $this->worker; verify that the pthreads version in
     * use permits that call.
     *
     * @return mixed the current value of self::$dbh (null unless assigned elsewhere)
     */
    protected function getInstance()
    {
        return self::$dbh;
    }
}

/**
 * Pool task that walks Qzone visitor lists starting from one seed account.
 *
 * For every account it fetches the visitor list over HTTP (via Snoopy),
 * prints it, and then repeats the process for each visitor until the
 * configured depth is reached.
 */
class Crawler extends Stackable
{
    /** @var mixed Seed account number passed to the constructor. */
    public $qq;

    /**
     * @var int Level number at which recursion() stops descending. The first
     *          call is level 1, so the default of 3 fetches visitor lists for
     *          levels 1 and 2 only.
     */
    public $depth = 3;

    /**
     * @param mixed $qq seed account number whose visitors are crawled first
     */
    public function __construct($qq)
    {
        $this->qq = $qq;
    }

    /**
     * Task entry point: crawls outward from the seed account.
     *
     * Nothing in the crawl currently touches a database; the PDOException
     * handler is kept so that the exception type handled here stays the same
     * if persistence is added. Failures are appended to mobile_error.log as
     * "<seed>,<message>".
     */
    public function run()
    {
        try {
            $this->recursion([$this->qq]);
        } catch (PDOException $e) {
            // The original formatted two undefined variables here, which
            // produced an empty log line plus notices.
            $error = sprintf("%s,%s\n", $this->qq, $e->getMessage());
            file_put_contents("mobile_error.log", $error, FILE_APPEND);
        }
    }

    /**
     * Fetches and prints the visitors of every account in $qqs, then recurses
     * into each returned visitor list.
     *
     * The current level is carried as an argument instead of a shared static
     * counter, so every branch of the crawl, and every task run by the same
     * worker, sees the correct depth.
     *
     * @param array $qqs   account numbers to look up
     * @param int   $level 1-based level of this call; callers normally omit it
     * @return void
     */
    public function recursion($qqs, $level = 1)
    {
        printf("Level: %s\n", $level);

        // Random pause between 10 ms and 1 s on every call.
        usleep(mt_rand(10000, 1000000));

        if ($level >= $this->depth) {
            return;
        }

        // A failed or empty lookup yields nothing to iterate over.
        if (!is_array($qqs) || empty($qqs)) {
            return;
        }

        foreach ($qqs as $uin) {
            $lst = $this->qzone($uin);
            print_r($lst);
            $this->recursion($lst, $level + 1);
        }
    }

    /**
     * Requests the visitor list of one account and extracts the visitor
     * account numbers from the JSON response.
     *
     * NOTE(review): the g_tk, t and sid query values are hard-coded; they look
     * like session-bound tokens -- confirm they are still valid before use.
     *
     * @param mixed $qq account number to look up
     * @return array list of `uin` values; empty when the request fails or the
     *               response does not contain data->list
     */
    public function qzone($qq)
    {
        // $qq may originate from a previous remote response, so encode it
        // before splicing it into the query string.
        $url = 'http://m.qzone.com/mqz_get_visitor?g_tk=1191852101&res_mode=0&res_uin='
            . rawurlencode($qq)
            . '&offset=0&count=100&page=1&format=json&t=1401762986882&sid=dODKVcYv6azjN87cxXQ5mao1xgakYjHg18c8aa5e0201%3D%3D';

        $snoopy = new Snoopy;
        $snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
        $snoopy->referer = "http://m.qzone.com/";
        $snoopy->rawheaders["Pragma"] = "no-cache";
        $snoopy->maxredirs = 2;
        $snoopy->offsiteok = false;
        $snoopy->expandlinks = false;

        if (!$snoopy->fetchtext($url)) {
            print "Snoopy: error while fetching document: " . $snoopy->error . "\n";

            // Always hand back an array so callers can iterate safely.
            return [];
        }

        $results = [];
        $tmp = json_decode($snoopy->results);

        // Only walk the list when the decoded document really has the
        // expected data->list array.
        if (is_object($tmp) && isset($tmp->data->list) && is_array($tmp->data->list)) {
            foreach ($tmp->data->list as $lst) {
                if (isset($lst->uin)) {
                    $results[] = $lst->uin;
                }
            }
        }

        return $results;
    }
}

// Pool sized at 100 that runs its tasks on CrawlerWorker threads; the empty
// array is the worker constructor argument list.
$pool = new Pool(100, \CrawlerWorker::class, []);

// Bulk seeding over a numeric range is left disabled:
//   foreach (range(1000, 100000) as $number) {
//       $pool->submit(new Crawler($number));
//   }

// Seed accounts, submitted in this order as one Crawler task each.
$seeds = ['13721218', '291379'];
foreach ($seeds as $seed) {
    $pool->submit(new Crawler($seed));
}

// Shut the pool down once the tasks have been handed over.
$pool->shutdown();
?>