Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
spider
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
viitto
spider
Commits
132741d8
Commit
132741d8
authored
Jul 23, 2020
by
黄奎
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新增页面
parent
73cc4efe
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
264 additions
and
76 deletions
+264
-76
SouthEastDetailsSpider.cs
...der.Spiders/Rules/SouthSastRule/SouthEastDetailsSpider.cs
+21
-13
SouthEastListSpider.cs
...Spider.Spiders/Rules/SouthSastRule/SouthEastListSpider.cs
+46
-41
SouthEastPageSpider.cs
...Spider.Spiders/Rules/SouthSastRule/SouthEastPageSpider.cs
+130
-0
SouthEastProdHtmlDetailsSpider.cs
...ers/Rules/SouthSastRule/SouthEastProdHtmlDetailsSpider.cs
+11
-4
SouthEastService.cs
...netSpider.Spiders/Rules/SouthSastRule/SouthEastService.cs
+56
-18
No files found.
src/DotnetSpider.Spiders/Rules/SouthSastRule/SouthEastDetailsSpider.cs
View file @
132741d8
...
...
@@ -24,8 +24,15 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
/// </summary>
public
class
SouthEastDetailsSpider
:
Spider
{
public
static
async
Task
RunAsync
()
/// <summary>
/// 请求配置数据
/// </summary>
public
static
SouthEastRequestConfig
SouthEastRequest
{
get
;
set
;
}
public
static
async
Task
RunAsync
(
SouthEastRequestConfig
requestConfig
)
{
SouthEastRequest
=
requestConfig
;
var
builder
=
Builder
.
CreateDefaultBuilder
<
SouthEastDetailsSpider
>(
options
=>
{
options
.
UseProxy
=
false
;
...
...
@@ -56,12 +63,11 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
/// <param name="requests"></param>
public
void
LoadRequests
(
List
<
Request
>
requests
)
{
string
url
=
"https://gql.settour.com.tw/graphql"
;
string
origin
=
"https://trip.settour.com.tw/"
;
SouthEastService
.
TRAVEL_CONFIGS
.
ForEach
(
x
=>
{
var
keys
=
JObject
.
Parse
(
x
.
KeyInfo
);
foreach
(
var
item
in
SouthEastService
.
TRAVEL_CONFIGS
)
{
var
keys
=
JObject
.
Parse
(
item
.
KeyInfo
);
string
_prodNo
=
keys
[
"code"
].
ToString
();
var
tripRequest
=
new
Request
(
url
);
var
tripRequest
=
new
Request
(
SouthEastRequest
.
URL
);
tripRequest
.
Method
=
"POST"
;
var
tripData
=
new
{
...
...
@@ -84,15 +90,15 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
variables
=
tripData
};
tripRequest
.
SetHeader
(
"prodNo"
,
_prodNo
);
tripRequest
.
SetHeader
(
"origin"
,
o
rigin
);
tripRequest
.
SetHeader
(
"origin"
,
SouthEastRequest
.
O
rigin
);
tripRequest
.
Content
=
MessagePackSerializer
.
Typeless
.
Serialize
(
new
StringContent
(
Newtonsoft
.
Json
.
JsonConvert
.
SerializeObject
(
tripParam
),
"application/json"
,
"UTF-8"
));
requests
.
Add
(
tripRequest
);
var
productRequest
=
new
Request
(
url
)
var
productRequest
=
new
Request
(
SouthEastRequest
.
URL
)
{
Method
=
"POST"
};
productRequest
.
SetHeader
(
"origin"
,
o
rigin
);
productRequest
.
SetHeader
(
"origin"
,
SouthEastRequest
.
O
rigin
);
var
productData
=
new
{
companyNo
=
""
,
...
...
@@ -119,11 +125,9 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
};
productRequest
.
Content
=
MessagePackSerializer
.
Typeless
.
Serialize
(
new
StringContent
(
Newtonsoft
.
Json
.
JsonConvert
.
SerializeObject
(
productParam
),
"application/json"
,
"UTF-8"
));
requests
.
Add
(
productRequest
);
}
);
}
}
/// <summary>
/// 详情数据解析
/// </summary>
...
...
@@ -135,9 +139,13 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
}
/// <summary>
/// 详情数据处理
/// </summary>
/// <param name="context"></param>
/// <returns></returns>
protected
override
Task
Parse
(
DataContext
context
)
{
var
obj
=
context
.
Selectable
.
Value
;
var
rootValue
=
JObject
.
Parse
(
context
.
Selectable
.
Value
);
if
(
rootValue
!=
null
)
{
...
...
src/DotnetSpider.Spiders/Rules/SouthSastRule/SouthEastListSpider.cs
View file @
132741d8
...
...
@@ -24,13 +24,19 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
/// </summary>
public
class
SouthEastListSpider
:
Spider
{
public
int
pageNo
=
1
;
/// <summary>
/// 请求配置数据
/// </summary>
public
static
SouthEastRequestConfig
SouthEastRequest
{
get
;
set
;
}
/// <summary>
/// 配置请求
/// </summary>
/// <returns></returns>
public
static
async
Task
RunAsync
()
public
static
async
Task
RunAsync
(
SouthEastRequestConfig
requestConfig
)
{
SouthEastRequest
=
requestConfig
;
var
builder
=
Builder
.
CreateDefaultBuilder
<
SouthEastListSpider
>(
options
=>
{
options
.
UseProxy
=
false
;
...
...
@@ -56,45 +62,44 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
{
AddDataFlow
(
new
ListParser
());
List
<
Request
>
requests
=
new
List
<
Request
>();
string
url
=
"https://trip.settour.com.tw/taiwan/search?departure=&destination=&startDate=20200722&endDate=20201231&keyWord=&sort=PA&minPrice=-1&maxPrice=-1&tourDays=&travelType="
;
url
=
"https://gql.settour.com.tw/graphql"
;
var
request
=
new
Request
(
url
);
request
.
Method
=
"POST"
;
var
data
=
new
{
companyNo
=
""
,
departure
=
""
,
destination
=
""
,
endDate
=
"20201231"
,
isB2C
=
true
,
isSpecial
=
false
,
keyWord
=
""
,
maxPrice
=
-
1
,
minPrice
=
-
1
,
pageNo
=
1
,
portType
=
"B2C"
,
sort
=
"PA"
,
startDate
=
"20200722"
,
tourDays
=
""
,
travelType
=
""
,
};
var
param
=
new
for
(
int
i
=
1
;
i
<=
SouthEastRequest
.
TotalPage
;
i
++)
{
operationName
=
""
,
query
=
"query ($departure: String, $destination: String, $startDate: String!, $endDate: String!, $keyWord: String, $portType: String!, $tourDays: String, $travelType: String, $isSpecial: Boolean, $pageNo: Int!, $sort: String!, $minPrice: Int!, $maxPrice: Int!, $companyNo: String, $isB2C: Boolean!) {getDtSearchResult(departure: $departure, destination: $destination, startDate: $startDate, endDate: $endDate, keyWord: $keyWord, portType: $portType, tourDays: $tourDays, travelType: $travelType, isSpecial: $isSpecial, pageNo: $pageNo, sort: $sort, minPrice: $minPrice, maxPrice: $maxPrice, companyNo: $companyNo) {error {msgCode msgDesc __typename} data { pageNo totalPage totalSize promote {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag {code name __typename} price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate {grupNo date __typename } mktInfo imageName promoteTag __typename } products {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag { code name __typename } price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate{ grupNo date __typename} mktInfo imageName promoteTag __typename } __typename } __typename } }"
,
variables
=
data
};
var
content
=
new
StringContent
(
Newtonsoft
.
Json
.
JsonConvert
.
SerializeObject
(
param
),
"application/json"
,
"UTF-8"
);
request
.
SetHeader
(
"origin"
,
"https://trip.settour.com.tw"
);
request
.
Content
=
MessagePackSerializer
.
Typeless
.
Serialize
(
content
);
requests
.
Add
(
request
);
var
request
=
new
Request
(
SouthEastRequest
.
URL
);
request
.
Method
=
"POST"
;
var
data
=
new
{
companyNo
=
""
,
departure
=
""
,
destination
=
""
,
endDate
=
SouthEastRequest
.
EndDate
,
isB2C
=
true
,
isSpecial
=
false
,
keyWord
=
""
,
maxPrice
=
-
1
,
minPrice
=
-
1
,
pageNo
=
i
,
portType
=
"B2C"
,
sort
=
"PA"
,
startDate
=
SouthEastRequest
.
StartDate
,
tourDays
=
""
,
travelType
=
""
,
};
var
param
=
new
{
operationName
=
""
,
query
=
"query ($departure: String, $destination: String, $startDate: String!, $endDate: String!, $keyWord: String, $portType: String!, $tourDays: String, $travelType: String, $isSpecial: Boolean, $pageNo: Int!, $sort: String!, $minPrice: Int!, $maxPrice: Int!, $companyNo: String, $isB2C: Boolean!) {getDtSearchResult(departure: $departure, destination: $destination, startDate: $startDate, endDate: $endDate, keyWord: $keyWord, portType: $portType, tourDays: $tourDays, travelType: $travelType, isSpecial: $isSpecial, pageNo: $pageNo, sort: $sort, minPrice: $minPrice, maxPrice: $maxPrice, companyNo: $companyNo) {error {msgCode msgDesc __typename} data { pageNo totalPage totalSize promote {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag {code name __typename} price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate {grupNo date __typename } mktInfo imageName promoteTag __typename } products {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag { code name __typename } price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate{ grupNo date __typename} mktInfo imageName promoteTag __typename } __typename } __typename } }"
,
variables
=
data
};
var
content
=
new
StringContent
(
Newtonsoft
.
Json
.
JsonConvert
.
SerializeObject
(
param
),
"application/json"
,
"UTF-8"
);
request
.
SetHeader
(
"origin"
,
SouthEastRequest
.
Origin
);
request
.
Content
=
MessagePackSerializer
.
Typeless
.
Serialize
(
content
);
requests
.
Add
(
request
);
}
await
AddRequestsAsync
(
requests
);
}
/// <summary>
/// 东南旅游数据解析
/// 东南旅游
列表
数据解析
/// </summary>
protected
class
ListParser
:
DataParser
{
...
...
@@ -111,15 +116,11 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
protected
override
Task
Parse
(
DataContext
context
)
{
var
rootValue
=
JObject
.
Parse
(
context
.
Selectable
.
Value
);
Console
.
WriteLine
(
"SouthEastListSpider:"
+
rootValue
);
if
(
rootValue
!=
null
)
{
var
rootData
=
JObject
.
Parse
(
rootValue
[
"data"
].
ToString
());
var
getDtSearchResult
=
JObject
.
Parse
(
rootData
[
"getDtSearchResult"
].
ToString
());
var
data
=
JObject
.
Parse
(
getDtSearchResult
[
"data"
].
ToString
());
var
pageNo
=
data
[
"pageNo"
].
ToString
();
var
totalPage
=
data
[
"totalPage"
].
ToString
();
var
totalSize
=
data
[
"totalSize"
].
ToString
();
var
promoteArray
=
JArray
.
Parse
(
data
[
"promote"
].
ToString
());
if
(
promoteArray
!=
null
&&
promoteArray
.
Count
>
0
)
{
...
...
@@ -134,7 +135,11 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
return
Task
.
CompletedTask
;
}
/// <summary>
/// 解析列表数据
/// </summary>
/// <param name="array"></param>
private
void
FormatList
(
JArray
array
)
{
List
<
RB_Travel_Config
>
list
=
new
List
<
RB_Travel_Config
>();
...
...
src/DotnetSpider.Spiders/Rules/SouthSastRule/SouthEastPageSpider.cs
0 → 100644
View file @
132741d8
using
System
;
using
System.Collections.Generic
;
using
System.Text
;
using
System.Threading
;
using
System.Threading.Tasks
;
using
DotnetSpider.DataFlow
;
using
DotnetSpider.DataFlow.Parser
;
using
DotnetSpider.Http
;
using
DotnetSpider.Scheduler.Component
;
using
DotnetSpider.Selector
;
using
DotnetSpider.Spiders.Model
;
using
MessagePack
;
using
Microsoft.Extensions.Hosting
;
using
Microsoft.Extensions.Logging
;
using
Microsoft.Extensions.Options
;
using
Newtonsoft.Json.Linq
;
using
Serilog
;
using
System.Linq
;
namespace
DotnetSpider.Spiders.Rules.SouthSastRule
{
/// <summary>
/// Spider that requests the first page of the South East Travel product list
/// and records how many result pages the search reports.
/// </summary>
public class SouthEastPageSpider : Spider
{
    /// <summary>
    /// GraphQL search query sent to the list endpoint. The parser below only reads
    /// <c>data.getDtSearchResult.data.totalPage</c> from the response.
    /// </summary>
    private const string SearchQuery = "query ($departure: String, $destination: String, $startDate: String!, $endDate: String!, $keyWord: String, $portType: String!, $tourDays: String, $travelType: String, $isSpecial: Boolean, $pageNo: Int!, $sort: String!, $minPrice: Int!, $maxPrice: Int!, $companyNo: String, $isB2C: Boolean!) {getDtSearchResult(departure: $departure, destination: $destination, startDate: $startDate, endDate: $endDate, keyWord: $keyWord, portType: $portType, tourDays: $tourDays, travelType: $travelType, isSpecial: $isSpecial, pageNo: $pageNo, sort: $sort, minPrice: $minPrice, maxPrice: $maxPrice, companyNo: $companyNo) {error {msgCode msgDesc __typename} data { pageNo totalPage totalSize promote {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag {code name __typename} price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate {grupNo date __typename } mktInfo imageName promoteTag __typename } products {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag { code name __typename } price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate{ grupNo date __typename} mktInfo imageName promoteTag __typename } __typename } __typename } }";

    /// <summary>
    /// Request configuration used to build the page request. Assigned by
    /// <see cref="RunAsync"/> before the spider is built.
    /// </summary>
    public static SouthEastRequestConfig SouthEastRequest { get; set; }

    /// <summary>
    /// Stores the request configuration, then builds and runs the spider.
    /// </summary>
    /// <param name="requestConfig">URL, origin header and date range for the search request.</param>
    /// <returns>A task that completes when the spider run has finished.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="requestConfig"/> is null.</exception>
    public static async Task RunAsync(SouthEastRequestConfig requestConfig)
    {
        // Fail fast: a null config would otherwise surface later as a
        // NullReferenceException inside InitializeAsync.
        if (requestConfig == null)
        {
            throw new ArgumentNullException(nameof(requestConfig));
        }

        SouthEastRequest = requestConfig;

        var builder = Builder.CreateDefaultBuilder<SouthEastPageSpider>(options =>
        {
            options.UseProxy = false;
            options.Speed = 1;
            options.RequestTimeout = 60;
        });
        builder.UseSerilog();
        builder.UseQueueDistinctBfsScheduler<HashSetDuplicateRemover>();
        await builder.Build().RunAsync();
    }

    /// <summary>
    /// Creates the spider; all arguments are forwarded to the base <c>Spider</c> constructor.
    /// </summary>
    public SouthEastPageSpider(IOptions<SpiderOptions> options, SpiderServices services, ILogger<Spider> logger)
        : base(options, services, logger)
    {
    }

    /// <summary>
    /// Registers the page parser and queues a single POST request for page 1 of the search.
    /// </summary>
    /// <param name="stoppingToken">Cancellation token supplied by the host (not used here).</param>
    /// <returns>A task that completes once the request has been queued.</returns>
    protected override async Task InitializeAsync(CancellationToken stoppingToken)
    {
        AddDataFlow(new PageParser());

        var request = new Request(SouthEastRequest.URL);
        request.Method = "POST";

        // GraphQL variables: only the date range comes from configuration,
        // everything else is a fixed "no filter" search for page 1.
        var data = new
        {
            companyNo = "",
            departure = "",
            destination = "",
            endDate = SouthEastRequest.EndDate,
            isB2C = true,
            isSpecial = false,
            keyWord = "",
            maxPrice = -1,
            minPrice = -1,
            pageNo = 1,
            portType = "B2C",
            sort = "PA",
            startDate = SouthEastRequest.StartDate,
            tourDays = "",
            travelType = ""
        };
        var param = new
        {
            operationName = "",
            query = SearchQuery,
            variables = data
        };

        var content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(param), "application/json", "UTF-8");
        request.SetHeader("origin", SouthEastRequest.Origin);
        request.Content = MessagePackSerializer.Typeless.Serialize(content);

        List<Request> requests = new List<Request>();
        requests.Add(request);
        await AddRequestsAsync(requests);
    }

    /// <summary>
    /// Parser that extracts the total page count from the search response.
    /// </summary>
    protected class PageParser : DataParser
    {
        public PageParser()
        {
        }

        /// <summary>
        /// Reads <c>data.getDtSearchResult.data.totalPage</c> from the JSON response and
        /// stores it in <c>SouthEastService.RequestConfig.TotalPage</c>.
        /// </summary>
        /// <param name="context">Data context whose selectable value holds the response body.</param>
        /// <returns>A completed task.</returns>
        /// <remarks>
        /// If the value is missing, not an integer, or negative, the previously configured
        /// <c>TotalPage</c> is left untouched instead of being overwritten with 0.
        /// A body that is not valid JSON still makes <c>JObject.Parse</c> throw, as before.
        /// </remarks>
        protected override Task Parse(DataContext context)
        {
            var rootValue = JObject.Parse(context.Selectable.Value);

            // SelectToken returns null when any level of the path is absent
            // (for example when "data" is null), so no intermediate re-parsing is needed.
            var totalPageToken = rootValue.SelectToken("data.getDtSearchResult.data.totalPage");

            if (totalPageToken != null
                && int.TryParse(totalPageToken.ToString(), out int totalPage)
                && totalPage >= 0)
            {
                // NOTE(review): this writes to the service-level config rather than the
                // SouthEastRequest passed to RunAsync; they appear to be the same object
                // in the visible caller - confirm.
                SouthEastService.RequestConfig.TotalPage = totalPage;
            }
            else
            {
                Console.WriteLine("SouthEastPageSpider: totalPage missing or invalid in response; keeping TotalPage=" + SouthEastService.RequestConfig.TotalPage);
            }

            return Task.CompletedTask;
        }
    }
}
}
src/DotnetSpider.Spiders/Rules/SouthSastRule/SouthEastProdHtmlDetailsSpider.cs
View file @
132741d8
...
...
@@ -99,10 +99,17 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
var
rootValue
=
JObject
.
Parse
(
context
.
Selectable
.
Value
);
if
(
rootValue
!=
null
)
{
var
rootData
=
JObject
.
Parse
(
rootValue
[
"data"
].
ToString
());
var
getGfgProdHtml
=
JObject
.
Parse
(
rootData
[
"getGfgProdHtml"
].
ToString
());
var
data
=
JArray
.
Parse
(
getGfgProdHtml
[
"data"
].
ToString
());
FormatList
(
context
.
Request
.
Headers
[
"prodNo"
].
ToString
(),
data
);
try
{
var
rootData
=
JObject
.
Parse
(
rootValue
[
"data"
].
ToString
());
var
getGfgProdHtml
=
JObject
.
Parse
(
rootData
[
"getGfgProdHtml"
].
ToString
());
var
data
=
JArray
.
Parse
(
getGfgProdHtml
[
"data"
].
ToString
());
FormatList
(
context
.
Request
.
Headers
[
"prodNo"
].
ToString
(),
data
);
}
catch
(
Exception
ex
)
{
}
}
return
Task
.
CompletedTask
;
}
...
...
src/DotnetSpider.Spiders/Rules/SouthSastRule/SouthEastService.cs
View file @
132741d8
...
...
@@ -13,35 +13,41 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
{
public
static
List
<
RB_Travel_Config
>
TRAVEL_CONFIGS
=
new
List
<
RB_Travel_Config
>();
public
static
SouthEastRequestConfig
RequestConfig
=
new
SouthEastRequestConfig
();
public
SouthEastService
()
{
TRAVEL_CONFIGS
=
new
List
<
RB_Travel_Config
>();
RequestConfig
=
new
SouthEastRequestConfig
()
{
URL
=
"https://gql.settour.com.tw/graphql"
,
StartDate
=
DateTime
.
Now
.
AddDays
(
1
).
ToString
(
"yyyyMMdd"
),
EndDate
=
DateTime
.
Now
.
AddDays
(
7
).
ToString
(
"yyyyMMdd"
),
TotalPage
=
1
,
Origin
=
"https://trip.settour.com.tw"
};
}
public
async
Task
RunAsync
()
{
Console
.
WriteLine
(
"********* SouthEastService_Srart ***********"
);
//1、获取列表页数量
//await SouthEastListSpider.RunAsync();
Console
.
WriteLine
(
"********* 2222222222222222222222 ***********"
);
//2、获取详情信息,补充列表不完善信息
var
obj
=
new
{
code
=
"GDP0000001346"
,
no
=
"1"
,
travel_title_key
=
"0"
};
TRAVEL_CONFIGS
.
Add
(
new
RB_Travel_Config
()
{
KeyInfo
=
Newtonsoft
.
Json
.
JsonConvert
.
SerializeObject
(
obj
)
});
//await SouthEastDetailsSpider.RunAsync();
//获取总页数
await
SouthEastPageSpider
.
RunAsync
(
RequestConfig
);
Console
.
WriteLine
(
"获取总页数:"
+
RequestConfig
.
TotalPage
);
//获取列表
await
SouthEastListSpider
.
RunAsync
(
RequestConfig
);
Console
.
WriteLine
(
"获取总条数:"
+
TRAVEL_CONFIGS
.
Count
);
//GFG0000000094
//获取行程信息
await
SouthEastDetailsSpider
.
RunAsync
(
RequestConfig
);
//await SouthEastProdHtmlDetailsSpider.RunAsync();
Console
.
WriteLine
(
"COnfig:"
+
Newtonsoft
.
Json
.
JsonConvert
.
SerializeObject
(
TRAVEL_CONFIGS
));
await
SouthEastProdHtmlDetailsSpider
.
RunAsync
();
Console
.
WriteLine
(
"********* SouthEastService_End ************"
);
////3、补充酒店,餐食等信息
...
...
@@ -51,4 +57,36 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
//await BwtStorageSpider.RunAsync();
}
}
/// <summary>
/// Request settings passed to the South East Travel spiders.
/// </summary>
public class SouthEastRequestConfig
{
    /// <summary>
    /// Start of the search date range, sent as the <c>startDate</c> query variable.
    /// </summary>
    public string StartDate { get; set; }

    /// <summary>
    /// End of the search date range, sent as the <c>endDate</c> query variable.
    /// </summary>
    public string EndDate { get; set; }

    /// <summary>
    /// URL the spiders send their requests to.
    /// </summary>
    public string URL { get; set; }

    /// <summary>
    /// Value sent in the <c>origin</c> request header (cross-origin setting).
    /// </summary>
    public string Origin { get; set; }

    /// <summary>
    /// Total number of list pages.
    /// </summary>
    public int TotalPage { get; set; }
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment