Commit 132741d8 authored by 黄奎

新增页面

parent 73cc4efe
......@@ -24,8 +24,15 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
/// </summary>
public class SouthEastDetailsSpider : Spider
{
public static async Task RunAsync()
/// <summary>
/// 请求配置数据
/// </summary>
public static SouthEastRequestConfig SouthEastRequest { get; set; }
public static async Task RunAsync(SouthEastRequestConfig requestConfig)
{
SouthEastRequest = requestConfig;
var builder = Builder.CreateDefaultBuilder<SouthEastDetailsSpider>(options =>
{
options.UseProxy = false;
......@@ -56,12 +63,11 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
/// <param name="requests"></param>
public void LoadRequests(List<Request> requests)
{
string url = "https://gql.settour.com.tw/graphql";
string origin = "https://trip.settour.com.tw/";
SouthEastService.TRAVEL_CONFIGS.ForEach(x => {
var keys = JObject.Parse(x.KeyInfo);
foreach (var item in SouthEastService.TRAVEL_CONFIGS)
{
var keys = JObject.Parse(item.KeyInfo);
string _prodNo = keys["code"].ToString();
var tripRequest = new Request(url);
var tripRequest = new Request(SouthEastRequest.URL);
tripRequest.Method = "POST";
var tripData = new
{
......@@ -84,15 +90,15 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
variables = tripData
};
tripRequest.SetHeader("prodNo", _prodNo);
tripRequest.SetHeader("origin", origin);
tripRequest.SetHeader("origin", SouthEastRequest.Origin);
tripRequest.Content = MessagePackSerializer.Typeless.Serialize(new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(tripParam), "application/json", "UTF-8"));
requests.Add(tripRequest);
var productRequest = new Request(url)
var productRequest = new Request(SouthEastRequest.URL)
{
Method = "POST"
};
productRequest.SetHeader("origin", origin);
productRequest.SetHeader("origin", SouthEastRequest.Origin);
var productData = new
{
companyNo = "",
......@@ -119,11 +125,9 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
};
productRequest.Content = MessagePackSerializer.Typeless.Serialize(new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(productParam), "application/json", "UTF-8"));
requests.Add(productRequest);
});
}
}
/// <summary>
/// 详情数据解析
/// </summary>
......@@ -135,9 +139,13 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
}
/// <summary>
/// 详情数据处理
/// </summary>
/// <param name="context"></param>
/// <returns></returns>
protected override Task Parse(DataContext context)
{
var obj = context.Selectable.Value;
var rootValue = JObject.Parse(context.Selectable.Value);
if (rootValue != null)
{
......
......@@ -24,13 +24,19 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
/// </summary>
public class SouthEastListSpider: Spider
{
public int pageNo = 1;
/// <summary>
/// 请求配置数据
/// </summary>
public static SouthEastRequestConfig SouthEastRequest { get; set; }
/// <summary>
/// 配置请求
/// </summary>
/// <returns></returns>
public static async Task RunAsync()
public static async Task RunAsync(SouthEastRequestConfig requestConfig)
{
SouthEastRequest = requestConfig;
var builder = Builder.CreateDefaultBuilder<SouthEastListSpider>(options =>
{
options.UseProxy = false;
......@@ -56,45 +62,44 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
{
AddDataFlow(new ListParser());
List<Request> requests = new List<Request>();
string url = "https://trip.settour.com.tw/taiwan/search?departure=&destination=&startDate=20200722&endDate=20201231&keyWord=&sort=PA&minPrice=-1&maxPrice=-1&tourDays=&travelType=";
url = "https://gql.settour.com.tw/graphql";
var request = new Request(url);
request.Method = "POST";
var data = new
{
companyNo = "",
departure = "",
destination = "",
endDate = "20201231",
isB2C = true,
isSpecial = false,
keyWord = "",
maxPrice = -1,
minPrice = -1,
pageNo = 1,
portType = "B2C",
sort = "PA",
startDate = "20200722",
tourDays = "",
travelType = "",
};
var param = new
for (int i = 1; i <= SouthEastRequest.TotalPage; i++)
{
operationName = "",
query = "query ($departure: String, $destination: String, $startDate: String!, $endDate: String!, $keyWord: String, $portType: String!, $tourDays: String, $travelType: String, $isSpecial: Boolean, $pageNo: Int!, $sort: String!, $minPrice: Int!, $maxPrice: Int!, $companyNo: String, $isB2C: Boolean!) {getDtSearchResult(departure: $departure, destination: $destination, startDate: $startDate, endDate: $endDate, keyWord: $keyWord, portType: $portType, tourDays: $tourDays, travelType: $travelType, isSpecial: $isSpecial, pageNo: $pageNo, sort: $sort, minPrice: $minPrice, maxPrice: $maxPrice, companyNo: $companyNo) {error {msgCode msgDesc __typename} data { pageNo totalPage totalSize promote {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag {code name __typename} price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate {grupNo date __typename } mktInfo imageName promoteTag __typename } products {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag { code name __typename } price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate{ grupNo date __typename} mktInfo imageName promoteTag __typename } __typename } __typename } }",
variables = data
};
var content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(param), "application/json", "UTF-8");
request.SetHeader("origin", "https://trip.settour.com.tw");
request.Content = MessagePackSerializer.Typeless.Serialize(content);
requests.Add(request);
var request = new Request(SouthEastRequest.URL);
request.Method = "POST";
var data = new
{
companyNo = "",
departure = "",
destination = "",
endDate = SouthEastRequest.EndDate,
isB2C = true,
isSpecial = false,
keyWord = "",
maxPrice = -1,
minPrice = -1,
pageNo = i,
portType = "B2C",
sort = "PA",
startDate = SouthEastRequest.StartDate,
tourDays = "",
travelType = "",
};
var param = new
{
operationName = "",
query = "query ($departure: String, $destination: String, $startDate: String!, $endDate: String!, $keyWord: String, $portType: String!, $tourDays: String, $travelType: String, $isSpecial: Boolean, $pageNo: Int!, $sort: String!, $minPrice: Int!, $maxPrice: Int!, $companyNo: String, $isB2C: Boolean!) {getDtSearchResult(departure: $departure, destination: $destination, startDate: $startDate, endDate: $endDate, keyWord: $keyWord, portType: $portType, tourDays: $tourDays, travelType: $travelType, isSpecial: $isSpecial, pageNo: $pageNo, sort: $sort, minPrice: $minPrice, maxPrice: $maxPrice, companyNo: $companyNo) {error {msgCode msgDesc __typename} data { pageNo totalPage totalSize promote {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag {code name __typename} price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate {grupNo date __typename } mktInfo imageName promoteTag __typename } products {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag { code name __typename } price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate{ grupNo date __typename} mktInfo imageName promoteTag __typename } __typename } __typename } }",
variables = data
};
var content = new StringContent(Newtonsoft.Json.JsonConvert.SerializeObject(param), "application/json", "UTF-8");
request.SetHeader("origin", SouthEastRequest.Origin);
request.Content = MessagePackSerializer.Typeless.Serialize(content);
requests.Add(request);
}
await AddRequestsAsync(requests);
}
/// <summary>
/// 东南旅游数据解析
/// 东南旅游列表数据解析
/// </summary>
protected class ListParser : DataParser
{
......@@ -111,15 +116,11 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
protected override Task Parse(DataContext context)
{
var rootValue = JObject.Parse(context.Selectable.Value);
Console.WriteLine("SouthEastListSpider:" + rootValue);
if (rootValue != null)
{
var rootData = JObject.Parse(rootValue["data"].ToString());
var getDtSearchResult = JObject.Parse(rootData["getDtSearchResult"].ToString());
var data = JObject.Parse(getDtSearchResult["data"].ToString());
var pageNo = data["pageNo"].ToString();
var totalPage = data["totalPage"].ToString();
var totalSize = data["totalSize"].ToString();
var promoteArray = JArray.Parse(data["promote"].ToString());
if (promoteArray != null && promoteArray.Count > 0)
{
......@@ -134,7 +135,11 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
return Task.CompletedTask;
}
/// <summary>
/// 解析列表数据
/// </summary>
/// <param name="array"></param>
private void FormatList(JArray array)
{
List<RB_Travel_Config> list = new List<RB_Travel_Config>();
......
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using DotnetSpider.DataFlow;
using DotnetSpider.DataFlow.Parser;
using DotnetSpider.Http;
using DotnetSpider.Scheduler.Component;
using DotnetSpider.Selector;
using DotnetSpider.Spiders.Model;
using MessagePack;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Newtonsoft.Json.Linq;
using Serilog;
using System.Linq;
namespace DotnetSpider.Spiders.Rules.SouthSastRule
{
/// <summary>
/// 东南旅游列表总页数
/// </summary>
public class SouthEastPageSpider : Spider
{
/// <summary>
/// Request configuration used by this spider. It is assigned in <c>RunAsync</c> and read in
/// <c>InitializeAsync</c> when the search request is built.
/// </summary>
public static SouthEastRequestConfig SouthEastRequest { get; set; }
/// <summary>
/// Stores the request configuration, then builds and runs the page-count spider.
/// </summary>
/// <param name="requestConfig">Request configuration (URL, origin, date window) used to build the search request; must not be null.</param>
/// <returns>A task that completes when the built spider host has finished running.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="requestConfig"/> is null.</exception>
public static async Task RunAsync(SouthEastRequestConfig requestConfig)
{
    // Fail fast: a null configuration would otherwise only surface later as a
    // NullReferenceException while the request is being built in InitializeAsync.
    if (requestConfig == null)
    {
        throw new ArgumentNullException(nameof(requestConfig));
    }

    SouthEastRequest = requestConfig;

    var builder = Builder.CreateDefaultBuilder<SouthEastPageSpider>(options =>
    {
        options.UseProxy = false;
        options.Speed = 1;
        options.RequestTimeout = 60;
    });
    builder.UseSerilog();
    builder.UseQueueDistinctBfsScheduler<HashSetDuplicateRemover>();
    await builder.Build().RunAsync();
}
/// <summary>
/// Creates the spider; all arguments are forwarded unchanged to the <c>Spider</c> base constructor.
/// </summary>
/// <param name="options">Spider options passed to the base class.</param>
/// <param name="services">Spider services passed to the base class.</param>
/// <param name="logger">Logger passed to the base class.</param>
public SouthEastPageSpider(IOptions<SpiderOptions> options, SpiderServices services, ILogger<Spider> logger) : base(options, services, logger)
{
}
/// <summary>
/// Registers <c>PageParser</c> as the data flow and queues one POST request for page 1 of the
/// search results, using the URL, origin and date window from <c>SouthEastRequest</c>.
/// </summary>
/// <param name="stoppingToken">Cancellation token supplied by the caller; not used by this method.</param>
/// <returns>A task that completes once the request has been added.</returns>
protected override async Task InitializeAsync(CancellationToken stoppingToken)
{
    AddDataFlow(new PageParser());

    var pageRequest = new Request(SouthEastRequest.URL)
    {
        Method = "POST"
    };
    pageRequest.SetHeader("origin", SouthEastRequest.Origin);

    // GraphQL variables; only the first page is requested here.
    var searchVariables = new
    {
        companyNo = "",
        departure = "",
        destination = "",
        endDate = SouthEastRequest.EndDate,
        isB2C = true,
        isSpecial = false,
        keyWord = "",
        maxPrice = -1,
        minPrice = -1,
        pageNo = 1,
        portType = "B2C",
        sort = "PA",
        startDate = SouthEastRequest.StartDate,
        tourDays = "",
        travelType = "",
    };
    var payload = new
    {
        operationName = "",
        query = "query ($departure: String, $destination: String, $startDate: String!, $endDate: String!, $keyWord: String, $portType: String!, $tourDays: String, $travelType: String, $isSpecial: Boolean, $pageNo: Int!, $sort: String!, $minPrice: Int!, $maxPrice: Int!, $companyNo: String, $isB2C: Boolean!) {getDtSearchResult(departure: $departure, destination: $destination, startDate: $startDate, endDate: $endDate, keyWord: $keyWord, portType: $portType, tourDays: $tourDays, travelType: $travelType, isSpecial: $isSpecial, pageNo: $pageNo, sort: $sort, minPrice: $minPrice, maxPrice: $maxPrice, companyNo: $companyNo) {error {msgCode msgDesc __typename} data { pageNo totalPage totalSize promote {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag {code name __typename} price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate {grupNo date __typename } mktInfo imageName promoteTag __typename } products {prodNo prodType prodName tourDay tourNight travelCode travelName saleTag { code name __typename } price { b2cPrice b2bPrice @skip(if: $isB2C) __typename } departureDate{ grupNo date __typename} mktInfo imageName promoteTag __typename } __typename } __typename } }",
        variables = searchVariables
    };

    // The JSON body is wrapped in a StringContent and serialized with MessagePack before being attached.
    string json = Newtonsoft.Json.JsonConvert.SerializeObject(payload);
    pageRequest.Content = MessagePackSerializer.Typeless.Serialize(new StringContent(json, "application/json", "UTF-8"));

    await AddRequestsAsync(new List<Request> { pageRequest });
}
/// <summary>
/// Parses the search response and records the total number of result pages.
/// </summary>
protected class PageParser : DataParser
{
    public PageParser()
    {
    }

    /// <summary>
    /// Reads <c>data.getDtSearchResult.data.totalPage</c> from the response JSON and, when it is a
    /// non-negative integer, stores it in <c>SouthEastService.RequestConfig.TotalPage</c>.
    /// When the value is missing or not a valid integer, the existing <c>TotalPage</c> is left unchanged.
    /// </summary>
    /// <param name="context">Data-flow context whose <c>Selectable.Value</c> holds the response JSON.</param>
    /// <returns>A completed task; parsing is synchronous.</returns>
    protected override Task Parse(DataContext context)
    {
        var rootValue = JObject.Parse(context.Selectable.Value);

        // SelectToken yields null when any path segment is absent or JSON null (for example an
        // error response with "data": null), instead of throwing like chained indexers would.
        var totalPageToken = rootValue.SelectToken("data.getDtSearchResult.data.totalPage");

        if (totalPageToken != null
            && int.TryParse(totalPageToken.ToString(), out int totalPage)
            && totalPage >= 0)
        {
            SouthEastService.RequestConfig.TotalPage = totalPage;
        }
        else
        {
            // Do not overwrite the configured value with 0 from a failed parse:
            // that would make the list spider request no pages at all.
            Console.WriteLine("SouthEastPageSpider: totalPage missing or invalid, keeping " + SouthEastService.RequestConfig.TotalPage);
        }

        return Task.CompletedTask;
    }
}
}
}
......@@ -99,10 +99,17 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
var rootValue = JObject.Parse(context.Selectable.Value);
if (rootValue != null)
{
var rootData = JObject.Parse(rootValue["data"].ToString());
var getGfgProdHtml = JObject.Parse(rootData["getGfgProdHtml"].ToString());
var data = JArray.Parse(getGfgProdHtml["data"].ToString());
FormatList(context.Request.Headers["prodNo"].ToString(),data);
try
{
var rootData = JObject.Parse(rootValue["data"].ToString());
var getGfgProdHtml = JObject.Parse(rootData["getGfgProdHtml"].ToString());
var data = JArray.Parse(getGfgProdHtml["data"].ToString());
FormatList(context.Request.Headers["prodNo"].ToString(), data);
}
catch (Exception ex)
{
}
}
return Task.CompletedTask;
}
......
......@@ -13,35 +13,41 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
{
public static List<RB_Travel_Config> TRAVEL_CONFIGS = new List<RB_Travel_Config>();
public static SouthEastRequestConfig RequestConfig = new SouthEastRequestConfig();
/// <summary>
/// Resets the shared travel-config list and replaces the shared request configuration with defaults:
/// the GraphQL endpoint, a date window from tomorrow to seven days ahead (yyyyMMdd),
/// a total page count of 1, and the site origin.
/// </summary>
public SouthEastService()
{
    TRAVEL_CONFIGS = new List<RB_Travel_Config>();

    // Populate a local instance first so the static field only ever sees a fully initialized config.
    var defaults = new SouthEastRequestConfig();
    defaults.URL = "https://gql.settour.com.tw/graphql";
    defaults.StartDate = DateTime.Now.AddDays(1).ToString("yyyyMMdd");
    defaults.EndDate = DateTime.Now.AddDays(7).ToString("yyyyMMdd");
    defaults.TotalPage = 1;
    defaults.Origin = "https://trip.settour.com.tw";
    RequestConfig = defaults;
}
public async Task RunAsync()
{
Console.WriteLine("********* SouthEastService_Srart ***********");
//1、获取列表页数量
//await SouthEastListSpider.RunAsync();
Console.WriteLine("********* 2222222222222222222222 ***********");
//2、获取详情信息,补充列表不完善信息
var obj = new
{
code = "GDP0000001346",
no = "1",
travel_title_key = "0"
};
TRAVEL_CONFIGS.Add(new RB_Travel_Config()
{
KeyInfo = Newtonsoft.Json.JsonConvert.SerializeObject(obj)
});
//await SouthEastDetailsSpider.RunAsync();
//获取总页数
await SouthEastPageSpider.RunAsync(RequestConfig);
Console.WriteLine("获取总页数:" + RequestConfig.TotalPage);
//获取列表
await SouthEastListSpider.RunAsync(RequestConfig);
Console.WriteLine("获取总条数:" + TRAVEL_CONFIGS.Count);
//GFG0000000094
//获取行程信息
await SouthEastDetailsSpider.RunAsync(RequestConfig);
//await SouthEastProdHtmlDetailsSpider.RunAsync();
Console.WriteLine("COnfig:" + Newtonsoft.Json.JsonConvert.SerializeObject(TRAVEL_CONFIGS));
await SouthEastProdHtmlDetailsSpider.RunAsync();
Console.WriteLine("********* SouthEastService_End ************");
////3、补充酒店,餐食等信息
......@@ -51,4 +57,36 @@ namespace DotnetSpider.Spiders.Rules.SouthSastRule
//await BwtStorageSpider.RunAsync();
}
}
/// <summary>
/// Request configuration shared by the South East travel spiders.
/// </summary>
public class SouthEastRequestConfig
{
/// <summary>
/// Start date of the search window; sent as the <c>startDate</c> query variable.
/// The service constructor fills it with a yyyyMMdd string.
/// </summary>
public string StartDate { get; set; }
/// <summary>
/// End date of the search window; sent as the <c>endDate</c> query variable.
/// The service constructor fills it with a yyyyMMdd string.
/// </summary>
public string EndDate { get; set; }
/// <summary>
/// URL the spiders send their requests to.
/// </summary>
public string URL { get; set; }
/// <summary>
/// Value sent in the <c>origin</c> request header (cross-origin setting).
/// </summary>
public string Origin { get; set; }
/// <summary>
/// Total number of result pages; written by the page spider's parser and used as the
/// upper bound of the list spider's page loop.
/// </summary>
public int TotalPage { get; set; }
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment