我想尝试将我的音乐文件按类型划分到文件夹中,为了方便起见,我用 C# 编写代码。我需要的信息位于 Google 搜索结果页面 HTML 代码的某个 div 中……
我想尝试将我的音乐文件按类型划分到文件夹中,为了方便起见,我用 C# 编写代码。我需要的信息位于 Google 搜索结果页面的 HTML 代码中,具体在 class 为 'xGj8Mb' 的 div 里。但是无论使用标准的 C# 方法(HttpClient)还是 AngleSharp 库,我都无法获取到这部分信息,而且页面中的很多其他内容也没有被保存下来。我使用 Jsoup 库用 Java 重写了相同的代码,它能够获取页面中的全部信息。下面是我做这个实验时使用的代码,分别是使用标准方法的版本和使用 AngleSharp 的版本。
使用标准方法的代码
/// <summary>
/// Experiment: searches Google for a music file found in a local folder and
/// saves the returned HTML page to disk.
/// </summary>
public class Program {
    /// <summary>Extensions treated as music files (compared case-insensitively).</summary>
    private static readonly string[] MusicExtensions = { ".mp3", ".wav" };

    // One shared client for the whole process instead of a new HttpClient per search.
    private static readonly HttpClient Client = CreateClient();

    /// <summary>
    /// Entry point: scans the music folder and runs a search for the first music file found.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static async Task Main(string[] args) {
        const string folderPath = @"C:\Users\chris\Desktop\Attilio\ATTILIO MUSIC";
        if (!Directory.Exists(folderPath)) {
            Console.Error.WriteLine("DOES NOT EXIST");
            return;
        }

        var archive = Directory.GetFiles(folderPath);
        foreach (var entry in archive) {
            // Directory.GetFiles only returns files, so just the extension needs checking.
            var fileName = Path.GetFileName(entry);
            if (!IsMusicFile(fileName)) {
                continue;
            }

            await SearchOnInternet(fileName);
            // NOTE(review): stops after the first music file, exactly as the original did.
            // This looks like a deliberate limiter for the experiment (one request only);
            // remove this return to process every file and reach the count below.
            return;
        }
        Console.WriteLine($"Number: {archive.Length}");
    }

    /// <summary>
    /// Searches Google for <paramref name="fileName"/> (without its music extension)
    /// and saves the response body as an HTML file.
    /// </summary>
    /// <param name="fileName">File name, with or without a <c>.mp3</c>/<c>.wav</c> extension.</param>
    /// <returns>A task that completes once the page has been written, or the failure reported.</returns>
    private static async Task SearchOnInternet(string fileName) {
        fileName = StripMusicExtension(fileName);

        // Conversion for the search
        var fileNameAsUrl = Uri.EscapeDataString(fileName);
        Console.WriteLine(fileNameAsUrl);
        var url = $"https://www.google.com/search?q={fileNameAsUrl}";

        // Search: awaited instead of blocking on .Result.
        using var response = await Client.GetAsync(url);
        if (!response.IsSuccessStatusCode) {
            // Report the failure instead of silently producing no output file.
            Console.Error.WriteLine($"Search failed for '{fileName}': {(int)response.StatusCode}");
            return;
        }
        var responseBody = await response.Content.ReadAsStringAsync();

        // Save page. CreateDirectory is a no-op when the directory already exists.
        var outputPath = @"C:\Users\chris\Documents\Universita\Programmi\C#\FileAura\FileAuraServer\Search";
        Directory.CreateDirectory(outputPath);
        await File.WriteAllTextAsync(Path.Combine(outputPath, fileName + ".html"), responseBody);
    }

    /// <summary>Builds the shared client with a browser-like User-Agent header.</summary>
    private static HttpClient CreateClient() {
        var client = new HttpClient();
        // HttpClient sends no User-Agent header by default. Google reportedly serves different,
        // reduced markup to clients that do not look like a browser, which is presumably why the
        // expected div was missing from the saved page. TODO confirm against the new output.
        client.DefaultRequestHeaders.UserAgent.ParseAdd(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
        return client;
    }

    /// <summary>Returns true when <paramref name="fileName"/> ends with a known music extension.</summary>
    private static bool IsMusicFile(string fileName) {
        foreach (var extension in MusicExtensions) {
            if (fileName.EndsWith(extension, StringComparison.OrdinalIgnoreCase)) {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Removes a trailing music extension. Only the suffix is cut: the previous
    /// <c>Split(".mp3")[0]</c> truncated at the first ".mp3" anywhere in the name.
    /// </summary>
    private static string StripMusicExtension(string fileName) {
        foreach (var extension in MusicExtensions) {
            if (fileName.EndsWith(extension, StringComparison.OrdinalIgnoreCase)) {
                return fileName[..^extension.Length];
            }
        }
        return fileName;
    }
}
使用 AngleSharp 的代码
/// <summary>
/// Searches Google for <paramref name="fileName"/> (without its music extension), saves the
/// response to <c>output.html</c>, then parses it with AngleSharp and prints the page title
/// and the text of every <c>h3</c> element.
/// </summary>
/// <param name="fileName">File name, with or without a <c>.mp3</c>/<c>.wav</c> extension.</param>
/// <returns>
/// A task the caller can await. The method previously returned <c>void</c>, so callers could
/// not wait for it and any exception thrown inside it was unobservable.
/// </returns>
/// <exception cref="HttpRequestException">The request failed or returned a non-success status code.</exception>
private static async Task SearchOnInternet(string fileName) {
    // Remove only a trailing extension; Split(".mp3")[0] cut at the first ".mp3" anywhere in the name.
    foreach (var extension in new[] { ".mp3", ".wav" }) {
        if (fileName.EndsWith(extension, StringComparison.OrdinalIgnoreCase)) {
            fileName = fileName[..^extension.Length];
            break;
        }
    }

    // Conversion for the search
    var fileNameAsUrl = Uri.EscapeDataString(fileName);
    var googleSearchUrl = $"https://www.google.com/search?q={fileNameAsUrl}";

    // Create an HttpClient to send the request
    using var httpClient = new HttpClient();
    // HttpClient sends no User-Agent header by default. Google reportedly serves different,
    // reduced markup to clients that do not look like a browser, which is presumably why the
    // expected div was missing from the saved page. TODO confirm against the new output.
    httpClient.DefaultRequestHeaders.UserAgent.ParseAdd(
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");

    // Send a GET request to Google and make sure it succeeded
    using var response = await httpClient.GetAsync(googleSearchUrl);
    response.EnsureSuccessStatusCode();

    // Get the response content as a string
    var responseContent = await response.Content.ReadAsStringAsync();

    // Save the HTML content to a file
    string filePath = "output.html";
    await File.WriteAllTextAsync(filePath, responseContent);
    Console.WriteLine($"The HTML content has been saved to {filePath}");

    // Configure AngleSharp to parse the HTML
    var context = BrowsingContext.New(Configuration.Default);
    var parser = context.GetService<IHtmlParser>()
        ?? throw new InvalidOperationException("The AngleSharp configuration provides no HTML parser.");
    var document = await parser.ParseDocumentAsync(responseContent);

    // Example: extract the title of the search results page
    Console.WriteLine("Title: " + document.Title);

    // Example: extract the search results (the <h3> elements hold the result titles)
    foreach (var result in document.QuerySelectorAll("h3")) {
        Console.WriteLine(result.TextContent);
    }
}