package homeWork5;
import java.io.*;
import java.util.*;
/**
 * Collects the distinct words of the book text file and prints them.
 *
 * <p>The text is scanned one character at a time. A word is a maximal run of
 * characters that are neither whitespace nor one of the characters listed in
 * {@link #PUNCTUATION}. No regular expressions are involved.
 */
public class WarAndWorldService1 {

    /** Path of the book file, resolved against the current working directory. */
    private static final String BOOK_PATH = "Module/Война и мир_книга.txt";

    /** Punctuation characters that end a word, in addition to any whitespace. */
    private static final String PUNCTUATION = ",.!)\":;?*(";

    /**
     * Reads the book file, then prints the set of distinct words followed by
     * the number of distinct words to standard output.
     *
     * <p>Nothing is thrown to the caller: if the file does not exist or cannot
     * be read, a message is printed to standard output instead.
     */
    public void runSet() {
        // try-with-resources guarantees the file handle is released on every
        // path, including when reading fails half-way through.
        // NOTE(review): FileReader decodes with the default charset (platform
        // dependent before Java 18) - confirm the encoding of the book file.
        try (Reader reader = new BufferedReader(new FileReader(BOOK_PATH))) {
            Set<String> wordsWeNeed = collectWords(reader);
            System.out.println(wordsWeNeed);
            System.out.println(wordsWeNeed.size());
        } catch (FileNotFoundException f) {
            System.out.println("Ошибка. Файл не найден.");
        } catch (IOException e) {
            System.out.println("Ошибка чтения файла.");
        }
    }

    /**
     * Splits the character stream into words and returns the distinct ones.
     *
     * @param source reader positioned at the start of the text; not closed here
     * @return the distinct non-empty words found in {@code source}
     * @throws IOException if reading from {@code source} fails
     */
    private static Set<String> collectWords(Reader source) throws IOException {
        Set<String> words = new HashSet<>();
        StringBuilder current = new StringBuilder();
        int symbol;
        while ((symbol = source.read()) != -1) {
            if (isDelimiter(symbol)) {
                flushWord(current, words);
            } else {
                current.append((char) symbol);
            }
        }
        // The text may end without a trailing delimiter.
        flushWord(current, words);
        return words;
    }

    /**
     * Moves the pending word, if any, into the set and clears the buffer.
     * Empty buffers are skipped so that consecutive delimiters (for example
     * a comma followed by a space) do not add an empty string to the set.
     */
    private static void flushWord(StringBuilder current, Set<String> words) {
        if (current.length() > 0) {
            words.add(current.toString());
            current.setLength(0);
        }
    }

    /**
     * Tells whether the character separates words: any whitespace (space,
     * tab, '\n', '\r', ...) or one of the {@link #PUNCTUATION} characters.
     */
    private static boolean isDelimiter(int symbol) {
        return Character.isWhitespace(symbol) || PUNCTUATION.indexOf(symbol) >= 0;
    }
}
问题是如何首先将文本文件数据转换为字符串。例如:
while((symbol = fileReader.read()) != -1) {
builder.append((char) symbol);
}
String result = builder.toString();
然后再从这个字符串中提取单词(不含标点符号等)。我知道可以这样写:
String[] resultArr = result.split(" ");
但据我了解,在这种情况下需要使用正则表达式。我的问题是:能否在不使用正则表达式的情况下,从字符串中提取出“纯”单词?