是否可以以某种方式自定义 QTabWidget？

Question

Null

Asked:2022-05-10 07:10:48 +0000 UTC2022-05-10 07:10:48 +0000 UTC 2022-05-10 07:10:48 +0000 UTC

奥赛任务优化

772

有一个任务：

最初，我用python编写代码，结果是这样的：

from array import array


def string_xor(a: str, b: str):
    """
    Проверка на схожесть строк
    3 - одинаковые
    2 - 1 различие
    1 - больше 2 отличий
    """
    k = len(a)
    if k == len(b):
        a1 = array('u', a)
        b1 = array('u', b)
        errors = 0
        for i in range(k):
            if a1[i] != b1[i]:
                errors += 1
        if errors > 1:
            return 1
        elif errors == 1:
            return 2
        elif errors == 0:
            return 3
    return 1


n, m = map(int, input().split())
words = sorted(input() for _ in range(n))
for _ in range(m):
    word = input()
    one_diff = []  # Список слов, которые расходятся ровно на одну букву
    for cur in words:
        ans = string_xor(word, cur)
        if ans == 3:
            print(cur)
            break
        elif ans == 2:
            one_diff.append(cur)
    else:
        if one_diff:
            print(min(one_diff))
        else:
            print('?')

但它在其中一项时间测试中崩溃。然后我重写了它的优点，认为它会有所帮助。

#include<iostream>
#include "vector"
#include "algorithm"
using namespace std;

int check(string a, string b) {
    int k = a.size();
    if (k == b.size()) {
        int errors(0);
        for (int i = 0; i < k; i++)
            errors += a[i] != b[i];
        if (errors == 0)
            return 0;
        else if (errors == 1)
            return 1;
        return 2;
    }
    return 2;
}

int main() {
    ios::sync_with_stdio(false);
    int n, m;
    cin >> n >> m;
    string word;
    vector <string> words(n), one_diff;
    for (string &el : words)
        cin >> el;
    sort(words.begin(), words.end());
    for (int i = 0; i < m; i++) {
        cin >> word;
        bool is_break(false);
        for (const string &cur : words) {
            int d = check(cur, word);
            if (d == 0) {
                cout << cur << "\n";
                is_break = true;
                break;
            }
            else if (d == 1)
                one_diff.push_back(cur);
        }
        if (!is_break) {
            if (!one_diff.empty()) {
                sort(one_diff.begin(), one_diff.end());
                cout << one_diff[0] << "\n";
                one_diff.clear();
            }
            else
                cout << "?" << "\n";
        }
    }
}

但它并没有帮助，落在同样的考验。告诉我这里需要更改什么，以便程序及时适应。

我做了一个字符串长度的字典，有一个时间是1.033，它变成了1.09代码：

from array import array


def string_xor(a: str, b: str):
    k = len(a)
    if k == len(b):
        errors = 0
        for a1, b1 in zip(a, b):
            errors += a1 != b1
        if errors > 1:
            return 1
        elif errors == 1:
            return 2
        elif errors == 0:
            return 3
    return 1


n, m = map(int, input().split())
d = {}
for _ in range(n):
    word = input()
    z = len(word)
    d[z] = d.get(z, [word]) + [word]
for i in d.keys():
    d[i].sort()
for _ in range(m):
    word = input()
    one_diff = set()
    try:
        for cur in d[len(word)]:
            ans = string_xor(word, cur)
            if ans == 3:
                print(cur)
                break
            elif ans == 2:
                one_diff.add(cur)
        else:
            if one_diff:
                print(min(one_diff))
            else:
                print('?')
    except KeyError:
        print('?')

PS 代码及时崩溃...
PPS 抱歉，我无法提供该任务的链接。

4 个回答

Voted

Stanislav Volodarskiy · Answer 1 · 2022-05-25T04:46:12Z

算法

对于每个字长，都会k计算k + 1哈希值。一个用于整个单词，其余用于没有一个字母的单词的所有变体。字母没有被划掉，（实际上）被字母表之外的字符代替。所以少碰撞。

单词索引放置在手写的哈希表中。表的大小等于字典中所有单词的哈希总数。换句话说，表格中没有空闲单元格。

Word     words[n]           // слова словаря
uint32_t index[ht_size + 1] // метки в следующей таблице
uint32_t table[ht_size]     // индексы слов

// этот код перебирает все слова в словаре с хешем `h`
for (uint32_t i = index[h]; i < index[h + 1]; ++i) {
    words[table[i]];
}

创建表后，k + 1会为每个检查的单词计算一个哈希值。检查与哈希相对应的所有单词是否可能相等或相等，而不需要单个字符。

字典记忆

所有单词按20字符排列的字典。文件大小不超过 2MB。那么字数n < 100000。存储一个字的结构需要21每个字的字节数。哈希数2100000 = (1 + 20) * 100000。

words[ 100000] // 2100000B (2.1MB)
index[2100001] // 8400004B (8.4MB)
table[2100000] // 8400000B (8.4MB)

总计19MB。

一个大小的文件可以容纳的最大单词数2MB是1 + 26 + 26**2 + 26**3 + 404953 = 423232. 所有长度为 0、1、2 和 3 的不同单词，以及长度为 4 的单词的其余部分。这样的字典将占用423232 * 21 = 8887872内存中的字节。没有了9MB。所需的最大哈希数不超过423232 + 2 * 1024 * 1024 = 2520384. 总内存不超过9MB + 10MB + 10MB = 29MB.

表现

对于所有经过测试的字典，在未优化的 GCC 构建上的运行时间不超过一秒。也许缺少了什么。字典测试了 230,000 个单词（最多四个字符）和 50,000 个单词（最多 20 个字符）。速度取决于散列的质量，而散列的质量又必须很快。哈希函数在 Boost 中被监视。

通过在字典中选择单词，您可以尝试减慢程序的速度。最多 20 个字符的单词2 * 10^28，很明显，对于任何适合 64MB 的哈希表，您都可以选择这样的单词，它们最终都放在一个篮子里。

编码

// g++ -std=c++11 -pedantic -Wall -Wextra -Werror proofreading.cpp

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>

const int max_size = 20;

void fail() {
    std::fprintf(stderr, "ERROR\n");
    std::exit(EXIT_FAILURE);
}

void skip_line() {
    for (; ; ) {
        int c = std::getc(stdin);
        if (c == EOF) {
            fail();
        }
        if (c == '\n') {
            break;
        }
    }
}

class Word {
public:
    void scan() {
        if (std::fgets(data, max_size + 1, stdin) == nullptr) {
            fail();
        }
        const size_t len = std::strlen(data);
        if (len == 0) {
            fail();
        }
        if (data[len - 1] == '\n') {
            data[len - 1] = '\0';
        } else {
            skip_line();
        }
    }

    void print() const {
        std::fputs(data, stdout);
    }

    size_t length() const {
        return std::strlen(data);
    }

    uint32_t hash() const {
        uint32_t seed = 0;
        hash(seed, data);
        return seed;
    }

    uint32_t hash(size_t skip) const {
        uint32_t seed = 0;
        hash(seed, skip, data);
        hash(seed, static_cast<char>(skip));
        hash(seed, data + (skip + 1));
        return seed;
    }

    bool eq(const Word &w) const {
        return std::strcmp(data, w.data) == 0;
    }

    bool eq(const Word &w, size_t skip) const {
        return
            length() == w.length() &&
            std::memcmp(data             , w.data             , skip) == 0 &&
            std::strcmp(data + (skip + 1), w.data + (skip + 1)      ) == 0
        ;
    }

    bool lt(const Word &w) const {
        return std::strcmp(data, w.data) < 0;
    }


private:
    char data[max_size + 1];

    static void hash(uint32_t &seed, char c) {
        seed ^= static_cast<unsigned char>(c) + (seed << 6) + (seed >> 2);
    }

    static void hash(uint32_t &seed, size_t n, const char data[/* n */]) {
        for (size_t i = 0; i < n; ++i) {
            hash(seed, data[i]);
        }
    }

    static void hash(uint32_t &seed, const char data[]) {
        for (const char *p = data; *p != '\0'; ++p) {
            hash(seed, *p);
        }
    }
};

int main() {
    int n, m;

    if (std::scanf("%d %d", &n, &m) != 2) {
        fail();
    }
    skip_line();

    std::vector<Word> words(n);
    size_t ht_size = 0;
    for (int i = 0; i < n; ++i) {
        words[i].scan();
        ht_size += 1 + words[i].length();
    } 

    std::vector<uint32_t> index(ht_size + 1, 0);
    for (const Word &w : words) {
        ++index[w.hash() % ht_size];
        const size_t length = w.length();
        for (size_t i = 0; i < length; ++i) {
            ++index[w.hash(i) % ht_size];
        }
    } 

    {
        uint32_t acc = 0;
        for (uint32_t &i : index) {
            acc += i;
            i = acc;
        }
    }

    for (int i = ht_size; i > 0; --i) {
        index[i] = index[i - 1];
    }
    index[0] = 0;

    std::vector<uint32_t> table(ht_size);
    for (int i = 0; i < n; ++i) {
        const Word &w = words[i];
        uint32_t &j = index[w.hash() % ht_size];
        table[j++] = i;
        const size_t length = w.length();
        for (size_t k = 0; k < length; ++k) {
            uint32_t &j = index[w.hash(k) % ht_size];
            table[j++] = i;
        }
    } 

    for (int i = ht_size; i > 0; --i) {
        index[i] = index[i - 1];
    }
    index[0] = 0;

    for (int j = 0; j < m; ++j) {
        Word w;
        w.scan();

        {
            const uint32_t h = w.hash() % ht_size;
            const uint32_t i1 = index[h];
            const uint32_t i2 = index[h + 1];
            uint32_t i;
            for (i = i1; i < i2; ++i) {
                 if (w.eq(words[table[i]])) {
                     break;
                 }
            }
            if (i < i2) {
                w.print();
                std::puts("");
                continue;
            }
        }

        {
            const Word *best_match = nullptr;
            const size_t length = w.length();
            for (size_t k = 0; k < length; ++k) {
                const uint32_t h = w.hash(k) % ht_size;
                const uint32_t i1 = index[h];
                const uint32_t i2 = index[h + 1];
                for (uint32_t i = i1; i < i2; ++i) {
                    const Word &ww = words[table[i]];
                    if (w.eq(ww, k)) {
                        if (best_match == nullptr || ww.lt(*best_match)) {
                            best_match = &ww;
                        }
                    }
                }
            }

            if (best_match != nullptr) {
                best_match->print();
                std::puts("");
            } else {
                std::puts("?");
            }
        }
    } 
}

Jack_oS · Answer 2 · 2022-05-20T23:00:45Z

Jack_oS

2022-05-20T23:00:45Z2022-05-20T23:00:45Z

试试这样：

from jellyfish import levenshtein_distance


def orthography(word, vocabulary):
    if word in vocabulary:
        return word

    for v_word in sorted(vocabulary):
        if levenshtein_distance(word, v_word) == 1:
            return v_word

    return '?'

for word in words:
    print(orthography(word, vocabulary))

然后对于这样的数据：

vocabulary = ['a', 'love', 'contexts', 'math']
words = ['i', 'love', 'programming', 'contests']

将输出以下内容：

a
love
?
contexts

对于第二个测试用例：

vocabulary = ['ac', 'ab']
words = ['aa']

答案将是：

ab

2

Никита Самоуков · Answer 3 · 2022-05-25T00:28:56Z

Никита Самоуков

2022-05-25T00:28:56Z2022-05-25T00:28:56Z

这是一个特殊的哈希任务。您只需要计算成对字母的哈希值。

X_X_X_X (0-a)
_X_X_X_ (0-b)
XX__XX__XX__XX (1-a)
__XX__XX__XX__ (1-b)
XXXX____XXXX____ (2-a)
____XXXX____XXXX (2-b)

也就是有字

vector<string> words;

还有他们的哈希

array<array<unordered_map<size_t, vector<string*>>, 2>, 5> hashes;

并检查字典是否包含：

string* word_get(string s)
{
    for(size_t i = 0; i < hashes.size(); i++)
    {
        size_t ha = word_hasher(s, i);
        if (hashes[i][0].find(ha) == hashes[i][0].end() && 
            hashes[i][1].find(ha) == hashes[i][1].end()) 
            return nullptr;
    }

    // word final tests
    return nullptr;
}

2

ТарасПрогер · Answer 4 · 2022-07-01T13:43:39Z

我制作了自己的版本：

#include <iostream>

using namespace std;

string* vocab;
string* text;

int quantityOfWordsInVocab, quantityOfWordsInText;


void diff(int index)
{
    int howMany{ 0 };
    int k = 0;
    int sizeOfTextWord = text[index].size();
        for (int j = 0; j < quantityOfWordsInVocab; j++)
        {
            if (sizeOfTextWord != vocab[j].size()) {  continue; }

            while (k != vocab[j].size())
            {
                
                if (text[index][k] != vocab[j][k])
                {
                    howMany++;
                }
    
                k++;            
            }
            if (!(howMany >= 2))
            {
            
                cout << vocab[j] << '\n';
                return;
            }
            howMany = 0;
        }

    
        cout << "?\n";
}

int main()
{
    ios::sync_with_stdio(false);
    cin >> quantityOfWordsInVocab >> quantityOfWordsInText;

    vocab = new string[quantityOfWordsInVocab];
    text = new string[quantityOfWordsInText];

    for (size_t i = 0; i < quantityOfWordsInVocab; i++)
    {
        cin >> vocab[i];
    }

    for (size_t i = 0; i < quantityOfWordsInText; i++)
    {
        cin >> text[i];
    }

    cout << endl;
    ////////////////////////////////////////////////////////////

    bool ok = false;
    for (size_t i = 0; i < quantityOfWordsInText; i++)
    {
        for (size_t j = 0; j < quantityOfWordsInVocab; j++)
        {       
            if (text[i] == vocab[j])
            {
                cout << text[i] << '\n';
                ok = true;
                break;
            }           
                    
        }
        if (!ok)
        {
            diff(i);
        }
        ok = false;
    }
    return 0; 
}

工作速度足够快

奥赛任务优化

算法

字典记忆

表现

编码

表格填充不起作用

提示 50/50，有两个，其中一个是正确的

在 PyQt5 中停止进程

我的脚本不工作

在文本文件中写入和读取列表

如何像屏幕截图中那样并排排列这些块？

确定文本文件中每一行的字符数

将接口对象传递给 JAVA 构造函数

正确更新数据库中的数据

Python解析不是css

奥赛任务优化

4 个回答

算法

字典记忆

表现

编码

相关问题