Huffman Codes

In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded fromthe code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification

Each input file contains one test case. For each case, the first line gives an integer N(2N63)N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] … c[N] f[N]

where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M(1000)M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is an non-empty string of no more than 63 '0’s and '1’s.

Output Specification

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output

Yes
Yes
No
No

分析

核心算法

计算WPL

int GetWPL(HuffmanTree T,int depth)
{
    if(!T->Left&&!T->Right)return depth*T->Weight;
    else return GetWPL(T->Left,depth+1)+GetWPL(T->Right,depth+1);
}

对每位同学的提交,检查

  • 长度是否正确,注意:Code[i]的最大长度为N-1
  • 建树过程中检查是否满足前缀码要求

参考代码:(记得当时有两个样例没通过)

#include<iostream>
#define Maxchar 63
#define MaxStu 1000
#define MINDATA -1
using namespace std;
typedef struct HeapStruct_Huff* MinHeap;
typedef struct HuffmanNode* HuffmanTree;
typedef int ElementType;
struct HeapStruct_Huff{
    HuffmanTree *Elements;
    int Size,Capicity;
};
struct HuffmanNode{
    ElementType weight;
    HuffmanTree Left,Right;
};
MinHeap CreateMinHeap();
HuffmanTree CreateHuffmanTree();
void BuildMinHeap(MinHeap,int,const int* cf);
void InsertHeap(MinHeap,HuffmanTree);
HuffmanTree DeleteHeap(MinHeap);
HuffmanTree BuildHuffmanTree(MinHeap,int,const int* cf);
int HuffmanWPL(HuffmanTree,int);
void StuWPL(int,int,int*,string[][Maxchar],const int* cf);
bool isprefix(string*,int);
int main(int argc,char*argv[]) {
    char c;
    int n,stu,hwpl,swpl[MaxStu],char_freq[Maxchar];
    MinHeap H=CreateMinHeap();
    cin>>n;
    for(int i=0;i<n;i++)cin>>c>>char_freq[i];
    HuffmanTree T=BuildHuffmanTree(H,n,char_freq);
    hwpl=HuffmanWPL(T,0);
    cin>>stu;
    for(int i=0;i<stu;i++)swpl[i]=0;
    string stud[stu][Maxchar];
    StuWPL(stu,n,swpl,stud,char_freq);
    for(int i=0;i<stu;i++){
        if(!isprefix(stud[i],n)&&swpl[i]==hwpl)cout<<"Yes"<<endl;
        else cout<<"No"<<endl;
    }
}
HuffmanTree CreateHuffmanTree(){
    auto T=new struct HuffmanNode;
    T->Left=T->Right= nullptr;
    return T;
}
MinHeap CreateMinHeap(){
    auto H=(MinHeap) malloc(sizeof (struct HeapStruct_Huff));
    H->Elements= (HuffmanTree*) malloc(sizeof (HuffmanTree)*(Maxchar+1));
    H->Size=0,H->Capicity=Maxchar;
    H->Elements[0]=CreateHuffmanTree();
    H->Elements[0]->weight=MINDATA;
    H->Elements[0]->Left=H->Elements[0]->Right= nullptr;
    return H;
}
void InsertHeap(MinHeap H,HuffmanTree T){
    int i;
    for(i=++H->Size;T->weight<H->Elements[i/2]->weight;i/=2)
        H->Elements[i]=H->Elements[i/2];
    H->Elements[i]=T;
}
HuffmanTree DeleteHeap(MinHeap H){
    int parent,child;
    HuffmanTree minnode=H->Elements[1],tmp=H->Elements[H->Size--];
    for(parent=1;parent*2<=H->Size;parent=child){
        child=parent*2;
        if(child!=H->Size&&(H->Elements[child]->weight>H->Elements[child+1]->weight))child++;
        if(tmp->weight<=H->Elements[child]->weight)break;
        else H->Elements[parent]=H->Elements[child];
    }
    H->Elements[parent]=tmp;
    return  minnode;
}
void BuildMinHeap(MinHeap H,int n,const int* cf){
    int i,parent,child;
    for(i=1;i<=n;i++){
        H->Elements[i]=CreateHuffmanTree();
        H->Elements[i]->weight=cf[i-1];
        H->Size++;
    }
    for(i=H->Size/2;i>0;i--) {
        HuffmanTree tmp=H->Elements[i];
        for(parent=i;parent*2<=H->Size;parent=child) {
            child=parent*2;
            if(child!=H->Size&&(H->Elements[child]->weight>H->Elements[child+1]->weight))child++;
            if(tmp->weight<=H->Elements[child]->weight)break;
            else H->Elements[parent]=H->Elements[child];
        }
        H->Elements[parent]=tmp;
    }
}
HuffmanTree BuildHuffmanTree(MinHeap H,int n,const int* cf){
    HuffmanTree T;
    BuildMinHeap(H,n,cf);
    while(H->Size>1){
        T=CreateHuffmanTree();
        T->Left=DeleteHeap(H);
        T->Right=DeleteHeap(H);
        T->weight=T->Left->weight+T->Right->weight;
        InsertHeap(H,T);
    }
    T=DeleteHeap(H);
    return T;
}
int HuffmanWPL(HuffmanTree T,int deep){
    static int hwpl;
    if(T){
        if(!T->Left&&!T->Right)hwpl+=deep*T->weight;
        HuffmanWPL(T->Left,deep+1);
        HuffmanWPL(T->Right,deep+1);
    }
    return hwpl;
}
void StuWPL(int stu,int n,int* swpl,string stud[][Maxchar],const int* cf){
    int i,j;char c;
    for(i=0;i<stu;i++)for(j=0;j<n;j++)cin>>c>>stud[i][j];
    for(i=0;i<stu;i++)for(j=0;j<n;j++)swpl[i]+=cf[j]*(int)stud[i][j].size();
}
bool isprefix(string* stud,int n){
    int i;
    string maxstring;
    for(i=0;i<n;i++)if(stud[i].size()>maxstring.size())maxstring=stud[i];
    for(i=0;i<n;i++){
        int j=0;
        if(stud[i]!=maxstring&&stud[i][0]==maxstring[0])
            for(;stud[i][j]&&stud[i][j]==maxstring[j];j++);
        if(!stud[i][j])return true;
    }
    return false;
}

PAT Judge

The ranklist of PAT is generated from the status list, which shows the scores of the submissions. This time you are supposed to generate the ranklist for PAT.

Input Specification

Each input file contains one test case. For each case, the first line contains 3 positive integers, N(104)N (≤10^4), the total number of users, K(5)K (≤5), the total number of problems, and M(105)M (≤10^5), the total number of submissions. It is then assumed that the user id’s are 5-digit numbers from 00001 to N, and the problem id’s are from 1 to K. The next line contains K positive integers p[i] (i=1, …, K), where p[i] corresponds to the full mark of the i-th problem. Then M lines follow, each gives the information of a submission in the following format:

user_id problem_id partial_score_obtained

where partial_score_obtained is either −1 if the submission cannot even pass the compiler, or is an integer in the range [0, p[problem_id]]. All the numbers in a line are separated by a space.

Output Specification

For each test case, you are supposed to output the ranklist in the following format:

rank user_id total_score s[1] … s[K] where rank is calculated according to the total_score, and all the users with the same total_score obtain the same rank; and s[i] is the partial score obtained for the i-th problem. If a user has never submitted a solution for a problem, then “-” must be printed at the corresponding position. If a user has submitted several solutions to solve one problem, then the highest score will be counted.
The ranklist must be printed in non-decreasing order of the ranks. For those who have the same rank, users must be sorted in nonincreasing order according to the number of perfectly solved problems. And if there is still a tie, then they must be printed in increasing order of their id’s. For those who has never submitted any solution that can pass the compiler, or has never submitted any solution, they must NOT be shown on the ranklist. It is guaranteed that at least one user can be shown on the ranklist.

Sample Input

7 4 20
20 25 25 30
00002 2 12
00007 4 17
00005 1 19
00007 2 25
00005 1 20
00002 2 2
00005 1 15
00001 1 18
00004 3 25
00002 2 25
00005 3 22
00006 4 -1
00001 2 18
00002 1 20
00004 1 15
00002 4 18
00001 3 4
00001 4 2
00005 2 -1
00004 2 0

Sample Output

1 00002 63 20 25 - 18
2 00005 42 20 0 22 -
2 00007 42 - 25 - 17
2 00001 42 18 18 4 2
5 00004 40 15 0 25 -

分析

此题坑点:

  • 提交不通过编译的得分是-1,但最后输出时显示是0分,只有完全没做过的题显示’-’
  • 一个人只有提交过通过了编译的答案(最低分0分)才能在榜单上显示
  • 如果一个人对同一题多次提交过满分,该题满分次数只算一次
  • 榜单排序规则是
    • 第一关键字是总得分,总得分相同的排名相同
    • 相同总得分虽然排名相同,但仍然存在先后顺序
      • 第二关键字是题目满分个数
      • 第三关键字是id顺序

参考代码(好像也是有测试点过不了)

#include<iostream>
#include<cstdlib>
#define marks 5
#define users 1050
#define missioins 10050
using namespace std;
typedef struct input{
    int id,prob,mark;
}input;
typedef struct output{
    int id,tol,perf;
}output;
input inputs[missioins];
output outputs[users];
int n,m,k,fullmark[marks],compare(const void*,const void*),ranks[users][marks+4];
void patjudge();
int main(){
    cin>>n>>k>>m;
    for(int i=0;i<k;i++)cin>>fullmark[i];
    for(int i=0;i<m;i++)cin>>inputs[i].id>>inputs[i].prob>>inputs[i].mark;
    patjudge();
}
int compare(const void*node1,const void*node2){
    int comp=-1;
    if(((output*)node1)->tol<((output*)node2)->tol)comp=1;
    else if(((output*)node1)->tol==((output*)node2)->tol){
        if(((output*)node1)->perf<((output*)node2)->perf)comp=1;
        else if(((output*)node1)->perf==((output*)node2)->perf)
            if(((output*)node1)->id>((output*)node2)->id)comp=1;
    }
    return comp;
}
void patjudge(){
    for(int i=0;i<n;i++)for(int j=1;j<k+1;j++)ranks[i][j]=-2;
    for(int i=0,l=0,j;i<m;i++){
        int flag=0;
        for(j=0;j<=l;j++)if(ranks[j][0]==inputs[i].id){
            if(inputs[i].mark>ranks[j][inputs[i].prob]){
                if(inputs[i].mark>=0)ranks[j][k+3]=1;
                if(inputs[i].mark==fullmark[inputs[i].prob-1])ranks[j][k+1]++;
                ranks[j][inputs[i].prob]=inputs[i].mark;
            }
            flag=1;
        }
        if(!flag){
            ranks[l][0]=inputs[i].id;
            if(inputs[i].mark>=0)ranks[l][k+3]=1;
            if(inputs[i].mark==fullmark[inputs[i].prob-1])ranks[l][k+1]++;
            ranks[l++][inputs[i].prob]=inputs[i].mark;
        }
    }
    for(int i=0;i<n;i++)for(int j=1;j<=k;j++){
        if(ranks[i][j]>0)ranks[i][k+2]+=ranks[i][j];
        else if(ranks[i][j]==-1)ranks[i][j]=0;
    }
    for(int i=0;i<n;i++)outputs[i].id=ranks[i][0],outputs[i].tol=ranks[i][k+2],outputs[i].perf=ranks[i][k+1];
    qsort(outputs,n,sizeof(output),compare);
    int nums=0;
    for(int i=0;i<n;i++)if(ranks[i][k+3]==1)nums++;
    for(int i=0,num=1;i<nums;i++){
        if(i>0&&outputs[i].tol<outputs[i-1].tol)num=i+1;
        cout<<num<<' ';
        printf("%05d ",outputs[i].id);
        cout<<outputs[i].tol<<' ';
        for(int j=0;j<n;j++)if(ranks[j][0]==outputs[i].id){
            for(int l=1;l<k;l++){
                if(ranks[j][l]>=0)cout<<ranks[j][l]<<' ';
                else cout<<"- ";
            }
            if(ranks[j][k]>=0)cout<<ranks[j][k]<<endl;
            else cout<<'-'<<endl;
        }
    }
}