Parsing a file token by token

Posted by zhuizhuhaomeng Blog on August 5, 2024
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# parse_file($yfile)
#
# Slurps the file named by $yfile and scans it from start to end, printing
# every identifier (/[_a-zA-Z]\w*/) found outside of string literals and
# comments.
#
# Skipped without being scanned for identifiers:
#   - single- and double-quoted strings (a backslash escapes the next char)
#   - /* ... */ block comments
#   - // line comments
#
# Identifiers are printed to the currently selected output handle exactly
# as found, with no separator between them.
#
# Dies if the file cannot be opened.
sub parse_file($)
{
    my ($yfile) = @_;

    # Three-argument open so the file name can never be taken for a mode
    # or a pipe specification.
    open my $in, '<', $yfile
        or die "Cannot open file $yfile for reading: $!\n";
    my $s = do { local $/; <$in> };
    close $in;

    # Every pattern is anchored with \G and uses /gc: a successful match
    # advances pos($s), a failed one leaves it untouched so the next
    # branch retries at the same offset.
    while (1) {
        # skip single quote string
        if ($s =~ /\G\s*('(?:\\.|[^'\\]+)*')/gcsm) {
            next;

        # skip double quote string
        } elsif ($s =~ /\G\s*("(?:\\.|[^"\\]+)*")/gcsm) {
            next;

        # skip comment like /* xxx */
        } elsif ($s =~ m{\G\s*(/\*.*?\*/)}gcsm) {
            next;

        # skip comment like // xxx
        } elsif ($s =~ m{\G\s*//[^\n]*\n?}gcms) {
            next;

        # plain code: everything up to the next quote or slash
        } elsif ($s =~ m{\G\s*([^'"/]+)}gcsm) {
            my $seg = $1;
            while ($seg =~ /\b([_a-zA-Z]\w*)/g) {
                my $name = $1;
                print($name);
            }
            next;

        # match single / with optional space; this also consumes a quote
        # that does not start a complete string literal
        } elsif ($s =~ /\G\s*(.)/gcsm) {
            next;

        # nothing left to consume: end of input
        } else {
            last;
        }
    }

    return;
}