#!/usr/bin/perl ## # File : cut.pl # Author : spjspj ## use strict; use LWP::Simple; use POSIX qw(strftime); # Main { if (scalar (@ARGV) < 4) { print ("Usage: cut.pl <file> <term> <helper> <operation>!\n"); print (" . File can be - list, STDIN, or an actual file\n"); print (" . Term can be - a regex you're looking for\n"); print (" . Operation can be - grep, filegrep, count, size, strip_http, matrix_flip(for converting ringing touches!), oneupcount, wget\n"); print (" . Helper is dependent on the operation you're doing. A number for grep will go +/- that amount \n"); print (" cut.pl bob.txt dave 5 grep\n"); print (" cut.pl all_java2.java TOKEN_STARTS_HERE TOKEN_ENDS_HERE grep_between\n"); print (" cut.pl full_text.txt keys 0 filegrep\n"); print (" cut.pl full_text.txt 0 0 make_code_bat\n"); print (" dir /a /b /s *.java | cut.pl stdin 0 0 make_code_bat > bob.bat\n"); print (" dir /a /b /s *.xml | cut.pl stdin 0 0 make_code_bat > bob_xml.bat\n"); print (" cut.pl d:\\perl_programs output.*txt 7 age_dir | cut.pl list . 0 grep\n"); print (" cut.pl bob.txt 0 0 uniquelines \n"); print (" cut.pl file 0 0 strip_http\n"); print (" cut.pl stdin \";;;\" \"1,2,3,4\" fields\n"); print (" cut.pl bob.txt 0 0 matrix_flip\n"); print (" cut.pl bob.txt 0 0 condense (Used for making similar lines in files smaller..)\n"); print (" cut.pl bob.txt 0 0 str_condense (Used for making similar lines in files smaller..)\n"); print (" cut.pl stdin \"http://bob.com/a=XXX.id\" 1000 oneupcount \n"); print (" cut.pl stdin \"http://www.comlaw.gov.au/Details/XXX\" 1000 wget\n"); print (" cut.pl stdin \"https://gatherer.wizards.com/Pages/Card/Details.aspx?multiverseid=XXX\" 5274 oneupcount\n"); print (" cut.pl stdin \"https://gatherer.wizards.com/Pages/Card/Details.aspx?multiverseid=XXX' 5274 wget\n"); print (" cut.pl modern_bluesa \";;;\" \"0,7\" fields | cut.pl stdin \";;;\" 3 wordcombos\n"); print (" cut.pl modern_bluesa \";;;\" \"0,7\" fields | cut.pl stdin 0 0 uniquewords\n"); print (" cut.pl modern_bluesa \";;;\" \"0,2\" images_html\n"); print (" cut.pl stdin start_ _end letters\n"); print (' echo "" | cut.pl stdin "http://www.slightlymagic.net/forum/viewtopic.php?f=70&t=4554&start=30" 0 wget'); print ("\n"); print (" cut.pl all_java.java \"\\\+\\\+\\\+\\\+\" \"extends token\" cut_on_first_display_with_second\n\n"); print ("\n"); print ('\necho "" | cut.pl stdin "http://www.slightlymagic.net/forum/viewtopic.php?f=70&t=14062&start=XXX" 400 oneupcount | cut.pl stdin "XXX" 400 wget\n'); print ("\n"); print ('\necho "" | cut.pl stdin "http://mythicspoiler.com/c17/cards/stalkingleonin.html" 0 wget_card_spoiler\n'); print ('dir /a /b /s *.jar | cut.pl stdin "^" "7z l -r \"" replace | cut.pl stdin "$" "\"" replace > d:\temp\xyz.bat'); print ("\n"); print ('echo "1" | cut.pl stdin "https://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=16431&type=card" "6ED/Phantasmal Terrain.full.jpg" wget_image'); print ("\n"); exit 0; } my $file = $ARGV [0]; my $term = $ARGV [1]; my $helper = $ARGV [2]; my $operation = $ARGV [3]; my %combos; my %all_combos; my %dedup_line_hash; my $in_between_lines = 0; if ($file eq "list" && $operation ne "size") { while (<STDIN>) { chomp $_; my $file = $_; my $found_output = 0; #print "==========\n"; #print "RUNNING: cut.pl $file $term $helper $operation \n"; open PROC, "cut.pl $file $term $helper $operation |"; while (<PROC>) { if ($found_output == 0) { print ("\n\n==================\nProcessing file: $file\n"); $found_output = 1; } print ($_); } if ($found_output > 0) { print ("\n******************xx\n"); } close PROC; } exit; } if ($file eq "stdin") { open FILE, "-"; } elsif ($operation eq "strip_http") { open FILE, "$file"; binmode (FILE); } elsif ($operation ne "age_dir") { open FILE, "$file"; } else { } my $current_file = ''; my $dot_current_file = ''; my $in_file = 0; my $num_files = 0; # size functions! my $total_size = 0; # OldGrep functions! my %grep_past_lines; my $grep_past_lines_index = 1; my $grep_forward_lines = -1; # Grep variables: # Before and or after! my $before = 0; my $before_index = 0; my $after = 0; my $orig_after = 0; my $after_index = 0; my @before_lines; my @after_lines; # Grep variables: # Check before all the time (from first line), but only checkafter after the first line is matched! my $use_before = 0; my $use_after = 0; my $num_lines_after = 0; if ($helper =~ m/^\d+$/) { $before = $helper; $use_before = 1; if ($helper eq "0") { $use_before = 0; } $after = 0; $use_after = 1; $orig_after = $helper; } elsif ($helper =~ m/^-\d+/) { $before = -1 * $helper; $use_before = 1; } elsif ($helper =~ m/^\+\d+/) { $after = 0; $orig_after = $helper; $use_after = 1; } # Count functions! my $count = 1; my $seen_http = 0; my $lines_http = 0; my %matrix_flip; my $matrix_row = 0; my $matrix_col = 0; my $max_matrix_col = 0; my $condense_begin = 1; my $condense_line = ""; my $condense_start = ""; my $condense_regex = ""; my $condense_count = 0; if ($operation eq "oneupcount") { my $i = 0; for ($i = 0; $i < $helper; $i ++) { my $l = $term; $l =~ s/XXX/$i/; print ("$l\n"); } exit; } if ($operation eq "wget_seed") { my $i; for ($i = 10; $i < $helper + 10; $i++) { my $url = $term; $url = "http://gatherer.wizards.com/Pages/Card/Details.aspx?action=random"; my $content = get $url; $content =~ s/\s\s/ /gim; $content =~ s/\s\s/ /gim; $content =~ s/\n//gim; $content =~ s/.*multiverseid=(\d+).*/$1/gim; print "$content\n"; } } my %kkks; if ($operation eq "filegrep") { open KEYS, "$term"; while (<KEYS>) { chomp; $kkks {"^$_"} = 1; } } if ($operation eq "make_code_bat") { print ("\@echo off\n"); } my %ulines; my $ulines_count = 0; my @cut_on_term; my $saw_helper_cut_on_term = 0; while (<FILE>) { chomp $_; my $line = $_; if ($operation eq "grepold") { if ($line !~ m/$term/i && $grep_forward_lines < 0) { $grep_past_lines {$grep_past_lines_index} = $line; $grep_past_lines_index ++; if ($grep_past_lines_index > $helper) { $grep_past_lines_index = 1; } } elsif ($line =~ m/$term/i) { my $i = $grep_past_lines_index; if (defined ($grep_past_lines {$i})) { print $grep_past_lines {$i}, " --- 22222\n"; } $i++; if ($i > $helper) { $i = 1; } while ($i != $grep_past_lines_index) { if (defined ($grep_past_lines {$i})) { print $grep_past_lines {$i}, " --- 33333\n"; } $i ++; if ($i > $helper) { $i = 1; } } print "\n", $line, "\n"; my %new_hash; %grep_past_lines = %new_hash; $grep_past_lines_index = 1; $grep_forward_lines = $helper + 1; } if ($grep_forward_lines <= $helper && $grep_forward_lines > 0) { print $line, " --- 44444\n"; } $grep_forward_lines--; if ($grep_forward_lines == 0) { print "\n"; } } if ($operation eq "wget") { my $i; { my $url = $term; $url =~ s/XXX/$line/; print ("Looking at :$url:\n"); my $content = get $url; die "Couldn't get $url" unless defined $content; $content =~ s/\s\s/ /gim; $content =~ s/\s\s/ /gim; $content =~ s/\n//gim; print $url, "\n\n\n\n\n", "=================\n", $content, "============\n"; } } if ($operation eq "wget_card_spoiler") { my $i; { my $url = $term; $url =~ s/XXX/$line/; my $content = get $url; die "Couldn't get $url" unless defined $content; $content =~ s/\s\s/ /gim; $content =~ s/\s\s/ /gim; $content =~ s/\n//gim; $content =~ s/^.*CARD NAME-*->/CARD NAME-->/gim; $content =~ s/<!-*-END CARD.*$//gim; # Example: <!--CARD NAME-->Licia, Sanguine Tribune </font></td></tr><tr><td colspan="2" valign="top"> <!--MANA COST--> 5RWB </td></tr><tr><td colspan="2" valign="top"> <!--TYPE--> Legendary Creature - Vampire Soldier </td></tr><tr><td colspan="2" valign="top"> <!--CARD TEXT--> Licia, Sanguine Tribune costs 1 less to cast for each 1 life you gained this turn. <br><br> First strike, lifelink <br><br> Pay 5 life: Put three +1/+1 counters on Licia. Activate this ability only on your turn and only once each turn. </td></tr><tr><td colspan="2" valign="top"><i> <!--FLAVOR TEXT--> "I give my blood, my life, all I have in exchange for victory." </i></td></tr><tr><td colspan="2" valign="top"></td></tr><tr><td width="210" valign="bottom"><font size="1"> <!--ILLUS--> Illus. Magali Villeneuve <!--Set Number--><i> </i></font></td><td valign="bottom" align="left"><font size="2"> <!--P/T--> 4/4 <!--END CARD TEXT--><!--END CARD TEXT--><!--END CARD TEXT--><!--END CARD TEXT--><!--END CARD TEXT--><!--END CARD TEXT--> # >>> Duelist's Heritage|Commander 2016|1|R|{2}{W}|Enchantment|||Whenever one or more creatures attack, you may have target attacking creature gain double strike until end of turn.| $content =~ m/.*CARD NAME-*-> *(.*?) *</im; my $card_name = $1; $content =~ m/.*MANA COST-*->(.*?)</im; my $mana_cost = $1; $mana_cost =~ s/ //gim; $mana_cost =~ s/(.)/{$1}/gim; $content =~ m/.*TYPE-*->(.*?)</im; my $type = $1; $content =~ m/.*TEXT-*->(.*?)<!-*-FLAVOR/im; my $text = $1; $text =~ s/<br[ \/]*?>/\$/img; $text =~ s/\$ *\$/\$/img; $text =~ s/<[^>]+>//img; $text =~ s/[\$ ]*$//img; $content =~ m/.*P\/T-*-> *(.*?) *\/ *(.*?) *(<|$)/im; my $p = $1; my $t = $2; print ("$card_name|Commander 2017|??|CURM|$mana_cost|$type|$p|$t|$text|\n"); } } if ($operation eq "wget_image") { my $i; { if (!(-f "$helper")) { my $url = $term; print ("Download :$url:\n"); my $content = get $url; print ("Saw " . length ($content) . " bytes!\n"); print ("Save in $helper\n"); open OUTPUT, "> " . $helper or die "No dice!"; binmode (OUTPUT); print OUTPUT $content; close OUTPUT; print $url, " >>> ", $helper, "\n"; } else { print ("Found $helper existed already..\n"); if (-s "$helper" == 0) { `del "$helper"`; } } } } if ($operation eq "grep") { if ($line !~ m/$term/i && $use_after && $after > 0) { print ($line, "\n"); $after--; if ($after == 0) { print ("aaa===================\n"); } } if ($line !~ m/$term/i && $use_before) { $before_lines [$before_index] = $line; #print (" >>>> adding in $before_index ($line)\n"); #print (join (',,,', @before_lines)); #print ("\n"); $before_index ++; if ($before_index >= $before) { $before_index = 0; } } if ($line =~ m/$term/i) { if ($use_before) { #print ("bbb===================\n"); my $b = $before_index; my $ok_once = 1; while ($b != $before_index || $ok_once) { if (defined ($before_lines [$b])) { #print ("bbb" , $before_lines [$b], "\n");#.($b, .$before. $before_index, $ok_once).\n"); print ($before_lines [$b], "\n");#.($b, .$before. $before_index, $ok_once).\n"); } $ok_once = 0; if ($b >= $before - 1) { $b = -1; } $b++; } my @new_array; @before_lines = @new_array; } print ("$line\n"); if ($use_after) { $after = $orig_after; } } } if ($operation eq "grep_between") { if ($line =~ m/$term/i) { print ("\n===================================================================\n"); print ($line, "\n"); $in_between_lines = 1; } if ($line !~ m/$helper/i && $in_between_lines) { print ($line, "\n"); } if ($line =~ m/$helper/i && $in_between_lines) { print ($line, "\n"); $in_between_lines = 0; } } if ($operation eq "filegrep") { my $k; my $print = 1; foreach $k (keys (%kkks)) { if ($line =~ m/$k/ && $print) { $print = 0; print ($line, "\n"); } } } if ($operation eq "size") { if (-f $line) { my $sizer = -s $line; my $zzz = " $sizer"; $zzz =~ s/.*(........................)$/$1/; print ($zzz, " --- $line\n"); $total_size += $sizer; } } if ($operation eq "count") { print ("$count - $line\n"); $count++; } if ($operation eq "strip_http") { # Has to work on a file.. if ($line =~ m/.*HTTP/) { $seen_http = 1; print ("SEEN HTTP\n"); } #print (">>$line<<\n"); $lines_http ++; if ($seen_http && $line eq "") { $seen_http = 2; } } if ($operation eq "replace") { my $orig_line = $line; $line =~ s/$term/$helper/gi; if ($helper =~ m/\\n/) { $line =~ s/\\n/\n/gi; } if ($helper =~ m/''/) { $line =~ s/''/"/gi; } print ("$line\n"); eval("\$orig_line =~ s/$term/$helper/gi;"); #print ("$orig_line\n"); } if ($operation eq "dedup_line") { $line =~ m/::(.*)::/; my $user = $1; my $new_line; $line =~ s/.*://; while ($line =~ s/,([^,]*),/,/im) { $new_line .= "\n$user:$1\n"; if (not defined ($dedup_line_hash {"$user:$1"})) { $dedup_line_hash {"$user:$1"} = 1; $dedup_line_hash {$user} ++; } } print ("$new_line\n"); } if ($operation eq "matrix_flip") { my @chars = split //, $line; $matrix_col = 0; my $char; foreach $char (@chars) { $matrix_flip {"$matrix_row,$matrix_col"} = $char; $matrix_col ++; if ($max_matrix_col < $matrix_col) { $max_matrix_col = $matrix_col; } } $matrix_row ++; } if ($operation eq "str_condense") { if ($line =~ m/(.)(\1{3,})/) { $line =~ s/(.)(\1{3,})/sprintf ("$1!%d#", length ($2));/eg; } print $line, "\n"; } if ($operation eq "condense") { if ($condense_begin == 1) { #print (" begin........... $line \n"); $condense_begin = 0; $condense_line = $line; $condense_start = $line; $condense_start =~ s/^(.{10,25}).*/$1/; $condense_start =~ s/\W/./g; $condense_count = 0; } else { if ($line =~ $condense_start) { #print (" similar........... $line \n"); $condense_count++; } else { if ($condense_count > 1) { $condense_line .= " {+similar=$condense_count}"; } print $condense_line, "\n"; $condense_line = $line; if ($condense_line !~ m/......./) { $condense_begin = 1; } else { $condense_start = $line; $condense_start =~ s/^(.{10,25}).*/$1/; $condense_start =~ s/\W/./g; $condense_count = 0; } } } } if ($operation eq "fields") { #$line = "BBB$term$line$term"; my @fs = split /$term/, $line; my @shows = split /,/, "$helper,"; my $s; foreach $s (@shows) { if ($s eq "Rest") { print $line; } elsif ($s eq "NewLine") { print "\n"; } else { print $fs [$s], "$term"; } } print "\n"; } if ($operation eq "wordcombos") { my @fs = split /$term/, $line; # The first one is key, the rest need to be made into something my $current_key = $fs [0]; my $current_val = $fs [1]; $current_val =~ s/ /XXX/g; $current_val =~ s/\W//g; $current_val =~ s/XXX*/ /g; #print $current_key , " ---- ", $current_val, "\n"; my @words = split / /, uc ($current_val); my $w; my $ws; for ($w = 0; $w < scalar (@words); $w++) { my $x; $ws = $words [$w]; for ($x = $w + 1; $x < $w + $helper; $x++) { $ws .= "," . $words [$x]; } #$ws .= ";;;" . $current_key; $combos {$ws} ++; $all_combos {$ws} .= ";;;" . $current_key; } } if ($operation eq "uniquewords") { $line .= " "; my @words = split / /, uc ($line); my $w; my $ws; for ($w = 0; $w < scalar (@words); $w++) { $combos {$words [$w]} ++; } } if ($operation eq "cut_on_first_display_with_second") { if ($line =~ m/$helper/img) # cut_on_term { if ($saw_helper_cut_on_term) { print join ("\n", @cut_on_term); } else { #print ("\nNothing in this segment!!\n"); } $saw_helper_cut_on_term = 0; my @new_array; @cut_on_term = @new_array; } push @cut_on_term, $line; if ($line =~ m/$term/img) { $saw_helper_cut_on_term = 1; } } if ($operation eq "images_html") { my @fs = split /$term/, $line; my @shows = split /,/, "$helper,"; my $s; { # <img src='http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=220251&type=card'/> if ($fs[$shows[0]] =~ m/\*/) { my $id = $fs[$shows[0]]; $id =~ s/\*//g; $id =~ s/ //g; my $x = "<img src='http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=XXX&type=card'/>"; $x =~ s/XXX/$id/; print "$fs[$shows[1]]<br>$x"; print "\n"; } } } if ($operation eq "make_code_bat") { if ($line !~ m/all_/img) { print ("echo \" $line +++++++\"\n"); print ("type \"$line\"\n"); } } if ($operation eq "uniquelines") { if (!defined ($ulines {$line})) { $ulines {$line} = 1; $ulines_count ++; print $line, "\n"; #print "xxx $ulines_count\n"; } } if ($operation eq "countlines") { $ulines {$line} ++; } } if ($operation eq "age_dir2") { opendir DIR, $file or die "cannot open dir $file: $!"; print $file, "\n"; my $nextFile; foreach $nextFile (grep {-f && ($helper > -M)} readdir DIR) { #print $nextFile, " -- $helper - ", -M, "\n"; if ($nextFile =~ m/$term/) { print "type $nextFile\n"; } } } if ($operation eq "age_dir") { my $i; my $cmd = "type "; my $next_term = $term; for ($i = 0; $i < $helper; $i++) { $next_term = $term; my $now = time(); my $yyyymmdd = strftime "%Y%m%d", localtime($now - $i * 24*3600); $next_term =~ s/YYYYMMDD/$yyyymmdd/; $cmd .= " $next_term "; } print $cmd; } if ($operation eq "matrix_flip") { my $i; my $j; { for ($i = 0; $i < $max_matrix_col; $i++) { for ($j = 0; $j < $matrix_row; $j++) { print ($matrix_flip {"$j,$i"}); } print ("\n"); } } } if ($operation eq "size") { print ($total_size, " --- Cumulative total\n"); } close FILE; if ($operation eq "strip_http") { if ($seen_http == 2) { `tail +$lines_http > /tmp/_cut_file; chmod 777 /tmp/_cut_file`; `mv /tmp/_cut_file $file`; } } if ($operation eq "condense") { if ($condense_count > 1) { $condense_line .= " {+similar=$condense_count}"; } print $condense_line, "\n"; } if ($operation eq "wordcombos") { my $v; my @keys = keys (%combos); my @new_keys; my $v = 0; my $k; foreach $k (@keys) { if ($k =~ m/,,/) { next; } #if ($k !~ m/WHENEVER/) { next; } #if ($combos {$k} > 10) { #push @new_keys, $combos {$k}; # . " ---- " . $k . ",,," . $all_combos {$k}; push @new_keys, $k; } $v ++; } my @jjs = sort @new_keys; foreach $k (sort @jjs) { print $k, "\n"; } } if ($operation eq "dedup_line") { my $k; for $k (sort keys (%dedup_line_hash)) { if ($k !~ m/.*:.*/) { print ("$k ---> $dedup_line_hash{$k}\n"); } if ($k =~ m/(.*):(.*)\s*$/) { if ($dedup_line_hash{$1} > 7) { print ("/h $2\n"); } } } } if ($operation eq "uniquewords") { my $v; my @keys = keys (%combos); my @new_keys; my $v = 0; my $k; my $i = 0; foreach $k (@keys) { $i ++; print $combos {$k}, "; $k\n"; } } if ($operation eq "countlines") { my $line; #foreach $line (sort {$a <=> $b} values %ulines) foreach $line (sort { $ulines{$a} <=> $ulines{$b} } keys %ulines) { print ("$ulines{$line} ==== $line\n"); } } if ($operation eq "letters") { #open PROC, "cut.pl $file $term $helper $operation |"; my %as; $as {"A"} = 1; $as {"B"} = 1; $as {"C"} = 1; $as {"D"} = 1; $as {"E"} = 1; $as {"F"} = 1; $as {"G"} = 1; $as {"H"} = 1; $as {"I"} = 1; $as {"J"} = 1; $as {"K"} = 1; $as {"L"} = 1; $as {"M"} = 1; $as {"N"} = 1; $as {"O"} = 1; $as {"P"} = 1; $as {"Q"} = 1; $as {"R"} = 1; $as {"S"} = 1; $as {"T"} = 1; $as {"U"} = 1; $as {"V"} = 1; $as {"W"} = 1; $as {"X"} = 1; $as {"Y"} = 1; $as {"Z"} = 1; my $k; foreach $k (sort keys (%as)) { my $k2; foreach $k2 (sort keys (%as)) { print "$term$k$k2$helper\n"; } } } }