DEFLNG a-z

'harvest -- sucks email addresses out of a file
'
' Usage:  harvest filename.ext
' Output: a banner, then the unique, lower-cased, sorted addresses found in
'         the file, one per line, all on standard output.

%MAX_EMAILS = 20000   'max out at 20000 emails

'----------------------------------------------------------------------------
' CleanAddress -- validate one delimiter-bounded token.
'
' token$  : raw text surrounding an "@", already cut at the delimiters.
' Returns : the lower-cased address, or "" when the token does not look
'           like an e-mail address.
'----------------------------------------------------------------------------
FUNCTION CleanAddress(BYVAL token$) AS STRING
  ' Sentence punctuation frequently trails an address ("mail me@x.com, or").
  ' Strip it BEFORE validating so the length and dot checks see the real text.
  addr$ = RTRIM$(LCASE$(token$), ANY ".,")

  IF LEN(addr$) < 7 THEN EXIT FUNCTION
  IF TALLY(addr$, "@") <> 1 THEN EXIT FUNCTION
  IF LEFT$(addr$, 1) = "@" OR RIGHT$(addr$, 1) = "@" THEN EXIT FUNCTION

  ' The last dot must sit in the domain part and leave at most a
  ' three-character suffix (a missing dot gives 0 and is rejected too).
  lastDot = INSTR(-1, addr$, ".")
  IF lastDot < LEN(addr$) - 3 THEN EXIT FUNCTION
  IF lastDot < INSTR(addr$, "@") THEN EXIT FUNCTION

  IF INSTR(addr$, ANY "$,/\") > 0 THEN EXIT FUNCTION

  FUNCTION = addr$
END FUNCTION

'----------------------------------------------------------------------------
' PBMAIN -- program entry point; the file name is taken from COMMAND$.
'----------------------------------------------------------------------------
FUNCTION PBMAIN()

  theFile$ = TRIM$(COMMAND$)

  STDOUT "harvest -- bme email harvesting utility"
  STDOUT ""

  IF INSTR(theFile$, ANY "*?") THEN
    STDOUT "Error: You may not use wildcards -- if you want to process multiple files"
    STDOUT "use the 'wrap' utility and '>>' appended redirection."
    STDOUT ""
    EXIT FUNCTION
  END IF

  ' Test for an empty name first so DIR$ is never called with "".
  found$ = ""
  IF theFile$ <> "" THEN found$ = DIR$(theFile$)
  IF found$ = "" THEN
    STDOUT "Error: You must specify a valid filename to harvest"
    STDOUT "command line usage is 'harvest filename.ext' -- output to stdio"
    STDOUT ""
    EXIT FUNCTION
  END IF

  DIM potEmail$(1 TO %MAX_EMAILS)
  numEmails = 0

  ' Characters that end an address on either side (CHR$(34) is the double quote).
  delims$ = "':<>()[]{} " & CHR$(34)

  hFile = FREEFILE
  OPEN theFile$ FOR INPUT AS #hFile

  ' EOF is tested before every read so an empty file is handled cleanly.
  DO UNTIL EOF(hFile) OR numEmails >= %MAX_EMAILS
    LINE INPUT #hFile, x$

    ' Visit every "@" on the line, not just the first one.
    atSign = INSTR(x$, "@")
    DO WHILE atSign > 0 AND numEmails < %MAX_EMAILS

      ' A negative start makes INSTR search right-to-left, with -1 being the
      ' last character; -(LEN - atSign + 1) therefore starts on the "@"
      ' itself.  No delimiter found yields 0, so the token starts at 1.
      theStart = INSTR(-(LEN(x$) - atSign + 1), x$, ANY delims$) + 1

      ' theEnd is the position of the delimiter that follows the token
      ' (one past the end of the line when there is none).
      theEnd = INSTR(atSign, x$, ANY delims$)
      IF theEnd = 0 THEN theEnd = LEN(x$) + 1

      theEmail$ = CleanAddress(MID$(x$, theStart, theEnd - theStart))

      IF LEN(theEmail$) > 0 THEN
        isDup = 0
        FOR r = 1 TO numEmails
          IF potEmail$(r) = theEmail$ THEN
            isDup = 1
            EXIT FOR
          END IF
        NEXT r
        IF isDup = 0 THEN
          INCR numEmails
          potEmail$(numEmails) = theEmail$
        END IF
      END IF

      ' Resume after this token; any further "@" inside it was already
      ' covered (and rejected) by the single-"@" rule.
      IF theEnd > LEN(x$) THEN EXIT DO
      atSign = INSTR(theEnd, x$, "@")
    LOOP
  LOOP

  CLOSE #hFile

  IF numEmails > 0 THEN
    ARRAY SORT potEmail$() FOR numEmails
    FOR r = 1 TO numEmails
      STDOUT potEmail$(r)
    NEXT r
  END IF

END FUNCTION