Wednesday, April 28, 2010

Ways to find duplicates

1. proc sort

2. GROUP BY (PROC SQL) or a CLASS statement (neither requires the data to be sorted)

3. sql distinct

4. unique index

Thursday, April 22, 2010

Read CSV file with carriage return characters

/* Neutralise line breaks that occur inside quoted CSV fields.     */
/* Every carriage return (CR) or linefeed (LF) byte found between  */
/* double quotes is overwritten with the replacement character     */
/* chosen below. This sample uses a blank for both, but any        */
/* character can be used. CR/LF bytes that are not inside double   */
/* quotes are left untouched.                                      */

%let repA=' '; /* written in place of an LF ('0A'x) */
%let repD=' '; /* written in place of a CR ('0D'x) */

%let dsnnme="/home/help/zzz.csv"; /* full path of the CSV file to fix */

data _null_;
  /* RECFM=N processes the file as a plain stream of bytes with no */
  /* record boundaries. SHAREBUFFERS makes the INFILE and FILE     */
  /* statements share one buffer, so the file is updated in place. */
  infile &dsnnme recfm=n sharebuffers;
  file &dsnnme recfm=n;

  /* IN_QUOTES is 1 while the current byte lies inside a double-   */
  /* quoted field and 0 otherwise; it must survive across          */
  /* iterations, hence RETAIN.                                     */
  retain in_quotes 0;

  /* Read exactly one byte per iteration of the data step. */
  input ch $char1.;

  /* Each double quote toggles the inside/outside state. */
  if ch = '"' then in_quotes = not in_quotes;

  /* Only bytes inside quotes are candidates for replacement. */
  if in_quotes then
    select (ch);
      when ('0D'x) put &repD;
      when ('0A'x) put &repA;
      otherwise;
    end;
run;

/* Import the cleaned CSV file into WORK.ZZZ.                        */
/* The fileref reuses &dsnnme (the quoted path defined above) so the */
/* file that was cleaned and the file that is imported cannot drift  */
/* apart. TERMSTR=CRLF makes only a CR+LF pair end a record.         */
filename infle &dsnnme termstr=CRLF;

/* GETNAMES=YES takes variable names from the first row, DATAROW=2   */
/* starts reading data at the second row, and GUESSINGROWS=1000      */
/* scans 1000 rows when determining variable types and lengths.      */
PROC IMPORT OUT= zzz DATAFILE= infle
DBMS=CSV REPLACE;
GUESSINGROWS=1000;
GETNAMES=YES;
DATAROW=2;
RUN;

Monday, April 19, 2010

MSGLEVEL option

The MSGLEVEL=I option prints additional SAS INFO messages (related to index usage, merge processing, sort utilities, and CEDA usage) to the SAS log, in addition to the regular SAS NOTE, WARNING, and ERROR messages.

The default is MSGLEVEL=N.

/* Write INFO messages to the SAS log in addition to NOTE, WARNING, and ERROR messages. */
OPTIONS MSGLEVEL=I;

See also

Wednesday, April 14, 2010

Read all files in one directory

/* READRAW: read every *.dat file in a directory into its own data set */
/* and print it.                                                       */
/*                                                                     */
/* Parameter:                                                          */
/*   dir= directory to scan (default: . , the current directory).     */
/*                                                                     */
/* For each member whose last dot-delimited part is DAT (any case), a  */
/* data set named after the part of the file name before the first dot */
/* is created with Course_code, Location and Begin_date, then printed  */
/* with the data set name as the title.                                */
/* NOTE(review): the file name stem is used verbatim as the data set   */
/* name, so it is assumed to be a valid SAS name - confirm for your    */
/* input files.                                                        */
%macro readraw(dir=.);

  /* DATASET is declared local too, so the macro cannot overwrite a */
  /* macro variable of the same name in the calling environment.    */
  %local fileref rc did dnum dmem memname dataset;
  %let fileref=thisdir;

  /* Assign the fileref to the directory and open it. */
  %let rc=%sysfunc(filename(fileref,&dir));
  %let did=%sysfunc(dopen(&fileref));

  %if &did = 0 %then %do;
    /* DOPEN returns 0 when the directory cannot be opened; report it */
    /* instead of passing an invalid id to DNUM/DREAD/DCLOSE.         */
    %put ERROR: READRAW could not open directory: &dir;
    %put %qsysfunc(sysmsg());
  %end;
  %else %do;
    %let dnum=%sysfunc(dnum(&did));

    %do dmem=1 %to &dnum;
      %let memname=%sysfunc(dread(&did, &dmem));

      /* Process only members whose extension is DAT. */
      %if %upcase(%scan(&memname,-1,.)) = DAT %then %do;
        %let dataset=%scan(&memname,1,.);

        data &dataset;
          infile "&dir\&memname";
          input Course_code $4. Location $15. Begin_date date9.;
          format Begin_date date9.;
        run;

        /* &syslast holds the name of the data set just created. */
        proc print data=&dataset;
          title "%trim(&syslast)";
        run;
      %end;
    %end;

    /* Close the directory that was opened above. */
    %let rc=%sysfunc(dclose(&did));
  %end;

  /* Deassign the fileref whether or not the directory was opened. */
  %let rc=%sysfunc(filename(fileref));

%mend readraw;

/* MPRINT writes the SAS statements generated by the macro to the log. */
options mprint;
%readraw(dir=c:\workshop\winsas\amacr);