*********************************************************************
SUPPLEMENTAL FILE 3 - Create fake dataset sampled from real HDSS data
Author: Christopher Rentsch
Email: Christopher.Rentsch@lshtm.ac.uk
*********************************************************************;

*set up library;
options nocenter ps=500 ls=250;
libname REAL "\\filepath";
libname FAKE "\\filepath";

*bring in real HDSS dataset;
data real;
    set REAL.HDSSDATA;
run;

proc contents data = real;
run;

*create ID - a standalone list of the IDs 1 to 100, printed for reference;
data ID;
    length ID 3;
    do ID = 1 to 100;
        output;
    end;
    format ID 3.;
run;

proc print data = ID noobs;
run;

*separately and for each field, take a random sample of 100 values from real HDSS data;
*and create an ID 1 to 100;

/* NB. No CALL STREAMINIT is issued in the sampling steps below, so each   */
/* step draws its own random stream. Do not give every field step the same */
/* seed: identical streams would select the same real rows for every field */
/* and the merged file would then reproduce real records.                  */

*field1;
*attach one uniform random number to every real value of the field;
data one;
    set real (keep=field1);
    rand = rand('UNIFORM');
run;

*shuffle the values by ordering on the random number;
proc sort data = one;
    by rand;
run;

*keep the first 100 shuffled values and number them 1 to 100 in that order;
*the ID must be assigned here, after the shuffle, so that each ID gets a different sampled value;
*rand is only a sorting helper and is dropped so it does not reach the fake dataset;
data field1 (drop=rand);
    set one (firstobs=1 obs=100);
    length ID 3;
    ID = _N_;
    format ID 3.;
run;

proc print data = field1;
run;

*example -- FirstName;
*FirstName;
*same three steps as above, applied to a named field;
data one;
    set real (keep=FirstName);
    rand = rand('UNIFORM');
run;

proc sort data = one;
    by rand;
run;

data FirstName (drop=rand);
    set one (firstobs=1 obs=100);
    length ID 3;
    ID = _N_;
    format ID 3.;
run;

proc print data = FirstName;
run;

*bring all fields together;
*every dataset in the list must exist, contain ID and be in ID order;
*only field1 and the FirstName example are built in this file, the remaining fields are assumed to be built the same way;
data FAKE.fake100;
    merge field1-field25;
    by ID;
run;

*NB. some manual edits were performed within excel on the fake data to allow the case examples to better mimic reality in the field;