CISC 7512X Midterm Exam
NAME: _______________________

<!-- cat midterm20240327.src.txt|perl -e'$/=undef;$_=<STDIN>;s/<!--.*?-->//sgi;print'>midterm20240327.txt -->

Each question is worth 5 points.
Pick the best answer that fits the question. 
Not all of the answers may be correct. 
If none of the answers fit, write your own answer.

For questions below, use the following schema definition.

restaurant(rid, name, phone, street, city, state, zip)
customer(cid, fname, lname, phone, street, city, state, zip)
carrier(crid, fname, lname, lp) 
delivery(did, rid, cid, tim, size, weight)
pickup(did, tim, crid)
dropoff(did, tim, crid)

<!-- DDL
drop table restaurant;
create table restaurant(rid bigint, name varchar(10), phone varchar(30), street varchar(20), city varchar(10), state varchar(2), zip varchar(5));
drop table customer;
create table customer(cid bigint, fname varchar(10), lname varchar(10), phone varchar(30), street varchar(50), city varchar(10), state varchar(2), zip varchar(5));
drop table carrier;
create table carrier(crid bigint, fname varchar(10), lname varchar(10), lp varchar(10));
drop table delivery;
create table delivery(did bigint, rid bigint, cid bigint, tim timestamp, weight int);
drop table pickup;
create table pickup(did bigint, tim timestamp, crid bigint);
drop table dropoff;
create table dropoff(did bigint, tim timestamp, crid bigint);
-->

It's a schema for a food delivery business that employs food carriers (carrier table). 
Customers (customer table) order food from restaurants (restaurant table). 
The restaurants order a delivery (delivery table); to deliver food from restaurant to customer.
The pickup table records when carrier picks up food at restaurant.
The dropoff table records when carrier drops off food at customer.

1. (5 points) Find name of customer with cid 235235.

a. select fname,lname 
   from customer 
   where cid=235235;<!-- Answer -->

b. select fname,lname 
   from restaurant 
   where rid=235235;

c. select * 
   from customer 
   where cid=236235;

d. select * 
   from customer 
      left outer join delivery 
      using (cid) 
   where cid=235235;

e. (write your own answer)


2. (5 points) How many restaurants placed delivery orders in January 2024?

a. select count(*) as cnt
   from restaurant 
      inner join delivery
      using (rid)
   where tim>=cast('2024-01-01' as date) and 
         tim<cast('2024-02-01' as date);
         
b. select a.rid,count(*) as cnt
   from restaurant a
      inner join delivery b
      on a.rid=b.rid
   where tim>=cast('2024-01-01' as date) and 
         tim<cast('2024-02-01' as date)
   group by a.rid;

c. select count(distinct rid) cnt
   from delivery 
   where tim>=cast('2024-01-01' as date) and 
         tim<cast('2024-02-01' as date);<!-- Answer -->

d. select sum ( count(*) ) over () cnt
   from restaurant a
      inner join delivery b
      on a.rid=b.rid
   where tim>=cast('2024-01-01' as date) and 
         tim<cast('2024-02-01' as date)
   group by a.rid;

e. (write your own answer)


3. (5 points) What percentage of deliveries cross state lines?

a. select 100.0*sum(case when a.state != c.state then 1.0 else 0.0 end)/sum(1.0) prcnt
   from restaurant a 
     natural inner join delivery b
     natural inner join customer c;

b. select 100.0*sum(case when a.state != c.state then 1.0 else 0.0 end)/sum(1.0) prcnt
   from restaurant a 
     inner join delivery b 
     on a.rid=b.rid
     inner join customer c 
     on b.cid=c.cid;<!-- Answer -->

c. select count(*)/100.0 prcnt
   from restaurant a 
     inner join delivery b using(rid)
     inner join customer c using(cid)
   where a.state != c.state;

   
d. select 100.0*sum(case when a.state != c.state then 1.0 else 0.0 end)/sum(1.0) prcnt
   from restaurant a 
     inner join delivery b 
     on a.rid=b.rid
     inner join customer c 
     on b.cid=c.cid 
   where (a.zip != c.zip and a.state = c.state) or 
     (a.zip = c.zip and a.state != c.state);

e. (write your own answer)



4. (5 points) Find all ongoing deliveries (food has been picked up, 
but not dropped off).

a. select a.*
   from delivery a 
     inner join pickup b using (did)
     inner join dropoff c using (did)
   where c.did is null;

b. select a.*
   from delivery a 
     inner join pickup b using (did)
     left outer join dropoff c using (did)
   where c.did is null;<!-- Answer -->

c. select a.*
   from delivery a 
     left outer join pickup b using (did)
     inner join dropoff c using (did)
   where c.did is null;

d. select a.*
   from delivery a 
     left outer join pickup b using (did)
     left outer join dropoff c using (did)
   where c.did is null;

e. (write your own answer)



5. (5 points) Find average time (in seconds) between a delivery order and food pickup.

a. with diff as (
     select extract(epoch from b.tim - a.tim) d
     from delivery a 
       natural inner join pickup b
   ) 
   select avg(d) 
   from diff;

b. with evts as (
     select did, tim from delivery
     union all
     select did, tim from pickup
   ),
   diff as (
     select extract(epoch from lag(tim) over (partition by did order by tim) - tim) d
     from evts
   )
   select avg(d)
   from diff;

c. select distinct avg( extract(epoch from b.tim - a.tim) ) over () avg
   from delivery a 
      natural inner join pickup b;

d. select avg( extract( epoch from b.tim - a.tim) ) 
   from delivery a 
     inner join pickup b 
     on a.did = b.did;<!-- Answer -->


e. (write your own answer)


6. (5 points) Identify restaurant with most deliveries.

a. with stats as (
     select a.rid,count(b.rid) cnt, max( count(b.rid) ) over () maxcnt
     from restaurant a
       natural left outer join delivery b
       natural left outer join pickup c
       natural left outer join dropoff d
       natural left outer join carrier e     
     group by a.rid
   )
   select * 
   from stats 
   where cnt = maxcnt;

b. with stats as (
     select rid, sum(1) cnt
     from delivery
     group by rid
   ),
   stats2 as (
     select rid, rank() over (order by cnt) rnk
     from stats
   )
   select * 
   from stats2
   where rnk=1;

c. with stats as (
     select rid, count(*) cnt, max( count(*) ) over () maxcnt
     from delivery 
     group by rid
   ) 
   select * 
   from stats
   where cnt = maxcnt;<!-- Answer -->

d. with stats as (
     select rid, sum(1) cnt
     from delivery
     group by rid
   ),
   stats2 as (
     select rid, dense_rank() over (order by cnt) rnk
     from stats
   )
   select * 
   from stats2
   where rnk=1;

e. (write your own answer)


7. (5 points) Identify restaurant with most customers.

a. with stats as (
     select a.rid, count(distinct c.cid) cnt
     from restaurant a
       naturaldropoff inner join delivery b
       natural inner join customer c
     group by a.rid
   ),
   mx as (
     select max(cnt) m
     from stats
   )
   select * 
   from stats cross join mx 
   where m = cnt;

b. with stats as (
     select rid, count(*) cnt, max( count(*) ) over () mx
     from delivery 
     group by rid
  ) 
  select *
  from stats
  where cnt = mx;

c. with stats as (
     select rid, rank() over ( order by count(distinct cid) desc) r
     from delivery 
     group by rid
   )
   select *
   from stats
   where r=1;<!-- Answer -->

d. select rid
   from delivery
   group by rid 
   having count(cid) >= all(select count(cid) from delivery group by rid);

e. (write your own answer)


8. (5 points) Carriers may deliver for one or more restaurants. 
What is the average number of restaurants that a carrier picks up from?

a. with stats as (
     select count(*) cnt
     from delivery a 
       natural inner join pickup b
     group by b.crid
   )
   select avg(cnt) from stats;


b. with stats as (
     select distinct crid, rid
     from pickup 
       natural left outer join delivery
   ),
   cnts as (
     select count(*) cnt
     from stats
     group by crid
     having count(*) is not null
   )
   select avg(cnt) from cnts;

c. with stats as (
     select count(distinct a.rid) cnt
     from delivery a
       inner join pickup b
       on a.did=b.did
     group by b.crid
   )
   select avg(cnt) from stats;<!-- Answer -->

d. with stats as (
     select count(distinct b.crid) cnt
     from delivery a 
       inner join pickup b
       on a.did=b.did
     group by a.rid
   )
   select avg(cnt) from stats;

e. (write your own answer)


9. (5 points) What percentage of deliveries are picked up and dropped off
by different carriers?

a. select count(*) / 100.0 prcnt
   from pickup a
     inner join dropoff b
     on a.did=b.did
   where a.crid != b.crid;

b. select 100.0*sum(case when a.crid != b.crid then 1.0 else 0.0 end)/sum(1.0) prcnt
   from pickup a
     inner join dropoff b
     on a.did=b.did;<!-- Answer -->

c. with evnts as (
     select * 
     from pickup 
        union all 
     select * 
     from dropoff
   ),
   fl as (   
     select case when min(crid)!=max(crid) then 1 else 0 end val
     from evnts
     group by did
   ) 
   select 100.0*sum(val)/count(*) 
   from fl;

d. select 100.0*sum(case when a.crid!=b.crid then 1.0 else 0.0 end)/sum(1.0) prcnt
   from pickup a cross join dropoff b
   where a.did=b.did;

e. (write your own answer)


10. (5 points) Find unusually slow deliveries, between pickup and dropoff, 
 those that are 2 standard deviations higher than the average time.

a. select a.did
   from pickup a
     natural inner join dropoff b
   group by a.did, extract(epoch from b.tim - a.tim)
   having extract(epoch from b.tim - a.tim) >= 
       avg( extract(epoch from b.tim - a.tim) )+
          2*stddev( extract(epoch from b.tim - a.tim) );

b. with dlta as (
     select a.did, extract(epoch from c.tim - b.tim) d
     from delivery a 
       left outer join pickup b on a.did=b.did
       left outer join dropoff c on a.did=c.did
   ),
   stats as (
     select did, d, avg(d) a, stddev(d) sd
     from dlta
     group by did, d
   )
   select did
   from stats
   where d >= a+2*sd;

c. with delta as (
     select a.did, extract(epoch from b.tim - a.tim) d
     from pickup a
       inner join dropoff b
       on a.did=b.did
   ),
   stats as (
     select did, d, avg(d) over () a, stddev(d) over () sd
     from delta
   )
   select *
   from stats 
   where d >= a+2*sd;<!-- Answer: does not use delivery table. -->

d. with dlta as (
     select a.did, extract(epoch from c.tim - b.tim) d
     from delivery a 
       left outer join pickup b on a.did=b.did
       left outer join dropoff c on a.did=c.did
   ),
   stats as (
     select did, d, avg(d) over () a, stddev(d) over () sd
     from dlta
   )
   select did
   from stats 
   where d >= a+2*sd;

e. (write your own answer)


11. (5 points) Find instances when a customer has 100lb (weight) or more on the 
way to them (aggregate all shipments picked up, but not dropped off yet).

a. with evnts as (
     select a.cid,a.tim,a.weight 
     from delivery a 
     union all 
     select a.cid,b.tim,-a.weight 
     from delivery a 
        left outer join dropoff b using (did)
   ),
   stats as (
    select cid, tim, 
      sum(weight) over (partition by cid order by tim 
       range between unbounded preceding and current row) weight
    from evnts 
   )
   select *
   from stats
   where weight >= 100;

b. select *
   from delivery 
   where weight >= 100;

c. select a.cid
   from delivery a 
     inner join pickup b using (did)
     left outer join dropoff c using (did)
   where c.did is null
   group by a.cid
   having sum(a.weight) >= 100;

d. with evnts as (
     select a.cid,b.tim,a.weight 
     from delivery a 
        inner join pickup b using (did)
     union all 
     select a.cid,b.tim,-a.weight 
     from delivery a 
        inner join dropoff b using (did)
   ),
   stats as (
    select cid, tim, 
      sum(weight) over (partition by cid order by tim 
       rows between unbounded preceding and current row) weight
    from evnts 
   )
   select *
   from stats
   where weight >= 100;<!-- Answer -->

e. (write your own answer)


12. (5 points) Identify carriers who are overworked (longest interval 
between pickup/dropoff is less than 8 hours---meaning they don't get 
to sleep for 8 hours). 

a. with stats as (
     select a.crid, 
        max( extract(epoch from b.tim - a.tim) ) over (order by a.tim, b.tim) mxdif
     from pickup a 
       inner join dropoff b
       on a.did=b.did
   )
   select * from stats
   where mxdif <=  8*60*60;

b. select a.crid
   from pickup a 
     inner join dropoff b
     on a.did=b.did
   group by a.crid
   having max( extract(epoch from b.tim - a.tim) ) < 8*60*60;

c. with evts as ( 
     select * from pickup 
     union all 
     select * from dropoff 
   ),
   dlta as (
     select crid, 
        extract(epoch from tim - lag(tim) over (partition by crid order by tim) ) d
     from evts
   ),
   mx as (
     select crid, max(d) over (partition by crid) maxd
     from dlta
   )
   select * from mx where maxd < 8*60*60;

d. with evts as ( 
     select * from pickup 
     union all 
     select * from dropoff 
   ),
   dlta as (
     select crid, 
        extract(epoch from tim - lag(tim) over (partition by crid order by tim) ) d
     from evts
   ),
   mx as (
     select crid, max(d) maxd
     from dlta
     group by crid
   )
   select * from mx where maxd < 8*60*60;<!-- Answer -->

e. (write your own answer)<!-- Answer; the original c answer had a bug -->


13. (5 points) What percentage of customers only order from 1 restaurant?

a. select distinct 
    100.0*sum( case when min(rid)=max(rid) then 1.0 else 0.0 end ) over () / count(*) over () 
   from delivery 
   group by cid;

b. select cid, count(distinct rid)/100.0 cnt
   from delivery
   group by cid
   having count(distinct rid) > 1;

c. with stats as (
     select case when min(rid)=max(rid) then 1.0 else 0.0 end fl
     from delivery 
     group by cid
   )
   select 100.0*sum(fl)/sum(1.0)
   from stats;<!-- Answer -->

d. with stats as (
     select cid, count(distinct rid) cnt
     from delivery
     group by cid
   )
   select cid, 100.0*sum(case when cnt=1 then 1.0 else 0.0 end)/sum(1.0)
   from stats
   group by cid;

e. (write your own answer)


14. (5 points) Find all deliveries where the carrier name is the same as 
customer (at the point of dropoff).

a. select b.*
   from customer a 
     inner join delivery b 
     on a.cid=b.cid
     inner join dropoff c 
     on b.did=c.did 
     inner join carrier d
     on c.crid=d.crid 
   where a.fname=d.fname and a.lname=d.lname;<!-- Answer -->

b. select b.*
   from customer a
     natural inner join delivery b
     natural inner join dropoff c
     natural inner join carrier d
   where (a.fname,a.lname) = (d.fname,d.lname);

c. select a.*
   from customer a
      inner join carrier b
      using (fname,lname)
      inner join dropoff c
      using (crid)
      inner join delivery d
      using (did);

d. select d.*
   from customer a 
     inner join delivery b 
     on a.cid=b.cid
     inner join dropoff c 
     on b.did=c.did 
     inner join carrier d
     on c.crid=d.crid and 
        a.fname=d.fname and 
        a.lname=d.lname;

e. (write your own answer)


15. (5 points) Find customers who never had a delivery.

a. select a.* 
   from customer a 
      left outer join delivery b 
      on a.cid=b.rid 
   where b.did is null;

b. select a.* 
   from customer a 
      natural left outer join delivery 
   where did is null;<!-- Answer -->

c. select a.* 
   from customer a 
     left outer join delivery 
     using (cid) 
   where a.cid is null;
  
d. select a.* 
   from customer a 
      natural inner join delivery 
   where did is null;

e. (write your own answer)

16. (5 points) Find customers who did not have any deliveries for 3 months.

a. select a.cid 
   from customer a 
     left outer join delivery b 
     on a.cid=b.cid 
    where b.tim >= (now()-interval '3 months') and b.did is null;

b. select a.cid 
   from customer a 
     left outer join delivery b 
     using(cid) and b.tim >= (now()-interval '3 months') 
   where b.did is not null;

c. select a.cid 
   from customer a 
      left outer join delivery b 
      on a.cid=b.cid and b.tim >= (now()-interval '3 months') 
   where b.did is null;<!-- Answer -->

d. select a.cid 
   from customer a 
     natural left outer join delivery b 
   where b.tim >= (now()-interval '3 months') and b.did is null;

e. (write your own answer)




17. (5 points) Below query is identical to: 
  select a.*,b.val c
  from T1 a 
    left outer join T2 b 
    on a.key=b.key and a.val!=b.val
    
a. with TMP as (
     select a.*,b.val c
     from T1 a 
       inner join T2 b 
       on a.key=b.key
     where a.val!=b.val
   )
   select a.*,b.c
   from T1 a 
      left outer join TMP b 
      on a.key=b.key;<!-- Answer -->

b. with TMP as (
      select a.*,b.val c
      from T1 a 
         left outer join T2 b 
         on a.key=b.key 
      where a.val!=b.val
   )
   select a.* 
   from TMP 
   where a.val!=b.val;

c. select a.*,b.val c
   from T1 a 
      inner join T2 b 
   on a.key=b.key and 
      a.val!=b.val;

d. All of the above queries are identical.

e. None of the queries are identical to the question.


18. (5 points) Find customers who have more than 10 deliveries?
   
a. select a.cid,count(*) 
   from customer a 
      inner join delivery b using(cid)
   group by a.cid 
   where count(*) > 10;

b. select b.cid,count(*) 
   from customer a 
     left outer join delivery b 
     on a.cid=b.cid 
   group by b.cid 
   having count(*) > 10;

c. select cid,count(*) 
   from delivery 
   group by cid 
   having count(*) > 10;<!-- Answer -->

d. select cid,sum(case when b.cid is not null then 1 else 0 end)
   from customer a 
     left outer join delivery b 
     using (cid) 
   group by cid 
   having sum(case when b.cid is not null then 1 else 0 end) >= 10;

e. (write your own answer)


19. (5 points) Find customers who have less than 5 deliveries.

a. select cid,count(*) 
   from delivery 
   group by cid 
   having count(*) < 5;
   
b. select a.cid,count(*) 
   from customer a 
     left outer join delivery b 
     on a.cid=b.cid 
   group by a.cid 
   having count(*) < 5;<!-- Answer -->

c. select cid,sum(case when b.cid is not null then 1 else 0 end) 
   from customer a 
     left outer join delivery b 
     using (cid)
   group by cid 
   having sum(case when b.cid is not null then 1 else 0 end) < 5;<!-- Answer -->

d. select a.cid,count(*)
   from customer a 
     inner join delivery b 
   using(cid) 
   group by a.cid 
   having count(*) < 5;

e. (write your own answer)


20. (5 points) Count of customers by state?
   
a. select zip,count(*) 
   from customer 
   group by zip;
   
b. select state,count(*) 
   from customer 
     natural inner join delivery 
   where state='NY' 
   group by state;

c. select state,count(*) 
   from customer 
   group by state;<!-- Answer -->

d. with cust_state as (
     select cid, case when state='NY' then 1 else 0 end as in_state
     from delivery natural inner join customer
   )
   select b.in_state,count(*)
   from customer a 
     left outer join cust_state b
     on a.cid=b.cid
   group by b.in_state;
   
e. (write your own answer)


<!-- answer key

1. a
2. c
3. b
4. b 
5. d
6. c 
7. c 
8. c
9. b 
10. c 
11. d 
12. d 
13. c 
14. a 
15. b 
16. c 
17. a 
18. c 
19. b, c
20. c

-->